diff --git a/CMakeLists.txt b/CMakeLists.txt index 368d9390c9ac14160b181cddf77d5c5b12776f5c..fb52f0b84b67d28f2af26fc316e5a1c904faa28a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -285,6 +285,7 @@ add_subdirectory("${PUGS_SOURCE_DIR}/packages/kokkos") # set as SYSTEM for static analysis include_directories(SYSTEM ${KOKKOS_SOURCE_DIR}/core/src) include_directories(SYSTEM ${KOKKOS_SOURCE_DIR}/containers/src) +include_directories(SYSTEM ${KOKKOS_SOURCE_DIR}/tpls/desul/include) include_directories(SYSTEM ${KOKKOS_BINARY_DIR}) set(PUGS_BUILD_KOKKOS_DEVICES "") diff --git a/packages/kokkos/.github/workflows/continuous-integration-workflow-hpx.yml b/packages/kokkos/.github/workflows/continuous-integration-workflow-hpx.yml new file mode 100644 index 0000000000000000000000000000000000000000..b17d173ba2488891bd0ab3b3c8eb31d797b526eb --- /dev/null +++ b/packages/kokkos/.github/workflows/continuous-integration-workflow-hpx.yml @@ -0,0 +1,88 @@ +name: github-Linux-hpx + +on: [push, pull_request] + +concurrency: + group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +jobs: + hpx: + name: hpx + runs-on: [ubuntu-latest] + + steps: + - name: checkout code + uses: actions/checkout@v2.2.0 + with: + path: kokkos + - name: setup hpx dependencies + run: | + sudo apt update + sudo apt install \ + clang \ + hwloc \ + libasio-dev \ + libboost-all-dev \ + ninja-build + - name: checkout hpx + uses: actions/checkout@v2.2.0 + with: + repository: STELLAR-GROUP/hpx + ref: 1.7.1 + path: hpx + - uses: actions/cache@v2 + id: cache-hpx + with: + path: ./hpx/install + key: kokkos-hwloc-${{ github.ref }}-${{ github.sha }} + restore-keys: kokkos-hwloc-${{ github.ref }} + - name: configure hpx + if: steps.cache-hpx.outputs.cache-hit != 'true' + run: | + mkdir -p hpx/{build,install} + cd hpx/build + cmake \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_INSTALL_PREFIX=$PWD/../install \ + 
-DCMAKE_CXX_COMPILER=clang++ \ + -DHPX_WITH_UNITY_BUILD=ON \ + -DHPX_WITH_MALLOC=system \ + -DHPX_WITH_NETWORKING=OFF \ + -DHPX_WITH_EXAMPLES=OFF \ + -DHPX_WITH_TESTS=OFF \ + .. + - name: build and install hpx + if: steps.cache-hpx.outputs.cache-hit != 'true' + working-directory: hpx/build + run: ninja -j2 install + + - name: configure kokkos + run: | + mkdir -p kokkos/{build,install} + cd kokkos/build + cmake \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_INSTALL_PREFIX=$PWD/../install \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_CXX_FLAGS="-Werror" \ + -DHPX_ROOT=$PWD/../../hpx/install \ + -DKokkos_ARCH_NATIVE=ON \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF \ + -DKokkos_ENABLE_EXAMPLES=ON \ + -DKokkos_ENABLE_HPX=ON \ + -DKokkos_ENABLE_HPX_ASYNC_DISPATCH=ON \ + -DKokkos_ENABLE_SERIAL=OFF \ + -DKokkos_ENABLE_TESTS=ON \ + .. + + - name: build_and_install_kokkos + working-directory: kokkos/build + run: ninja -j2 install + + - name: test_kokkos + working-directory: kokkos/build + run: ctest --timeout 2000 -j2 --output-on-failure diff --git a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml index dba3a70745ce57d91e788e92fefd66a915ae0826..b2b4bfc3109d9aa10966b2866c472fa4d8457c5f 100644 --- a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml +++ b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml @@ -14,27 +14,33 @@ jobs: cxx: ['g++', 'clang++'] cmake_build_type: ['Release', 'Debug'] backend: ['OPENMP'] + clang-tidy: [''] include: - distro: 'fedora:intel' cxx: 'icpc' cmake_build_type: 'Release' backend: 'OPENMP' + clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpc' cmake_build_type: 'Debug' backend: 'OPENMP' + clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpx' cmake_build_type: 'Release' backend: 'OPENMP' + clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpx' cmake_build_type: 'Debug' backend: 
'OPENMP' + clang-tidy: '' - distro: 'ubuntu:latest' cxx: 'clang++' cmake_build_type: 'RelWithDebInfo' backend: 'THREADS' + clang-tidy: '-DCMAKE_CXX_CLANG_TIDY="clang-tidy;-warnings-as-errors=*"' - distro: 'ubuntu:latest' cxx: 'g++' cmake_build_type: 'RelWithDebInfo' @@ -45,6 +51,21 @@ jobs: # see https://github.com/actions/virtual-environments/issues/3812 options: --security-opt seccomp=unconfined steps: + - name: Checkout desul + uses: actions/checkout@v2.2.0 + with: + repository: desul/desul + ref: 477da9c8f40f8db369c28dd3f93a67e376d8511b + path: desul + - name: Install desul + working-directory: desul + run: | + git submodule init + git submodule update + mkdir build + cd build + cmake -DDESUL_ENABLE_TESTS=OFF -DCMAKE_INSTALL_PREFIX=/usr/desul-install .. + sudo cmake --build . --target install --parallel 2 - name: Checkout code uses: actions/checkout@v2.2.0 - uses: actions/cache@v2 @@ -58,11 +79,17 @@ jobs: - name: maybe_use_external_gtest if: ${{ matrix.distro == 'ubuntu:latest' }} run: sudo apt-get update && sudo apt-get install -y libgtest-dev - - name: CMake + - name: maybe_install_clang_tidy + if: ${{ matrix.clang-tidy != '' }} + run: sudo apt-get update && sudo apt-get install -y clang-tidy + - name: Configure Kokkos run: | cmake -B builddir \ -DCMAKE_INSTALL_PREFIX=/usr \ + ${{ matrix.clang-tidy }} \ + -Ddesul_ROOT=/usr/desul-install/ \ -DKokkos_ARCH_NATIVE=ON \ + -DKokkos_ENABLE_DESUL_ATOMICS_EXTERNAL=ON \ -DKokkos_ENABLE_HWLOC=ON \ -DKokkos_ENABLE_${{ matrix.backend }}=ON \ -DKokkos_ENABLE_TESTS=ON \ diff --git a/packages/kokkos/.github/workflows/osx.yml b/packages/kokkos/.github/workflows/osx.yml index 69a09adf89d1496a91858284633db1bcb4a7a81c..0e043c5f8f1b6ea81965a7a6e87a33d6bfc7e7b3 100644 --- a/packages/kokkos/.github/workflows/osx.yml +++ b/packages/kokkos/.github/workflows/osx.yml @@ -31,6 +31,7 @@ jobs: -DKokkos_ENABLE_${{ matrix.backend }}=On -DCMAKE_CXX_FLAGS="-Werror" -DCMAKE_CXX_STANDARD=14 + -DKokkos_ARCH_NATIVE=ON 
-DKokkos_ENABLE_COMPILER_WARNINGS=ON -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF -DKokkos_ENABLE_TESTS=On diff --git a/packages/kokkos/.gitignore b/packages/kokkos/.gitignore index eb2257762bdbc1a0536bb04ef935d94387a5578d..a36540be876f163bfd226812e825a6fd9da23f31 100644 --- a/packages/kokkos/.gitignore +++ b/packages/kokkos/.gitignore @@ -12,6 +12,7 @@ testing/ /out/build /CMakeSettings.json /out/mytest +CMakeUserPresets.json # build directories in source tree /build* diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo index 91e0f8daac3d6ca34dc5bde954f56a1d8ea44f86..9b53d527695cc0db4672b75762f449344c305baa 100644 --- a/packages/kokkos/.gitrepo +++ b/packages/kokkos/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:kokkos/kokkos.git branch = master - commit = d19aab9981a2c447e832a7b4eb7b16992328fb14 - parent = a64ea7589ca011edd41ad9a3468d091cd093430c + commit = 61d7db55fceac3318c987a291f77b844fd94c165 + parent = 91d53e3cfb9a55832aae102ca677044a47f2515d method = merge cmdver = 0.4.3 diff --git a/packages/kokkos/.jenkins b/packages/kokkos/.jenkins index b5d7fc3071d286694c40a1ce111a8fb04a1134a5..09052840e6a7ea4115c1be9abb3f2aa2e644eabd 100644 --- a/packages/kokkos/.jenkins +++ b/packages/kokkos/.jenkins @@ -25,25 +25,57 @@ pipeline { sh './scripts/docker/check_format_cpp.sh' } } - stage('Build') { parallel { - stage('CUDA-11.4-NVHPC') { + stage('OPENACC-NVHPC-CUDA-11.6') { agent { dockerfile { filename 'Dockerfile.nvhpc' dir 'scripts/docker' - label 'nvidia-docker && volta && large_images' + label 'nvidia-docker && large_images' + args '--env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' + } + } + steps { + sh '''rm -rf build && mkdir -p build && cd build && \ + /opt/cmake/bin/cmake \ + -DCMAKE_CXX_COMPILER=nvc++ \ + -DCMAKE_CXX_STANDARD=17 \ + -DKokkos_ARCH_NATIVE=ON \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_TESTS=ON \ + -DKokkos_ENABLE_OPENACC=ON \ + -DKokkos_ARCH_VOLTA70=ON \ + .. 
&& \ + make -j8 && ctest --verbose''' + } + } + stage('CUDA-11.6-NVHPC') { + agent { + dockerfile { + filename 'Dockerfile.nvhpc' + dir 'scripts/docker' + label 'nvidia-docker && large_images' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } + environment { + OMP_NUM_THREADS = 8 + // Nested OpenMP does not work for this configuration, + // so disabling it + OMP_MAX_ACTIVE_LEVELS = 1 + OMP_PLACES = 'threads' + OMP_PROC_BIND = 'spread' + NVCC_WRAPPER_DEFAULT_COMPILER = 'nvc++' + } steps { sh '''rm -rf build && mkdir -p build && cd build && \ /opt/cmake/bin/cmake \ -DCMAKE_BUILD_TYPE=Debug \ - -DCMAKE_CXX_COMPILER=nvc++ \ + -DCMAKE_CXX_COMPILER=$WORKSPACE/bin/nvcc_wrapper \ -DCMAKE_CXX_FLAGS=-Werror \ -DCMAKE_CXX_STANDARD=17 \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF \ -DKokkos_ENABLE_TESTS=ON \ @@ -70,7 +102,8 @@ pipeline { -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_CXX_FLAGS="-Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-linker-warnings" \ + -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-linker-warnings" \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ARCH_VOLTA70=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF \ @@ -101,6 +134,7 @@ pipeline { } environment { OMP_NUM_THREADS = 8 + OMP_MAX_ACTIVE_LEVELS = 3 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' } @@ -113,6 +147,7 @@ pipeline { -DCMAKE_CXX_COMPILER=hipcc \ -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument -DNDEBUG" \ -DCMAKE_CXX_STANDARD=14 \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF \ -DKokkos_ENABLE_TESTS=ON \ @@ -146,6 +181,7 @@ pipeline { -DCMAKE_CXX_COMPILER=hipcc \ -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument" \ -DCMAKE_CXX_STANDARD=17 \ + 
-DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ @@ -160,6 +196,7 @@ pipeline { } } } +/* stage('OPENMPTARGET-ROCm-4.5') { agent { dockerfile { @@ -172,6 +209,7 @@ pipeline { } environment { OMP_NUM_THREADS = 8 + OMP_MAX_ACTIVE_LEVELS = 3 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' LC_ALL = 'C' @@ -185,6 +223,7 @@ pipeline { -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_COMPILER=amdclang++ \ -DCMAKE_CXX_STANDARD=17 \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=OFF \ -DKokkos_ENABLE_TESTS=ON \ @@ -202,6 +241,7 @@ pipeline { } } } +*/ stage('OPENMPTARGET-Clang') { agent { dockerfile { @@ -218,6 +258,7 @@ pipeline { -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-Wno-unknown-cuda-version -Werror -Wno-undefined-internal -Wno-pass-failed" \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ @@ -254,6 +295,7 @@ pipeline { -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS=-Werror \ -DCMAKE_CXX_STANDARD=14 \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ @@ -306,12 +348,15 @@ pipeline { filename 'Dockerfile.nvcc' dir 'scripts/docker' additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.0-devel --build-arg ADDITIONAL_PACKAGES="g++-8 gfortran clang" --build-arg CMAKE_VERSION=3.17.3' - label 'nvidia-docker && volta' + label 'nvidia-docker' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } environment { OMP_NUM_THREADS = 8 + // Nested OpenMP does not work for this configuration, + // so disabling it + OMP_MAX_ACTIVE_LEVELS = 1 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' NVCC_WRAPPER_DEFAULT_COMPILER = 'g++-8' @@ -325,6 +370,7 @@ pipeline { 
-DCMAKE_CXX_COMPILER=g++-8 \ -DCMAKE_CXX_FLAGS=-Werror \ -DCMAKE_CXX_STANDARD=17 \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ENABLE_CUDA=ON \ @@ -343,7 +389,7 @@ pipeline { -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=$WORKSPACE/bin/nvcc_wrapper \ - -DCMAKE_CXX_FLAGS=-Werror \ + -DCMAKE_CXX_FLAGS=-Werror --Werror=all-warnings -Xcudafe --diag_suppress=3159 \ -DCMAKE_CXX_STANDARD=17 \ -DKokkos_INSTALL_TESTING=ON \ .. && \ @@ -367,13 +413,13 @@ pipeline { } } } - stage('CUDA-10.1-NVCC-DEBUG') { + stage('CUDA-11.6-NVCC-DEBUG') { agent { dockerfile { filename 'Dockerfile.nvcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=nvidia/cuda:10.1-devel' - label 'nvidia-docker && volta' + additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.6.0-devel-ubuntu20.04' + label 'nvidia-docker' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } @@ -386,6 +432,7 @@ pipeline { -DCMAKE_CXX_COMPILER=$WORKSPACE/bin/nvcc_wrapper \ -DCMAKE_CXX_FLAGS=-Werror \ -DCMAKE_CXX_STANDARD=14 \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEBUG=ON \ -DKokkos_ENABLE_DEBUG_BOUNDS_CHECK=ON \ @@ -416,6 +463,7 @@ pipeline { } environment { OMP_NUM_THREADS = 8 + OMP_MAX_ACTIVE_LEVELS = 3 OMP_PROC_BIND = 'true' } steps { @@ -424,6 +472,7 @@ pipeline { -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_STANDARD=14 \ -DCMAKE_CXX_FLAGS=-Werror \ + -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ @@ -431,6 +480,7 @@ pipeline { -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ENABLE_LIBDL=OFF \ -DKokkos_ENABLE_LIBQUADMATH=ON \ + -DKokkos_ENABLE_SERIAL=ON \ -DCMAKE_PREFIX_PATH=/usr/lib/gcc/x86_64-linux-gnu/5.3.1 \ .. 
&& \ make -j8 && ctest --verbose && gcc -I$PWD/../core/src/ ../core/unit_test/tools/TestCInterface.c''' diff --git a/packages/kokkos/BUILD.md b/packages/kokkos/BUILD.md index 114baf99f1dd32937e35147065f04a512f0fd257..a8985ef1fd8c5f8507d646d70bb5b1cf756e711c 100644 --- a/packages/kokkos/BUILD.md +++ b/packages/kokkos/BUILD.md @@ -27,7 +27,7 @@ When configuring your project just set: -DKokkos_ROOT=${kokkos_install_prefix} \ -DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos} ```` -Note: You may need the following if using some versions of CMake (e.g. 3.12): +Note: You may need the following if your project requires a minimum CMake version older than 3.12: ````cmake cmake_policy(SET CMP0074 NEW) ```` @@ -171,6 +171,9 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`. * Kokkos_ENABLE_HPX_ASYNC_DISPATCH * Whether HPX supports asynchronous dispatch * BOOL Default: OFF +* Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC + * Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2). This is an experimental performance feature and currently has issues when used with UCX. See https://github.com/kokkos/kokkos/issues/4228 for more details. + * BOOL Default: OFF * Kokkos_ENABLE_LARGE_MEM_TESTS * Whether to perform extra large memory tests * BOOL_Default: OFF @@ -235,6 +238,9 @@ The following options control `find_package` paths for CMake-based TPLs: ## Architecture Keywords Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`. 
+* Kokkos_ARCH_NATIVE + * Whether to optimize for the local CPU architecture + * BOOL Default: OFF * Kokkos_ARCH_AMDAVX * Whether to optimize for the AMDAVX architecture * BOOL Default: OFF @@ -310,12 +316,24 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_ * Kokkos_ARCH_POWER9 * Whether to optimize for the POWER9 architecture * BOOL Default: OFF +* Kokkos_ARCH_ICL + * Whether to optimize for the ICL architecture + * BOOL Default: OFF +* Kokkos_ARCH_ICX + * Whether to optimize for the ICX architecture + * BOOL Default: OFF +* Kokkos_ARCH_SKL + * Whether to optimize for the SKL architecture + * BOOL Default: OFF * Kokkos_ARCH_SKX * Whether to optimize for the SKX architecture * BOOL Default: OFF * Kokkos_ARCH_SNB * Whether to optimize for the SNB architecture * BOOL Default: OFF +* Kokkos_ARCH_SPR + * Whether to optimize for the SPR architecture + * BOOL Default: OFF * Kokkos_ARCH_TURING75 * Whether to optimize for the TURING75 architecture * BOOL Default: OFF diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index a908507704ed725fd8033aaa4da83d439d659178..e81f2944519e1b39b31e1c9d7332b3aa6cb8d45e 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,157 @@ # Change Log +## [3.7.00](https://github.com/kokkos/kokkos/tree/3.7.00) (2022-08-22) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.01...3.7.00) + +### Features: +- Use non-volatile `join()` member functions and `operator+=` in `parallel_reduce/scan` [\#4931](https://github.com/kokkos/kokkos/pull/4931) [\#4954](https://github.com/kokkos/kokkos/pull/4954) [\#4951](https://github.com/kokkos/kokkos/pull/4951) +- Add `SIMD` sub package (requires C++17) [\#5016](https://github.com/kokkos/kokkos/pull/5016) +- Add `is_finalized()` [\#5247](https://github.com/kokkos/kokkos/pull/5247) +- Promote mathematical functions from `namespace Kokkos::Experimental` to `namespace Kokkos` 
[\#4791](https://github.com/kokkos/kokkos/pull/4791) +- Promote `min`, `max`, `clamp`, `minmax` functions from `namespace Kokkos::Experimental` to `namespace Kokkos` [\#5170](https://github.com/kokkos/kokkos/pull/5170) +- Add `round`, `logb`, `nextafter`, `copysign`, and `signbit` math functions [\#4768](https://github.com/kokkos/kokkos/pull/4768) +- Add `HIPManagedSpace`, similar to `CudaUVMSpace` [\#5112](https://github.com/kokkos/kokkos/pull/5112) +- Accept view construction allocation properties in `create_mirror[_view,_view_and_copy]` and `resize/realloc` [\#5125](https://github.com/kokkos/kokkos/pull/5125) [\#5095](https://github.com/kokkos/kokkos/pull/5095) [\#5035](https://github.com/kokkos/kokkos/pull/5035) [\#4805](https://github.com/kokkos/kokkos/pull/4805) [\#4844](https://github.com/kokkos/kokkos/pull/4844) +- Allow `MemorySpace::allocate()` to be called with execution space [\#4826](https://github.com/kokkos/kokkos/pull/4826) +- Experimental: Compile time view subscriber [\#4197](https://github.com/kokkos/kokkos/pull/4197) + +### Backends and Archs Enhancements: +- Add support for Sapphire Rapids Intel architecture [\#5015](https://github.com/kokkos/kokkos/pull/5015) +- Add support for ICX, SKL and ICL Intel architectures [\#5013](https://github.com/kokkos/kokkos/pull/5013) [\#4929](https://github.com/kokkos/kokkos/pull/4929) +- Add arch flags for Intel GPU Ponte Vecchio [\#4932](https://github.com/kokkos/kokkos/pull/4932) +- SYCL: require GPU if GPU architecture was set at configuration time (i.e. 
do not allow fallback to CPU device) [\#5264](https://github.com/kokkos/kokkos/pull/5264) [\#5222](https://github.com/kokkos/kokkos/pull/5222) +- SYCL: Add `SYCL::sycl_queue()` for interoperability [\#5241](https://github.com/kokkos/kokkos/pull/5241) +- SYCL: Loosen restriction for using built-in `sycl::group_broadcast` [\#4552](https://github.com/kokkos/kokkos/pull/4552) +- SYCL: preserve address space [\#4396](https://github.com/kokkos/kokkos/pull/4396) +- OpenMPTarget: Adding a workaround for team scan [\#5219](https://github.com/kokkos/kokkos/pull/5219) +- OpenMPTarget: Adding logic to skip the kernel launch if `league_size=0` [\#5067](https://github.com/kokkos/kokkos/pull/5067) +- OpenMPTarget: Make sure `Kokkos::abort()` causes abnormal program termination when called on the host-side [\#4808](https://github.com/kokkos/kokkos/pull/4808) +- HIP: Make HIPHostPinnedSpace coarse-grained [\#5152](https://github.com/kokkos/kokkos/pull/5152) +- Refactor OpenMP `parallel_for` implementation to use more native OpenMP constructs [\#4664](https://github.com/kokkos/kokkos/pull/4664) +- Add option to optimize for local CPU architecture `Kokkos_ARCH_NATIVE` [\#4930](https://github.com/kokkos/kokkos/pull/4930) + + +### Implemented enhancements +- Add command line argument/environment variable to print the configuration [\#5233](https://github.com/kokkos/kokkos/pull/5233) +- Improve error message in view memory access violations [\#4950](https://github.com/kokkos/kokkos/pull/4950) +- Remove unnecessary fences in View initialization [\#4823](https://github.com/kokkos/kokkos/pull/4823) +- Make `View::shmem_size()` device-callable [\#4936](https://github.com/kokkos/kokkos/pull/4936) +- Update numerics support for `__float128` [\#5081](https://github.com/kokkos/kokkos/pull/5081) +- Add `log10` overload for `Kokkos::complex` [\#5009](https://github.com/kokkos/kokkos/pull/5009) +- Add `[[nodiscard]]` to `ScopeGuard` [\#5224](https://github.com/kokkos/kokkos/pull/5224) +- Add 
structured binding support for `Kokkos::Array` [\#4962](https://github.com/kokkos/kokkos/pull/4962) +- Enable accessing `Kokkos::Array` elements in constant expressions [\#4916](https://github.com/kokkos/kokkos/pull/4916) +- Mark `as_view_of_rank_n` as KOKKOS_FUNCTION [\#5248](https://github.com/kokkos/kokkos/pull/5248) +- Cleanup/rework fence overloads [\#5148](https://github.com/kokkos/kokkos/pull/5148) +- Assert that `Layout` construction from extents is valid in functions taking integer extents [\#5209](https://github.com/kokkos/kokkos/pull/5209) +- Add `fill_random` overload that takes an execution space as first argument [\#5181](https://github.com/kokkos/kokkos/pull/5181) +- Avoid some unnecessary fences in `parallel_reduce/scan` [\#5154](https://github.com/kokkos/kokkos/pull/5154) +- Include `KOKKOS_ENABLE_LIBDL` in options when printing configuration [\#5086](https://github.com/kokkos/kokkos/pull/5086) +- DynRankView: make `layout()` return the same as a corresponding static View [\#5026](https://github.com/kokkos/kokkos/pull/5026) +- Use `_mm_malloc` for icpx [\#5012](https://github.com/kokkos/kokkos/pull/5012) +- Avoid forcing matching execution spaces in `BinSort` constructor and `sort()` [\#4919](https://github.com/kokkos/kokkos/pull/4919) +- Check number of bins in `BinSort` [\#4890](https://github.com/kokkos/kokkos/pull/4890) +- Improve performance in parallel STL-like algorithms [\#4887](https://github.com/kokkos/kokkos/pull/4887) [\#4886](https://github.com/kokkos/kokkos/pull/4886) +- Disable `memset` on A64FX and launch `parallel_for` instead (performance) [\#4884](https://github.com/kokkos/kokkos/pull/4884) +- Allow non-power-of-two team sizes for team reductions and scans [\#4809](https://github.com/kokkos/kokkos/pull/4809) + +#### Harmonization of Kokkos execution environment initialization: +- Warn when unable to detect local MPI rank and user explicitly asked for it [\#5263](https://github.com/kokkos/kokkos/pull/5263) +- Refactor parsing of 
command line arguments and environment variables [\#5221](https://github.com/kokkos/kokkos/pull/5221) +- Refactor device selection at initialization [\#5211](https://github.com/kokkos/kokkos/pull/5211) +- Rename tools settings for consistency [\#5201](https://github.com/kokkos/kokkos/pull/5201) +- Print help only once [\#5128](https://github.com/kokkos/kokkos/pull/5128) +- Update precedence rule in initialization [\#5130](https://github.com/kokkos/kokkos/pull/5130) +- Warn instead of just ignoring user settings when kokkos-tools is disabled [\#5088](https://github.com/kokkos/kokkos/pull/5088) +- Drop numa args in threads backend initialization [\#5127](https://github.com/kokkos/kokkos/pull/5127) +- Warn users when a flag prefixed with -[-]kokkos is not recognized and do not remove it [\#5256](https://github.com/kokkos/kokkos/pull/5256) +- Give back to Core what belongs to Core (aka moving tune_internals option from Tools back to Core) [\#5202](https://github.com/kokkos/kokkos/pull/5202) + +#### Build system updates: +- `nvcc_wrapper`: filter out -pedantic-errors from nvcc options [\#5235](https://github.com/kokkos/kokkos/pull/5235) +- `nvcc_wrapper`: add known nvcc option --source-in-ptx [\#5052](https://github.com/kokkos/kokkos/pull/5052) +- Link libdl as interface library [\#5179](https://github.com/kokkos/kokkos/pull/5179) +- Only show GPU architectures with enabled corresponding backend [\#5119](https://github.com/kokkos/kokkos/pull/5119) +- Enable optional external desul build [\#5021](https://github.com/kokkos/kokkos/pull/5021) [\#5132](https://github.com/kokkos/kokkos/pull/5132) +- Export `Kokkos_CXX_STANDARD` variable with CMake [\#5068](https://github.com/kokkos/kokkos/pull/5068) +- Suppress warnings with nvc++ [\#5031](https://github.com/kokkos/kokkos/pull/5031) +- Disallow multiple host architectures in CMake [\#4996](https://github.com/kokkos/kokkos/pull/4996) +- Do not include compiler warning flags in the compile option of the cmake target 
[\#4989](https://github.com/kokkos/kokkos/pull/4989) +- AOT flags for OpenMPTarget targeting Intel GPUs [\#4915](https://github.com/kokkos/kokkos/pull/4915) +- Repurpose `Kokkos_ARCH_INTEL_GEN` for SYCL to mean JIT to be conforming with OMPT [\#4894](https://github.com/kokkos/kokkos/pull/4894) +- Replace amdgpu-target with offload-arch [\#4874](https://github.com/kokkos/kokkos/pull/4874) +- Do not enable `kokkos_launch_compiler` when `CMAKE_CXX_COMPILER_LAUNCHER` is set [\#4870](https://github.com/kokkos/kokkos/pull/4870) +- Move CMake version check up [\#4797](https://github.com/kokkos/kokkos/pull/4797) + +### Incompatibilities: +- Remove `KOKKOS_THREAD_LOCAL` [\#5064](https://github.com/kokkos/kokkos/pull/5064) +- Remove `KOKKOS_ENABLE_POSIX_MEMALIGN` [\#5011](https://github.com/kokkos/kokkos/pull/5011) +- Remove unused `KOKKOS_ENABLE_TM` [\#4995](https://github.com/kokkos/kokkos/pull/4995) +- Remove unused cmakedefine `KOKKOS_ENABLE_COMPILER_WARNINGS` [\#4883](https://github.com/kokkos/kokkos/pull/4883) +- Remove unused `KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK` [\#4882](https://github.com/kokkos/kokkos/pull/4882) +- Drop Instruction Set Architecture (ISA) macros [\#4981](https://github.com/kokkos/kokkos/pull/4981) +- Warn in `ScopeGuard` about illegal usage [\#5250](https://github.com/kokkos/kokkos/pull/5250) + +### Deprecations: +- Guard against non-public header inclusion [\#5178](https://github.com/kokkos/kokkos/pull/5178) +- Raise deprecation warnings if non empty WorkTag class is used [\#5230](https://github.com/kokkos/kokkos/pull/5230) +- Deprecate `parallel_*` overloads taking the label as trailing argument [\#5141](https://github.com/kokkos/kokkos/pull/5141) +- Deprecate nested types in functional [\#5185](https://github.com/kokkos/kokkos/pull/5185) +- Deprecate `InitArguments` struct and replace it with `InitializationSettings` [\#5135](https://github.com/kokkos/kokkos/pull/5135) +- Deprecate `finalize_all()` 
[\#5134](https://github.com/kokkos/kokkos/pull/5134) +- Deprecate command line arguments (other than `--help`) that are not prefixed with `kokkos-*` [\#5120](https://github.com/kokkos/kokkos/pull/5120) +- Deprecate `--[kokkos-]numa` cmdline arg and `KOKKOS_NUMA` env var [\#5117](https://github.com/kokkos/kokkos/pull/5117) +- Deprecate `--[kokkos-]threads` command line argument in favor of `--[kokkos-]num-threads` [\#5111](https://github.com/kokkos/kokkos/pull/5111) +- Deprecate `Kokkos::common_view_alloc_prop` [\#5059](https://github.com/kokkos/kokkos/pull/5059) +- Deprecate `Kokkos::is_reducer_type` [\#4957](https://github.com/kokkos/kokkos/pull/4957) +- Deprecate `OffsetView` constructors taking `index_list_type` [\#4810](https://github.com/kokkos/kokkos/pull/4810) +- Deprecate overloads of `Kokkos::sort` taking a parameter `bool always_use_kokkos_sort` [\#5382](https://github.com/kokkos/kokkos/issues/5382) +- Warn about `parallel_reduce` cases that call `join()` with volatile-qualified arguments [\#5215](https://github.com/kokkos/kokkos/pull/5215) + +### Bug Fixes: +- CUDA Reductions: Fix data races reported by Nvidia `compute-sanitizer` [\#4855](https://github.com/kokkos/kokkos/pull/4855) +- Work around Intel compiler bug [\#5301](https://github.com/kokkos/kokkos/pull/5301) +- Avoid allocating memory for UniqueToken [\#5300](https://github.com/kokkos/kokkos/pull/5300) +- DynamicView: Properly resize mirror instances after construction [\#5276](https://github.com/kokkos/kokkos/pull/5276) +- Remove Kokkos::Rank limit of 6 ranks [\#5271](https://github.com/kokkos/kokkos/pull/5271) +- Do not forget to set last element to nullptr when removing a flag in `Kokkos::initialize` [\#5272](https://github.com/kokkos/kokkos/pull/5272) +- Fix CUDA+MSVC build issue [\#5261](https://github.com/kokkos/kokkos/pull/5261) +- Fix `DynamicView::resize_serial` [\#5220](https://github.com/kokkos/kokkos/pull/5220) +- Fix cmake default compiler flags for unknown compiler 
[\#5217](https://github.com/kokkos/kokkos/pull/5217) +- Fix `move_backward` [\#5191](https://github.com/kokkos/kokkos/pull/5191) +- Fixing issue 5196 - missing symbol with intel compiler [\#5207](https://github.com/kokkos/kokkos/pull/5207) +- Preserve `KOKKOS_INVALID_INDEX` in ViewDimension and ArrayLayout construction [\#5188](https://github.com/kokkos/kokkos/pull/5188) +- Finalize `deep_copy_space` early avoiding printing to `std::cerr` for Cuda [\#5151](https://github.com/kokkos/kokkos/pull/5151) +- Use correct policy in Threads MDRange `parallel_reduce` [\#5123](https://github.com/kokkos/kokkos/pull/5123) +- Fix building with NVCC as the CXX compiler while the CUDA backend is not enabled [\#5115](https://github.com/kokkos/kokkos/pull/5115) +- OpenMPTarget Index range fix for MDRange. [\#5089](https://github.com/kokkos/kokkos/pull/5089) +- Fix bug with CUDA's team reduction for empty ranges [\#5079](https://github.com/kokkos/kokkos/pull/5079) +- Fix using `ZeroMemset` for Serial [\#5077](https://github.com/kokkos/kokkos/pull/5077) +- Fix `Kokkos::Vector::push_back` for default execution space [\#5047](https://github.com/kokkos/kokkos/pull/5047) +- ScatterView: Fix ScatterMin/ScatterMax to use proper atomics [\#5045](https://github.com/kokkos/kokkos/pull/5045) +- Fix calling `ZeroMemset` in `deep_copy` [\#5040](https://github.com/kokkos/kokkos/pull/5040) +- Make View self-assignment not produce double-free [\#5024](https://github.com/kokkos/kokkos/pull/5024) +- Guard against unrecognized pragma with intel compilers [\#5019](https://github.com/kokkos/kokkos/pull/5019) +- Fix racing condition in `HIPParallelLaunch` [\#5008](https://github.com/kokkos/kokkos/pull/5008) +- KokkosP: Fix `device_id` in profiling [\#4997](https://github.com/kokkos/kokkos/pull/4997) +- Fix for `Kokkos::vector::insert` into empty vector with begin and end iterators [\#4988](https://github.com/kokkos/kokkos/pull/4988) +- Fix Core header files installation 
[\#4984](https://github.com/kokkos/kokkos/pull/4984) +- Fix bounds errors with `Kokkos::sort` [\#4980](https://github.com/kokkos/kokkos/pull/4980) +- Fixup let `RangePolicy::set_chunk_size` return a reference to self [\#4918](https://github.com/kokkos/kokkos/pull/4918) +- Fix allocating large Views [\#4907](https://github.com/kokkos/kokkos/pull/4907) +- Fix combined reductions with `Kokkos::View` [\#4896](https://github.com/kokkos/kokkos/pull/4896) +- Fixed `_CUDA_ARCH__` to `__CUDA_ARCH__` for CUDA LDG [\#4893](https://github.com/kokkos/kokkos/pull/4893) +- Fixup `View::access()` truncate parameter pack [\#4876](https://github.com/kokkos/kokkos/pull/4876) +- Fix `abort` with HIP backend for ROCm 5.0.2 and beyond [\#4873](https://github.com/kokkos/kokkos/pull/4873) +- Fix HIP version when printing the configuration [\#4872](https://github.com/kokkos/kokkos/pull/4872) +- Fix scratch lock array when using scratch level 1 [\#4871](https://github.com/kokkos/kokkos/pull/4871) +- Fix Makefile.kokkos to work with fujitsu compiler [\#4867](https://github.com/kokkos/kokkos/pull/4867) +- cmake: Correct link THREADS link option [\#4854](https://github.com/kokkos/kokkos/pull/4854) +- UniqueToken `impl_acquire` function should be device only [\#4819](https://github.com/kokkos/kokkos/pull/4819) +- Fix example calls to non existing static `print_configuration` [\#4806](https://github.com/kokkos/kokkos/pull/4806) +- Fix requests for large team scratch sizes [\#4728](https://github.com/kokkos/kokkos/pull/4728) + + ## [3.6.01](https://github.com/kokkos/kokkos/tree/3.6.01) (2022-05-23) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.00...3.6.01) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index b0a54118a0a52482c4670d72f28c133e5b32ef47..a05bfcdb94d53e0a7d453d62909e9a5686f6cc41 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -1,3 +1,4 @@ +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) # Disable 
in-source builds to prevent source tree corruption. if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) @@ -28,11 +29,6 @@ SET(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) SET(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) -# Needed to simplify syntax of if statements -CMAKE_POLICY(SET CMP0054 NEW) -# Needed to make IN_LIST a valid operator -CMAKE_POLICY(SET CMP0057 NEW) - # Is this a build as part of Trilinos? IF(COMMAND TRIBITS_PACKAGE_DECL) SET(KOKKOS_HAS_TRILINOS ON) @@ -72,7 +68,6 @@ ENDFUNCTION() LIST(APPEND CMAKE_MODULE_PATH cmake/Modules) IF(NOT KOKKOS_HAS_TRILINOS) - cmake_minimum_required(VERSION 3.16 FATAL_ERROR) set(CMAKE_DISABLE_SOURCE_CHANGES ON) set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) @@ -80,7 +75,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) # downstream dependencies need to match this! SET(KOKKOS_COMPILE_LANGUAGE CXX) # use lower case here since we didn't parse options yet - IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) + IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_CUDA) # Without this as a language for the package we would get a C++ compiler enabled. # but we still need a C++ compiler even if we build all our cpp files as CUDA only @@ -90,9 +85,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) # days. 
SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX) - IF (Kokkos_ENABLE_CUDA) - SET(KOKKOS_COMPILE_LANGUAGE CUDA) - ENDIF() + SET(KOKKOS_COMPILE_LANGUAGE CUDA) ENDIF() IF (Spack_WORKAROUND) @@ -135,14 +128,11 @@ ENDIF() set(Kokkos_VERSION_MAJOR 3) -set(Kokkos_VERSION_MINOR 6) -set(Kokkos_VERSION_PATCH 01) +set(Kokkos_VERSION_MINOR 7) +set(Kokkos_VERSION_PATCH 00) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") -MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables") -CMAKE_POLICY(SET CMP0074 NEW) - # Load either the real TriBITS or a TriBITS wrapper # for certain utility functions that are universal (like GLOBAL_SET) INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) @@ -204,11 +194,16 @@ KOKKOS_SETUP_BUILD_ENVIRONMENT() OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF) SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontainers Kokkos::kokkosalgorithms) -SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms) +SET(KOKKOS_SUB_LIBRARIES kokkoscore kokkoscontainers kokkosalgorithms) +IF (KOKKOS_CXX_STANDARD GREATER_EQUAL 17) + LIST(APPEND KOKKOS_EXT_LIBRARIES Kokkos::kokkossimd) + LIST(APPEND KOKKOS_SUB_LIBRARIES kokkossimd) +ENDIF() +SET(KOKKOS_INT_LIBRARIES kokkos ${KOKKOS_SUB_LIBRARIES}) SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES}) IF (KOKKOS_HAS_TRILINOS) - SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) + SET(TRILINOS_INCDIR ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) SET(KOKKOS_IS_SUBDIRECTORY TRUE) ELSEIF(HAS_PARENT) @@ -296,7 +291,7 @@ IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING) #Make sure in-tree projects can reference this as Kokkos:: #to match the installed target names ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos) - 
TARGET_LINK_LIBRARIES(kokkos INTERFACE kokkoscore kokkoscontainers kokkosalgorithms) + TARGET_LINK_LIBRARIES(kokkos INTERFACE ${KOKKOS_SUB_LIBRARIES}) KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) ENDIF() INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index 10c4bc46300ab9459815b7e4b5282f04150bcabc..d493abbf1421973a973e93775d90ef83e502e2cd 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -1,21 +1,21 @@ # Default settings common options. KOKKOS_VERSION_MAJOR = 3 -KOKKOS_VERSION_MINOR = 6 -KOKKOS_VERSION_PATCH = 01 +KOKKOS_VERSION_MINOR = 7 +KOKKOS_VERSION_PATCH = 00 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial #KOKKOS_DEVICES ?= "OpenMP" KOKKOS_DEVICES ?= "Threads" # Options: -# Intel: KNC,KNL,SNB,HSW,BDW,SKX +# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 # AMD-GPUS: Vega900,Vega906,Vega908,Vega90A # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 -# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP +# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC KOKKOS_ARCH ?= "" # Options: yes,no KOKKOS_DEBUG ?= "no" @@ -183,6 +183,8 @@ KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VE KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC) +# TODO fujitsu can emulate gcc or clang. Only clang mode works at the moment. 
+KOKKOS_INTERNAL_COMPILER_FUJITSU := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),FUJITSU) # Check Host Compiler if using NVCC through nvcc_wrapper ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) @@ -211,8 +213,23 @@ endif ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) KOKKOS_INTENAL_COMPILER_CLANG = 0 endif +# Fujitsu passes also as clang and gcc respectively +ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1) + # TODO handle gcc flags and workaround for bug? + # fujitsu (gcc mode) is bugged, see https://github.com/kokkos/kokkos/issues/4730 + $(warning Warning: ${CXX} in Trad Mode '-Nnoclang' (default) is not recommended. Use 'CXX = ${CXX} -Nclang' instead.) + # HACK since fujitsu only accepts some gcc flags, disable gcc here? + # KOKKOS_INTERNAL_COMPILER_GCC = 0 + endif + # TODO handle clang flags + # warnings: works fine as is + # openmp: handled + #KOKKOS_INTERNAL_COMPILER_CLANG = 0 +endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + # TODO empty variable if fujitsu (clang mode) passes as clang KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.') ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) @@ -262,7 +279,12 @@ else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp else - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp + ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1) + # fujitsu (clang mode) fails with `=libomp` + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + else + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp + endif endif else ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) @@ -290,11 +312,15 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) #KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG 
-fopenmp -fopenmp=libomp + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp -Wno-openmp-mapping KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget else - #Assume GCC - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping + else + #Assume GCC + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none + endif endif endif @@ -334,8 +360,12 @@ KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM) KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB) KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW) KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW) +KOKKOS_INTERNAL_USE_ARCH_SKL := $(call kokkos_has_string,$(KOKKOS_ARCH),SKL) KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX) KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL) +KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL) +KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX) +KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) @@ -343,6 +373,7 @@ KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH), KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP) KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1) KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP) +KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC) # NVIDIA based. 
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper @@ -426,19 +457,9 @@ KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_W KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL)) -KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX)) - -# Decide what ISA level we are able to support. -KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) -KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC)) -KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9)) -KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7)) - -# Decide whether we can support transactional memory -KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX)) # Incompatible flags? 
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc) +KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) @@ -541,36 +562,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL") endif -ifeq ($(KOKKOS_INTERNAL_USE_TM), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM") - tmp := $(call kokkos_append_header,"$H""endif") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64") - tmp := $(call kokkos_append_header,"$H""endif") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC") - tmp := $(call kokkos_append_header,"$H""endif") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE") - tmp 
:= $(call kokkos_append_header,"$H""endif") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE") - tmp := $(call kokkos_append_header,"$H""endif") -endif - #only add the c++ standard flags if this is not CMake tmp := $(call kokkos_append_header,"/* General Settings */") ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) @@ -1031,7 +1022,28 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) endif endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKL), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xSKYLAKE + KOKKOS_LDFLAGS += -xSKYLAKE + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Nothing here yet. + KOKKOS_CXXFLAGS += -march=skylake + KOKKOS_LDFLAGS += -march=skylake + endif + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) @@ -1045,13 +1057,31 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) else # Nothing here yet. 
- KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm - KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm + KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 + KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 endif endif endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICL), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + KOKKOS_CXXFLAGS += -march=icelake-client -mtune=icelake-client + KOKKOS_LDFLAGS += -march=icelake-client -mtune=icelake-client +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICX), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + KOKKOS_CXXFLAGS += -march=icelake-server -mtune=icelake-server + KOKKOS_LDFLAGS += -march=icelake-server -mtune=icelake-server +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SPR), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + KOKKOS_CXXFLAGS += -march=sapphirerapids -mtune=sapphirerapids + KOKKOS_LDFLAGS += -march=sapphirerapids -mtune=sapphirerapids +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC") KOKKOS_CXXFLAGS += -mmic @@ -1081,7 +1111,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64 -Xopenmp-target -march endif KOKKOS_INTERNAL_USE_CUDA_ARCH = 1 endif @@ -1182,29 +1212,29 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900 + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx900 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) tmp := $(call kokkos_append_header,"$H""define 
KOKKOS_ARCH_VEGA906") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906 + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx908 + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx90a + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a endif KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp + KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp endif KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) @@ -1220,51 +1250,67 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) endif endif -# Figure out the architecture flag for SYCL. +# Figure out Intel architecture flags. 
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) - # Lets start with adding architecture defines - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9-" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen11" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen12lp" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device dg1" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := 
-fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device xehp" - endif + KOKKOS_INTERNAL_LC_BACKEND := sycl +endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + KOKKOS_INTERNAL_LC_BACKEND := openmp +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen9" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen11" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen12lp" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device dg1" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1) + 
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device xehp" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_PVC") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device 12.4.0" +endif +ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.hpp) - KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda - KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG) + KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda -fsycl-dead-args-optimization + KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) KOKKOS_LDFLAGS+=-fsycl - KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG) + KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) -D__STRICT_ANSI__ + KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) endif ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS") + KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include else ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) $(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) ) @@ -1349,7 +1395,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - 
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp + KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) ifneq ($(CUDA_PATH),) @@ -1407,6 +1453,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) KOKKOS_TPL_LIBRARY_NAMES += pthread endif +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.hpp) +endif + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) @@ -1439,15 +1490,6 @@ ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) endif -# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial -# device to avoid a link warning. -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) -endif -ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC)) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC)) -endif - # With Cygwin functions such as fdopen and fileno are not defined # when strict ansi is enabled. strict ansi gets enabled with -std=c++14 # though. So we hard undefine it here. 
Not sure if that has any bad side effects diff --git a/packages/kokkos/Makefile.targets b/packages/kokkos/Makefile.targets index a9cb12e1b46f3e8baa443576528b3eee07c9fded..876726e9479966921f0774d4f365d07120d14e97 100644 --- a/packages/kokkos/Makefile.targets +++ b/packages/kokkos/Makefile.targets @@ -16,10 +16,6 @@ Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ho $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp -Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp -Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp @@ -41,6 +37,13 @@ Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/ Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) +Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp +Kokkos_Serial_Task.o: 
$(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp +endif + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -50,8 +53,8 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp -Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp +Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) @@ -70,20 +73,18 @@ Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp -Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp +Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) -Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) -Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp +Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp endif diff --git a/packages/kokkos/README.md b/packages/kokkos/README.md index f6c500cc1a73ee937064b848d4edd1633d6c596c..033346e956e040db030fdb334287bd5a71c5fafa 100644 --- a/packages/kokkos/README.md +++ b/packages/kokkos/README.md @@ -10,270 +10,48 @@ hierarchies and multiple types of execution resources. It currently can use CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other backends in development. 
-Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem, -which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as -profiling and debugging tools (https://github.com/kokkos/kokkos-tools). +**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.** -# Learning about Kokkos - -The best way to start learning about Kokkos is going through the Kokkos Lectures. -They are online available at https://kokkos.link/the-lectures and contain a mix -of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem -capabilities. - -A programming guide and API reference can be found on the Wiki -(https://github.com/kokkos/kokkos/wiki). - -For questions find us on Slack: https://kokkosteam.slack.com or open a github issue. - -For non-public questions send an email to -crtrott(at)sandia.gov - -# Contributing to Kokkos - -We are open and try to encourage contributions from external developers. -To do so please first open an issue describing the contribution and then issue -a pull request against the develop branch. For larger features it may be good -to get guidance from the core development team first through the github issue. - -Note that Kokkos Core is licensed under standard 3-clause BSD terms of use. -Which means contributing to Kokkos allows anyone else to use your contributions -not just for public purposes but also for closed source commercial projects. -For specifics see the LICENSE file contained in the repository or distribution. - -# Requirements - -### Minimum Compiler Versions - -Generally Kokkos should work with all compiler versions newer than the minimum. -However as in all sufficiently complex enough code, we have to work around compiler -bugs with almost all compilers. So compiler versions we don't test may have issues -we are unaware of. 
- -* GCC: 5.3.0 -* Clang: 4.0.0 -* Intel: 17.0.1 -* NVCC: 9.2.88 -* NVC++: 21.5 -* ROCm: 4.3 -* MSVC: 19.29 -* IBM XL: 16.1.1 -* Fujitsu: 4.5.0 -* ARM/Clang 20.1 - -### Primary Tested Compilers - -* GCC: 5.3.0, 6.1.0, 7.3.0, 8.3, 9.2, 10.0 -* NVCC: 9.2.88, 10.1, 11.0 -* Clang: 8.0.0, 9.0.0, 10.0.0, 12.0.0 -* Intel 17.4, 18.1, 19.5 -* MSVC: 19.29 -* ARM/Clang: 20.1 -* IBM XL: 16.1.1 -* ROCm: 4.3.0 +For the complete documentation, click below: -### Build system: +# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki) -* CMake >= 3.16: required -* CMake >= 3.18: Fortran linkage. This does not affect most mixed Fortran/Kokkos builds. See [build issues](BUILD.md#KnownIssues). -* CMake >= 3.21.1 for NVC++ - -Primary tested compiler are passing in release mode -with warnings as errors. They also are tested with a comprehensive set of -backend combinations (i.e. OpenMP, Threads, Serial, OpenMP+Serial, ...). -We are using the following set of flags: -* GCC: - ```` - -Wall -Wunused-parameter -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits - -Wignored-qualifiers -Wempty-body - -Wclobbered -Wuninitialized - ```` -* Intel: - ```` - -Wall -Wunused-parameter -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits - -Wuninitialized - ```` -* Clang: - ```` - -Wall -Wunused-parameter -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits - -Wuninitialized - ```` - -* NVCC: - ```` - -Wall -Wunused-parameter -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits - -Wuninitialized - ```` - -Other compilers are tested occasionally, in particular when pushing from develop to -master branch. These are tested less rigorously without `-Werror` and only for a select set of backends. - -# Building and Installing Kokkos -Kokkos provide a CMake build system and a raw Makefile build system. -The CMake build system is strongly encouraged and will be the most rigorously supported in future releases. 
-Full details are given in the [build instructions](BUILD.md). Basic setups are shown here: - -## CMake - -The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`: -````bash -cmake $srcdir \ - -DCMAKE_CXX_COMPILER=$path_to_compiler \ - -DCMAKE_INSTALL_PREFIX=$path_to_install \ - -DKokkos_ENABLE_OPENMP=On \ - -DKokkos_ARCH_HSW=On \ - -DKokkos_HWLOC_DIR=$path_to_hwloc -```` -then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages. - -To validate the Kokkos build, configure with -```` - -DKokkos_ENABLE_TESTS=On -```` -and run `make test` after completing the build. +# Learning about Kokkos -For your CMake project using Kokkos, code such as the following: +To start learning about Kokkos: -````cmake -find_package(Kokkos) -... -target_link_libraries(myTarget Kokkos::kokkos) -```` -should be added to your CMakeLists.txt. Your configure should additionally include -```` --DKokkos_DIR=$path_to_install/cmake/lib/Kokkos -```` -or -```` --DKokkos_ROOT=$path_to_install -```` -for the install location given above. +- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities. -## Spack -An alternative to manually building with the CMake is to use the Spack package manager. -To get started, download the Spack [repo](https://github.com/spack/spack). -```` -A basic installation would be done as: -````bash -> spack install kokkos -```` -Spack allows options and and compilers to be tuned in the install command. -````bash -> spack install kokkos@3.0 %gcc@7.3.0 +openmp -```` -This example illustrates the three most common parameters to Spack: -* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options. 
-* Version: immediately following `kokkos` the `@version` can specify a particular Kokkos to build -* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option. +- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. -For a complete list of Kokkos options, run: -````bash -> spack info kokkos -```` -Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". -Generally, Spack usage should never really require you to reference the computer-generated unique install folder. -More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with: -````bash -> spack find -p kokkos ... -```` -where `...` is the unique spec identifying the particular Kokkos configuration and version. -Some more details can found in the Kokkos spack [documentation](Spack.md) or the Spack [website](https://spack.readthedocs.io/en/latest). +- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html). -## Raw Makefile +- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. -Raw Makefiles are only supported via inline builds. See below. +For questions find us on Slack: https://kokkosteam.slack.com or open a github issue. 
-## Inline Builds vs. Installed Package -For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package. -The main reason is that you may otherwise need many different -configurations of Kokkos installed depending on the required compile time -features an application needs. For example there is only one default -execution space, which means you need different installations to have OpenMP -or C++ threads as the default space. Also for the CUDA backend there are certain -choices, such as allowing relocatable device code, which must be made at -installation time. Building Kokkos inline uses largely the same process -as compiling an application against an installed Kokkos library. +For non-public questions send an email to: *crtrott(at)sandia.gov* -For CMake, this means copying over the Kokkos source code into your project and adding `add_subdirectory(kokkos)` to your CMakeLists.txt. +# Contributing to Kokkos -For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and or an inline build. +Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute. -# Kokkos and CUDA UVM +# Requirements, Building and Installing -Kokkos does support UVM as a specific memory space called CudaUVMSpace. -Allocations made with that space are accessible from host and device. -You can tell Kokkos to use that as the default space for Cuda allocations. -In either case UVM comes with a number of restrictions: -* You can't access allocations on the host while a kernel is potentially -running. This will lead to segfaults. To avoid that you either need to -call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or -you can set the environment variable CUDA_LAUNCH_BLOCKING=1. 
-* In multi socket multi GPU machines without NVLINK, UVM defaults -to using zero copy allocations for technical reasons related to using multiple -GPUs from the same process. If an executable doesn't do that (e.g. each -MPI rank of an application uses a single GPU [can be the same GPU for -multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. -This will enforce proper UVM allocations, but can lead to errors if -more than a single GPU is used by a single process. +All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html). +Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html). # Citing Kokkos -If you publish work which mentions Kokkos, please cite the following paper: - -````BibTex -@ARTICLE{9485033, - author={Trott, Christian R. and Lebrun-Grandié, Damien and Arndt, Daniel and Ciesko, Jan and Dang, Vinh and Ellingwood, Nathan and Gayatri, Rahulkumar and Harvey, Evan and Hollman, Daisy S. 
and Ibanez, Dan and Liber, Nevin and Madsen, Jonathan and Miles, Jeff and Poliakoff, David and Powell, Amy and Rajamanickam, Sivasankaran and Simberg, Mikael and Sunderland, Dan and Turcksin, Bruno and Wilke, Jeremiah}, - journal={IEEE Transactions on Parallel and Distributed Systems}, - title={Kokkos 3: Programming Model Extensions for the Exascale Era}, - year={2022}, - volume={33}, - number={4}, - pages={805-817}, - doi={10.1109/TPDS.2021.3097283}} -```` - -If you use more than one Kokkos EcoSystem package, please also cite: - -````BibTex -@ARTICLE{9502936, - author={Trott, Christian and Berger-Vergiat, Luc and Poliakoff, David and Rajamanickam, Sivasankaran and Lebrun-Grandie, Damien and Madsen, Jonathan and Al Awar, Nader and Gligoric, Milos and Shipman, Galen and Womeldorff, Geoff}, - journal={Computing in Science Engineering}, - title={The Kokkos EcoSystem: Comprehensive Performance Portability for High Performance Computing}, - year={2021}, - volume={23}, - number={5}, - pages={10-18}, - doi={10.1109/MCSE.2021.3098509}} -```` - - -And if you feel generous: feel free to cite the original Kokkos paper which describes most of the basic Kokkos concepts: - -````BibTeX -@article{CarterEdwards20143202, - title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ", - journal = "Journal of Parallel and Distributed Computing ", - volume = "74", - number = "12", - pages = "3202 - 3216", - year = "2014", - note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ", - issn = "0743-7315", - doi = "https://doi.org/10.1016/j.jpdc.2014.07.003", - url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257", - author = "H. Carter Edwards and Christian R. Trott and Daniel Sunderland" -} -```` +Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html). 
-##### [LICENSE](https://github.com/kokkos/kokkos/blob/master/LICENSE) +# License [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software. +The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or +[here](https://github.com/kokkos/kokkos/blob/master/LICENSE). diff --git a/packages/kokkos/algorithms/src/CMakeLists.txt b/packages/kokkos/algorithms/src/CMakeLists.txt index 4b60d887f79bd7737a42554808ac1de0f4bb5ac7..597626b11115db2d8c78e54c9cb5d24af21c2731 100644 --- a/packages/kokkos/algorithms/src/CMakeLists.txt +++ b/packages/kokkos/algorithms/src/CMakeLists.txt @@ -11,6 +11,7 @@ FILE(GLOB ALGO_HEADERS *.hpp) FILE(GLOB ALGO_SOURCES *.cpp) LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp) +APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp) INSTALL ( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" diff --git a/packages/kokkos/algorithms/src/Kokkos_Random.hpp b/packages/kokkos/algorithms/src/Kokkos_Random.hpp index 59c11afd9a24b146792e384d831a896ec17b1ba3..1d85ffdfb911f7dd4981ab1b2f98c903270e4628 100644 --- a/packages/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/packages/kokkos/algorithms/src/Kokkos_Random.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_RANDOM_HPP #define KOKKOS_RANDOM_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM +#endif #include <Kokkos_Core.hpp> #include <Kokkos_Complex.hpp> @@ -648,63 +652,44 @@ struct Random_UniqueIndex { } }; -#ifdef KOKKOS_ENABLE_CUDA -template <class MemorySpace> -struct Random_UniqueIndex<Kokkos::Device<Kokkos::Cuda, MemorySpace>> { - using locks_view_type = - View<int**, Kokkos::Device<Kokkos::Cuda, MemorySpace>>; - KOKKOS_FUNCTION - static int
get_state_idx(const locks_view_type& locks_) { -#ifdef __CUDA_ARCH__ - const int i_offset = - (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; - int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * - blockDim.x * blockDim.y * blockDim.z + - i_offset) % - locks_.extent(0); - while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { - i += blockDim.x * blockDim.y * blockDim.z; - if (i >= static_cast<int>(locks_.extent(0))) { - i = i_offset; - } - } - return i; -#else - (void)locks_; - return 0; -#endif - } -}; +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) + +#if defined(KOKKOS_ENABLE_CUDA) +#define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Cuda +#elif defined(KOKKOS_ENABLE_HIP) +#define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Experimental::HIP #endif -#ifdef KOKKOS_ENABLE_HIP template <class MemorySpace> struct Random_UniqueIndex< - Kokkos::Device<Kokkos::Experimental::HIP, MemorySpace>> { + Kokkos::Device<KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP, MemorySpace>> { using locks_view_type = - View<int**, Kokkos::Device<Kokkos::Experimental::HIP, MemorySpace>>; + View<int**, Kokkos::Device<KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP, + MemorySpace>>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { -#ifdef __HIP_DEVICE_COMPILE__ - const int i_offset = - (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; - int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * + KOKKOS_IF_ON_DEVICE(( + const int i_offset = + (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; + int i = + (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * blockDim.x * blockDim.y * blockDim.z + i_offset) % locks_.extent(0); - while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { - i += blockDim.x * blockDim.y * blockDim.z; - if (i >= static_cast<int>(locks_.extent(0))) { - i = i_offset; - } - } - return i; -#else - (void)locks_; - 
return 0; -#endif + while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { + i += blockDim.x * blockDim.y * blockDim.z; + if (i >= static_cast<int>(locks_.extent(0))) { + i = i_offset; + } + } + + return i;)) + KOKKOS_IF_ON_HOST(((void)locks_; return 0;)) } }; + +#undef KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP + #endif #ifdef KOKKOS_ENABLE_SYCL @@ -1279,7 +1264,6 @@ struct fill_random_functor_begin_end; template <class ViewType, class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 0, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1303,7 +1287,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 0, template <class ViewType, class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1331,7 +1314,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1, template <class ViewType, class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1361,7 +1343,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2, template <class ViewType, class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1392,7 +1373,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3, template <class ViewType, 
class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1424,7 +1404,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4, template <class ViewType, class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1458,7 +1437,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5, template <class ViewType, class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1494,7 +1472,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6, template <class ViewType, class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1532,7 +1509,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7, template <class ViewType, class RandomPool, int loops, class IndexType> struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8, IndexType> { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1569,34 +1545,57 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8, } }; -template <class ViewType, class RandomPool, class IndexType = int64_t> 
-void fill_random(ViewType a, RandomPool g, +template <class ExecutionSpace, class ViewType, class RandomPool, + class IndexType = int64_t> +void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, typename ViewType::const_value_type begin, typename ViewType::const_value_type end) { int64_t LDA = a.extent(0); if (LDA > 0) - parallel_for("Kokkos::fill_random", (LDA + 127) / 128, - Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128, - ViewType::Rank, IndexType>( - a, g, begin, end)); + parallel_for( + "Kokkos::fill_random", + Kokkos::RangePolicy<ExecutionSpace>(exec, 0, (LDA + 127) / 128), + Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128, + ViewType::Rank, IndexType>( + a, g, begin, end)); } } // namespace Impl +template <class ExecutionSpace, class ViewType, class RandomPool, + class IndexType = int64_t> +void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, + typename ViewType::const_value_type begin, + typename ViewType::const_value_type end) { + Impl::apply_to_view_of_static_rank( + [&](auto dst) { Kokkos::Impl::fill_random(exec, dst, g, begin, end); }, + a); +} + +template <class ExecutionSpace, class ViewType, class RandomPool, + class IndexType = int64_t> +void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, + typename ViewType::const_value_type range) { + fill_random(exec, a, g, 0, range); +} + template <class ViewType, class RandomPool, class IndexType = int64_t> void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin, typename ViewType::const_value_type end) { - Impl::apply_to_view_of_static_rank( - [&](auto dst) { Kokkos::Impl::fill_random(dst, g, begin, end); }, a); + fill_random(typename ViewType::execution_space{}, a, g, begin, end); } template <class ViewType, class RandomPool, class IndexType = int64_t> void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { - fill_random(a, g, 0, range); + fill_random(typename 
ViewType::execution_space{}, a, g, 0, range); } } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM +#endif #endif diff --git a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp index ce97de9b7dfb1f97116a18c91c714433e451eec8..ad0c2d47b6d20d2022b5c60e81e7268b53d47f13 100644 --- a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_SORT_HPP_ #define KOKKOS_SORT_HPP_ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT +#endif #include <Kokkos_Core.hpp> @@ -120,13 +124,13 @@ class BinSort { // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. - using src_view_type = typename std::conditional< + using src_view_type = std::conditional_t< Kokkos::is_view<SrcViewType>::value, Kokkos::View<typename SrcViewType::const_data_type, typename SrcViewType::array_layout, typename SrcViewType::device_type, Kokkos::MemoryTraits<Kokkos::RandomAccess> >, - typename SrcViewType::const_type>::type; + typename SrcViewType::const_type>; using perm_view_type = typename PermuteViewType::const_type; @@ -151,8 +155,11 @@ class BinSort { } }; - using execution_space = typename Space::execution_space; - using bin_op_type = BinSortOp; + // Naming this alias "execution_space" would be problematic since it would be + // considered as execution space for the various functors which might use + // another execution space through sort() or create_permute_vector(). + using exec_space = typename Space::execution_space; + using bin_op_type = BinSortOp; struct bin_count_tag {}; struct bin_offset_tag {}; @@ -171,13 +178,13 @@ class BinSort { // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. 
- using const_rnd_key_view_type = typename std::conditional< + using const_rnd_key_view_type = std::conditional_t< Kokkos::is_view<KeyViewType>::value, Kokkos::View<typename KeyViewType::const_data_type, typename KeyViewType::array_layout, typename KeyViewType::device_type, Kokkos::MemoryTraits<Kokkos::RandomAccess> >, - const_key_view_type>::type; + const_key_view_type>; using non_const_key_scalar = typename KeyViewType::non_const_value_type; using const_key_scalar = typename KeyViewType::const_value_type; @@ -220,6 +227,14 @@ class BinSort { range_begin(range_begin_), range_end(range_end_), sort_within_bins(sort_within_bins_) { + static_assert( + Kokkos::SpaceAccessibility<ExecutionSpace, + typename Space::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + if (bin_op.max_bins() <= 0) + Kokkos::abort( + "The number of bins in the BinSortOp object must be greater than 0!"); bin_count_atomic = Kokkos::View<int*, Space>( "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); bin_count_const = bin_count_atomic; @@ -235,7 +250,7 @@ class BinSort { BinSort(const_key_view_type keys_, int range_begin_, int range_end_, BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(execution_space{}, keys_, range_begin_, range_end_, bin_op_, + : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, sort_within_bins_) {} template <typename ExecutionSpace> @@ -245,13 +260,19 @@ class BinSort { BinSort(const_key_view_type keys_, BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(execution_space{}, keys_, bin_op_, sort_within_bins_) {} + : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} //---------------------------------------- // Create the permutation vector, the bin_offset array and the bin_count // array. 
Can be called again if keys changed - template <class ExecutionSpace = execution_space> - void create_permute_vector(const ExecutionSpace& exec = execution_space{}) { + template <class ExecutionSpace = exec_space> + void create_permute_vector(const ExecutionSpace& exec = exec_space{}) { + static_assert( + Kokkos::SpaceAccessibility<ExecutionSpace, + typename Space::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + const size_t len = range_end - range_begin; Kokkos::parallel_for( "Kokkos::Sort::BinCount", @@ -281,6 +302,17 @@ class BinSort { template <class ExecutionSpace, class ValuesViewType> void sort(const ExecutionSpace& exec, ValuesViewType const& values, int values_range_begin, int values_range_end) const { + static_assert( + Kokkos::SpaceAccessibility<ExecutionSpace, + typename Space::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + static_assert( + Kokkos::SpaceAccessibility< + ExecutionSpace, typename ValuesViewType::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "of the View argument!"); + using scratch_view_type = Kokkos::View<typename ValuesViewType::data_type, typename ValuesViewType::array_layout, @@ -340,7 +372,7 @@ class BinSort { template <class ValuesViewType> void sort(ValuesViewType const& values, int values_range_begin, int values_range_end) const { - execution_space exec; + exec_space exec; sort(exec, values, values_range_begin, values_range_end); exec.fence("Kokkos::Sort: fence after sorting"); } @@ -428,7 +460,7 @@ struct BinOp1D { BinOp1D() = default; - // Construct BinOp with number of bins, minimum value and maxuimum value + // Construct BinOp with number of bins, minimum value and maximum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, typename KeyViewType::const_value_type max) : 
max_bins_(max_bins__ + 1), @@ -554,11 +586,7 @@ struct min_max_functor { template <class ExecutionSpace, class ViewType> std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort( - const ExecutionSpace& exec, ViewType const& view, - bool const always_use_kokkos_sort = false) { - if (!always_use_kokkos_sort) { - if (Impl::try_std_sort(view, exec)) return; - } + const ExecutionSpace& exec, ViewType const& view) { using CompType = BinOp1D<ViewType>; Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result; @@ -596,12 +624,38 @@ std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort( bin_sort.sort(exec, view); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template <class ExecutionSpace, class ViewType> +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload not taking bool always_use_kokkos_sort") +std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort( + const ExecutionSpace& exec, ViewType const& view, + bool const always_use_kokkos_sort) { + if (!always_use_kokkos_sort && Impl::try_std_sort(view, exec)) { + return; + } else { + sort(exec, view); + } +} +#endif + +template <class ViewType> +void sort(ViewType const& view) { + typename ViewType::execution_space exec; + sort(exec, view); + exec.fence("Kokkos::Sort: fence after sorting"); +} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 template <class ViewType> -void sort(ViewType const& view, bool const always_use_kokkos_sort = false) { +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload not taking bool always_use_kokkos_sort") +void sort(ViewType const& view, bool const always_use_kokkos_sort) { typename ViewType::execution_space exec; sort(exec, view, always_use_kokkos_sort); exec.fence("Kokkos::Sort: fence after sorting"); } +#endif template <class ExecutionSpace, class ViewType> std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort( @@ -635,4 +689,8 @@ void sort(ViewType view, size_t const begin, size_t const end) { } // namespace Kokkos 
+#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT +#endif #endif diff --git a/packages/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp b/packages/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp index 2e3babbcf0af85e854dd896d52cec8c661171d53..3e0f731cf0528a7145703a04004e0c1095003422 100644 --- a/packages/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp +++ b/packages/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp @@ -44,59 +44,103 @@ #ifndef KOKKOS_STD_ALGORITHMS_HPP #define KOKKOS_STD_ALGORITHMS_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS +#endif /// \file Kokkos_StdAlgorithms.hpp /// \brief Kokkos counterparts for Standard C++ Library algorithms -#include <std_algorithms/Kokkos_Constraints.hpp> -#include <std_algorithms/Kokkos_RandomAccessIterator.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> +#include "std_algorithms/impl/Kokkos_Constraints.hpp" +#include "std_algorithms/impl/Kokkos_RandomAccessIterator.hpp" +#include "std_algorithms/Kokkos_BeginEnd.hpp" // distance -#include <std_algorithms/Kokkos_Distance.hpp> +#include "std_algorithms/Kokkos_Distance.hpp" + +// note that we categorize below the headers +// following the std classification. 
-// move, swap, iter_swap -#include "std_algorithms/Kokkos_ModifyingOperations.hpp" +// modifying ops +#include "std_algorithms/Kokkos_Swap.hpp" +#include "std_algorithms/Kokkos_IterSwap.hpp" -// find, find_if, find_if_not -// for_each, for_each_n -// mismatch -// equal -// count_if, count -// all_of, any_of, none_of -// adjacent_find -// lexicographical_compare -// search, search_n -// find_first_of, find_end -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> +// non-modifying sequence +#include "std_algorithms/Kokkos_AdjacentFind.hpp" +#include "std_algorithms/Kokkos_Count.hpp" +#include "std_algorithms/Kokkos_CountIf.hpp" +#include "std_algorithms/Kokkos_AllOf.hpp" +#include "std_algorithms/Kokkos_AnyOf.hpp" +#include "std_algorithms/Kokkos_NoneOf.hpp" +#include "std_algorithms/Kokkos_Equal.hpp" +#include "std_algorithms/Kokkos_Find.hpp" +#include "std_algorithms/Kokkos_FindIf.hpp" +#include "std_algorithms/Kokkos_FindIfNot.hpp" +#include "std_algorithms/Kokkos_FindEnd.hpp" +#include "std_algorithms/Kokkos_FindFirstOf.hpp" +#include "std_algorithms/Kokkos_ForEach.hpp" +#include "std_algorithms/Kokkos_ForEachN.hpp" +#include "std_algorithms/Kokkos_LexicographicalCompare.hpp" +#include "std_algorithms/Kokkos_Mismatch.hpp" +#include "std_algorithms/Kokkos_Search.hpp" +#include "std_algorithms/Kokkos_SearchN.hpp" -// replace, replace_copy_if, replace_copy, replace_if -// copy, copy_n, copy_backward, copy_if -// fill, fill_n -// transform -// generate, generate_n -// reverse, reverse_copy -// move, move_backward -// swap_ranges -// unique, unique_copy -// rotate, rotate_copy -// remove, remove_if, remove_copy, remove_copy_if -// shift_left, shift_right -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> +// modifying sequence +#include "std_algorithms/Kokkos_Fill.hpp" +#include "std_algorithms/Kokkos_FillN.hpp" +#include "std_algorithms/Kokkos_Replace.hpp" +#include "std_algorithms/Kokkos_ReplaceIf.hpp" +#include 
"std_algorithms/Kokkos_ReplaceCopyIf.hpp" +#include "std_algorithms/Kokkos_ReplaceCopy.hpp" +#include "std_algorithms/Kokkos_Copy.hpp" +#include "std_algorithms/Kokkos_CopyN.hpp" +#include "std_algorithms/Kokkos_CopyBackward.hpp" +#include "std_algorithms/Kokkos_CopyIf.hpp" +#include "std_algorithms/Kokkos_Transform.hpp" +#include "std_algorithms/Kokkos_Generate.hpp" +#include "std_algorithms/Kokkos_GenerateN.hpp" +#include "std_algorithms/Kokkos_Reverse.hpp" +#include "std_algorithms/Kokkos_ReverseCopy.hpp" +#include "std_algorithms/Kokkos_Move.hpp" +#include "std_algorithms/Kokkos_MoveBackward.hpp" +#include "std_algorithms/Kokkos_SwapRanges.hpp" +#include "std_algorithms/Kokkos_Unique.hpp" +#include "std_algorithms/Kokkos_UniqueCopy.hpp" +#include "std_algorithms/Kokkos_Rotate.hpp" +#include "std_algorithms/Kokkos_RotateCopy.hpp" +#include "std_algorithms/Kokkos_Remove.hpp" +#include "std_algorithms/Kokkos_RemoveIf.hpp" +#include "std_algorithms/Kokkos_RemoveCopy.hpp" +#include "std_algorithms/Kokkos_RemoveCopyIf.hpp" +#include "std_algorithms/Kokkos_ShiftLeft.hpp" +#include "std_algorithms/Kokkos_ShiftRight.hpp" -// is_sorted_until, is_sorted -#include <std_algorithms/Kokkos_SortingOperations.hpp> +// sorting +#include "std_algorithms/Kokkos_IsSortedUntil.hpp" +#include "std_algorithms/Kokkos_IsSorted.hpp" -// min_element, max_element, minmax_element -#include <std_algorithms/Kokkos_MinMaxElementOperations.hpp> +// min/max element +#include "std_algorithms/Kokkos_MinElement.hpp" +#include "std_algorithms/Kokkos_MaxElement.hpp" +#include "std_algorithms/Kokkos_MinMaxElement.hpp" -// is_partitioned, partition_copy, partition_point -#include <std_algorithms/Kokkos_PartitioningOperations.hpp> +// partitioning +#include "std_algorithms/Kokkos_IsPartitioned.hpp" +#include "std_algorithms/Kokkos_PartitionCopy.hpp" +#include "std_algorithms/Kokkos_PartitionPoint.hpp" -// adjacent_difference -// reduce, transform_reduce -// exclusive_scan, transform_exclusive_scan -// 
inclusive_scan, transform_inclusive_scan -#include <std_algorithms/Kokkos_Numeric.hpp> +// numeric +#include "std_algorithms/Kokkos_AdjacentDifference.hpp" +#include "std_algorithms/Kokkos_Reduce.hpp" +#include "std_algorithms/Kokkos_TransformReduce.hpp" +#include "std_algorithms/Kokkos_ExclusiveScan.hpp" +#include "std_algorithms/Kokkos_TransformExclusiveScan.hpp" +#include "std_algorithms/Kokkos_InclusiveScan.hpp" +#include "std_algorithms/Kokkos_TransformInclusiveScan.hpp" +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS +#endif #endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp similarity index 72% rename from packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp rename to packages/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp index 03e5fd6aeb09975f6ced7da152c577b1a7fd3cc9..0a7cf06f5bdb166a566a1dd8ef517bbea47b6976 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp @@ -42,106 +42,15 @@ //@HEADER */ -#ifndef KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP -#define KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP +#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP +#define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP -#include <Kokkos_Core.hpp> -#include "../Kokkos_BeginEnd.hpp" -#include "../Kokkos_Constraints.hpp" -#include "../Kokkos_Distance.hpp" +#include "impl/Kokkos_AdjacentDifference.hpp" +#include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { -namespace Impl { -// ------------------------ -// -// functors -// -// ------------------------ -template <class ValueType1, class ValueType2, class RetType = ValueType2> -struct 
StdAdjacentDifferenceDefaultBinaryOpFunctor { - KOKKOS_FUNCTION - constexpr RetType operator()(const ValueType1& a, const ValueType2& b) const { - return a - b; - } -}; - -template <class InputIteratorType, class OutputIteratorType, - class BinaryOperator> -struct StdAdjacentDiffFunctor { - using index_type = typename InputIteratorType::difference_type; - - const InputIteratorType m_first_from; - const OutputIteratorType m_first_dest; - BinaryOperator m_op; - - KOKKOS_FUNCTION - void operator()(const index_type i) const { - const auto& my_value = m_first_from[i]; - if (i == 0) { - m_first_dest[i] = my_value; - } else { - const auto& left_value = m_first_from[i - 1]; - m_first_dest[i] = m_op(my_value, left_value); - } - } - - KOKKOS_FUNCTION - StdAdjacentDiffFunctor(InputIteratorType first_from, - OutputIteratorType first_dest, BinaryOperator op) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_op(std::move(op)) {} -}; - -// ------------------------------------------ -// adjacent_difference_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOp> -OutputIteratorType adjacent_difference_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - BinaryOp bin_op) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - if (first_from == last_from) { - return first_dest; - } - - // aliases - using value_type = typename OutputIteratorType::value_type; - using aux_view_type = ::Kokkos::View<value_type*, ExecutionSpace>; - using functor_t = - StdAdjacentDiffFunctor<InputIteratorType, OutputIteratorType, BinaryOp>; - - // run - const auto num_elements = - 
Kokkos::Experimental::distance(first_from, last_from); - aux_view_type aux_view("aux_view", num_elements); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - functor_t(first_from, first_dest, bin_op)); - ex.fence("Kokkos::adjacent_difference: fence after operation"); - - // return - return first_dest + num_elements; -} - -} // end namespace Impl - -// ------------------------ -// -// public API -// -// ------------------------ template <class ExecutionSpace, class InputIteratorType, class OutputIteratorType> std::enable_if_t<!::Kokkos::is_view<InputIteratorType>::value, diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp new file mode 100644 index 0000000000000000000000000000000000000000..332f9dd3693c6d14a77cc72700708d55681d8f5e --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP +#define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP + +#include "impl/Kokkos_AdjacentFind.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set1 +template <class ExecutionSpace, class IteratorType> +IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", + ex, first, last); +} + +template <class ExecutionSpace, class IteratorType> +IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::adjacent_find_impl(label, ex, first, last); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +auto adjacent_find(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, + KE::begin(v), KE::end(v)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto adjacent_find(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v)); +} + +// overload set2 +template <class ExecutionSpace, class IteratorType, class BinaryPredicateType> +IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, + IteratorType last, BinaryPredicateType pred) { + return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", + ex, first, last, pred); +} + +template <class ExecutionSpace, class IteratorType, class BinaryPredicateType> +IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + BinaryPredicateType pred) { + return Impl::adjacent_find_impl(label, ex, first, last, pred); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class BinaryPredicateType> +auto adjacent_find(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + BinaryPredicateType pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, + KE::begin(v), KE::end(v), pred); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class BinaryPredicateType> +auto adjacent_find(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + BinaryPredicateType pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..66a49541f333b5f5048067898863c2aedb103b57 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_HPP +#define KOKKOS_STD_ALGORITHMS_ALL_OF_HPP + +#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class Predicate> +bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first, + last, predicate); +} + +template <class ExecutionSpace, class InputIterator, class Predicate> +bool all_of(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, Predicate predicate) { + return Impl::all_of_impl(label, ex, first, last, predicate); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class Predicate> +bool all_of(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v), + KE::cend(v), std::move(predicate)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class Predicate> +bool all_of(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e50e90f6da3d480610af10d1cc421edfe322c995 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ANY_OF_HPP +#define KOKKOS_STD_ALGORITHMS_ANY_OF_HPP + +#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class Predicate> +bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::any_of_impl("Kokkos::any_of_iterator_api_default", ex, first, + last, predicate); +} + +template <class ExecutionSpace, class InputIterator, class Predicate> +bool any_of(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, Predicate predicate) { + return Impl::any_of_impl(label, ex, first, last, predicate); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class Predicate> +bool any_of(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp index beb53fdd70c31c9fd02ba2cffd822ee2567fdd09..544919619204837921e6cac925c5f52b3c9c73fd 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp @@ -46,8 +46,8 @@ #define KOKKOS_BEGIN_END_HPP #include <Kokkos_View.hpp> -#include "Kokkos_RandomAccessIterator.hpp" -#include "Kokkos_Constraints.hpp" +#include "impl/Kokkos_RandomAccessIterator.hpp" +#include "impl/Kokkos_Constraints.hpp" /// \file Kokkos_BeginEnd.hpp /// \brief Kokkos begin, end, cbegin, cend diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b3237041b7f3c64b1b74c66f63329c3169f1eb1a --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp @@ -0,0 +1,97 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_HPP + +#include "impl/Kokkos_CopyCopyN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator copy(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last, + d_first); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::copy_impl(label, ex, first, last, d_first); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto copy(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_impl("Kokkos::copy_view_api_default", ex, + KE::cbegin(source), KE::cend(source), KE::begin(dest)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto copy(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source), + KE::begin(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp new file mode 100644 index 0000000000000000000000000000000000000000..83efd96672b32b842f5296a22e8c156295ebccfc --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp @@ -0,0 +1,95 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP + +#include "impl/Kokkos_CopyBackward.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default", + ex, first, last, d_last); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 d_last) { + return Impl::copy_backward_impl(label, ex, first, last, d_last); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto copy_backward(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex, + cbegin(source), cend(source), end(dest)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto copy_backward(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_backward_impl(label, ex, cbegin(source), cend(source), + end(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c83cc29886ca9889697b4f76675f0071f92db1f3 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_IF_HPP + +#include "impl/Kokkos_CopyIf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class Predicate> +OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first, + Predicate pred) { + return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first, + last, d_first, std::move(pred)); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class Predicate> +OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first, Predicate pred) { + return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class Predicate> +auto copy_if(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex, + cbegin(source), cend(source), begin(dest), + std::move(pred)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class Predicate> +auto copy_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_if_impl(label, ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7f3b9374c74d436670740609afd1490b04ab7d10 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp @@ -0,0 +1,98 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_N_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_N_HPP + +#include "impl/Kokkos_CopyCopyN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class Size, + class OutputIterator> +OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count, + OutputIterator result) { + return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first, + count, result); +} + +template <class ExecutionSpace, class InputIterator, class Size, + class OutputIterator> +OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex, + InputIterator first, Size count, OutputIterator result) { + return Impl::copy_n_impl(label, ex, first, count, result); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class Size, class DataType2, class... 
Properties2> +auto copy_n(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, Size count, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex, + KE::cbegin(source), count, KE::begin(dest)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class Size, class DataType2, class... Properties2> +auto copy_n(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, Size count, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_n_impl(label, ex, KE::cbegin(source), count, + KE::begin(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a885ee4ad2f1546122a65eebe329ff523eaa7893 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COUNT_HPP +#define KOKKOS_STD_ALGORITHMS_COUNT_HPP + +#include "impl/Kokkos_CountCountIf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class T> +typename IteratorType::difference_type count(const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + const T& value) { + return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last, + value); +} + +template <class ExecutionSpace, class IteratorType, class T> +typename IteratorType::difference_type count(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + const T& value) { + return Impl::count_impl(label, ex, first, last, value); +} + +template <class ExecutionSpace, class DataType, class... Properties, class T> +auto count(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v), + KE::cend(v), value); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, class T> +auto count(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_impl(label, ex, KE::cbegin(v), KE::cend(v), value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..98b9d74c4c4e0fa369380d8533f53cd3a7790320 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP +#define KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP + +#include "impl/Kokkos_CountCountIf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class Predicate> +typename IteratorType::difference_type count_if(const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + Predicate predicate) { + return Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first, + last, std::move(predicate)); +} + +template <class ExecutionSpace, class IteratorType, class Predicate> +typename IteratorType::difference_type count_if(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + Predicate predicate) { + return Impl::count_if_impl(label, ex, first, last, std::move(predicate)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class Predicate> +auto count_if(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex, + KE::cbegin(v), KE::cend(v), std::move(predicate)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class Predicate> +auto count_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_if_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp index ced4370472a714fd9416836048fc6055532d77ea..4e148642b10e3e6d2bd68d6cca04fb58caf07839 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp @@ -45,8 +45,8 @@ #ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP #define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP -#include "Kokkos_Constraints.hpp" -#include "Kokkos_RandomAccessIterator.hpp" +#include "impl/Kokkos_Constraints.hpp" +#include "impl/Kokkos_RandomAccessIterator.hpp" namespace Kokkos { namespace Experimental { diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8634019fad23a7eaecc9faf13bcb951ff21bb372 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp @@ -0,0 +1,198 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_EQUAL_HPP +#define KOKKOS_STD_ALGORITHMS_EQUAL_HPP + +#include "impl/Kokkos_Equal.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, + last1, first2); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2) { + return Impl::equal_impl(label, ex, first1, last1, first2); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { + return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, + last1, first2, std::move(predicate)); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_impl(label, ex, first1, last1, 
first2, + std::move(predicate)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +bool equal(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + ::Kokkos::View<DataType2, Properties2...>& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +bool equal(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + ::Kokkos::View<DataType2, Properties2...>& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType> +bool equal(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + ::Kokkos::View<DataType2, Properties2...>& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + std::move(predicate)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryPredicateType> +bool equal(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + ::Kokkos::View<DataType2, Properties2...>& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, + last1, first2, last2); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_impl(label, ex, first1, last1, first2, last2); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, + last1, first2, last2, std::move(predicate)); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +std::enable_if_t< 
::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_impl(label, ex, first1, last1, first2, last2, + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b97710f24f2122c637f294b10fe79080266d80c8 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp @@ -0,0 +1,190 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP +#define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP + +#include "impl/Kokkos_ExclusiveScan.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1 +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + return Impl::exclusive_scan_default_op_impl( + "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last, + first_dest, init_value); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + 
InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +exclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value) { + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + return Impl::exclusive_scan_default_op_impl(label, ex, first, last, + first_dest, init_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ValueType> +auto exclusive_scan(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_default_op_impl( + "Kokkos::exclusive_scan_default_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + init_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class ValueType> +auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), init_value); +} + +// overload set 2 +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType, class BinaryOpType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(ex); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + return Impl::exclusive_scan_custom_op_impl( + "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last, + first_dest, init_value, bop); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType, class BinaryOpType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +exclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value, + BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(ex); + 
static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest, + init_value, bop); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ValueType, + class BinaryOpType> +auto exclusive_scan(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_custom_op_impl( + "Kokkos::exclusive_scan_custom_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + init_value, bop); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class ValueType, + class BinaryOpType> +auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_custom_op_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), init_value, bop); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp new file mode 100644 index 0000000000000000000000000000000000000000..200e03b9dcd2a6ea7d871820f8f6fb746e87a82c --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp @@ -0,0 +1,86 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FILL_HPP +#define KOKKOS_STD_ALGORITHMS_FILL_HPP + +#include "impl/Kokkos_FillFillN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class T> +void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last, + const T& value) { + Impl::fill_impl("Kokkos::fill_iterator_api_default", ex, first, last, value); +} + +template <class ExecutionSpace, class IteratorType, class T> +void fill(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, const T& value) { + Impl::fill_impl(label, ex, first, last, value); +} + +template <class ExecutionSpace, class DataType, class... Properties, class T> +void fill(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + Impl::fill_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view), + value); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, class T> +void fill(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + Impl::fill_impl(label, ex, begin(view), end(view), value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2e814dc55f781c67ed8c6960a97605265c50ff94 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp @@ -0,0 +1,91 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FILL_N_HPP +#define KOKKOS_STD_ALGORITHMS_FILL_N_HPP + +#include "impl/Kokkos_FillFillN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class SizeType, class T> +IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n, + const T& value) { + return Impl::fill_n_impl("Kokkos::fill_n_iterator_api_default", ex, first, n, + value); +} + +template <class ExecutionSpace, class IteratorType, class SizeType, class T> +IteratorType fill_n(const std::string& label, const ExecutionSpace& ex, + IteratorType first, SizeType n, const T& value) { + return Impl::fill_n_impl(label, ex, first, n, value); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class SizeType, class T> +auto fill_n(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, SizeType n, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::fill_n_impl("Kokkos::fill_n_view_api_default", ex, begin(view), + n, value); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class SizeType, class T> +auto fill_n(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, SizeType n, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::fill_n_impl(label, ex, begin(view), n, value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6758f00ce4e6572d8366acac028656b6f0831427 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_HPP + +#include "impl/Kokkos_FindIfOrNot.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class T> +InputIterator find(const ExecutionSpace& ex, InputIterator first, + InputIterator last, const T& value) { + return Impl::find_impl("Kokkos::find_iterator_api_default", ex, first, last, + value); +} + +template <class ExecutionSpace, class InputIterator, class T> +InputIterator find(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, const T& value) { + return Impl::find_impl(label, ex, first, last, value); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, class T> +auto find(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_impl("Kokkos::find_view_api_default", ex, KE::begin(view), + KE::end(view), value); +} + +template <class ExecutionSpace, class DataType, class... Properties, class T> +auto find(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_impl(label, ex, KE::begin(view), KE::end(view), value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp new file mode 100644 index 0000000000000000000000000000000000000000..61b54c822550a9e6d1043a9eaabac5bcb89bb1c6 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp @@ -0,0 +1,149 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_END_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_END_HPP + +#include "impl/Kokkos_FindEnd.hpp" +#include "Kokkos_Equal.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1: no binary predicate passed +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, + last, s_first, s_last); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + return Impl::find_end_impl(label, ex, first, last, s_first, s_last); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto find_end(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), KE::begin(s_view), + KE::end(s_view)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto find_end(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, + last, s_first, s_last, pred); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_end_impl(label, ex, first, last, s_first, s_last, pred); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryPredicateType> +auto find_end(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType> +auto find_end(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b8c27cb272ea0d102714fb6c38c4b0087e194cfd --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp @@ -0,0 +1,150 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_HPP + +#include "impl/Kokkos_FindFirstOf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1: no binary predicate passed +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", + ex, first, last, s_first, s_last); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto find_first_of(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto find_first_of(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", + ex, first, last, s_first, s_last, pred); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last, + pred); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryPredicateType> +auto find_first_of(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType> +auto find_first_of(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..54896da1176155579a912220a739d22764fba113 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp @@ -0,0 +1,95 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_IF_HPP + +#include "impl/Kokkos_FindIfOrNot.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class PredicateType> +IteratorType find_if(const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType predicate) { + return Impl::find_if_or_not_impl<true>("Kokkos::find_if_iterator_api_default", + ex, first, last, std::move(predicate)); +} + +template <class ExecutionSpace, class IteratorType, class PredicateType> +IteratorType find_if(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType predicate) { + return Impl::find_if_or_not_impl<true>(label, ex, first, last, + std::move(predicate)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class Predicate> +auto find_if(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_impl<true>("Kokkos::find_if_view_api_default", ex, + KE::begin(v), KE::end(v), + std::move(predicate)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class Predicate> +auto find_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_impl<true>(label, ex, KE::begin(v), KE::end(v), + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cfe6bb84d8f50e18535e7ce07e8acac996b782e8 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp @@ -0,0 +1,98 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_NOT_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_IF_NOT_HPP + +#include "impl/Kokkos_FindIfOrNot.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class Predicate> +IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first, + IteratorType last, Predicate predicate) { + return Impl::find_if_or_not_impl<false>( + "Kokkos::find_if_not_iterator_api_default", ex, first, last, + std::move(predicate)); +} + +template <class ExecutionSpace, class IteratorType, class Predicate> +IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::find_if_or_not_impl<false>(label, ex, first, last, + std::move(predicate)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class Predicate> +auto find_if_not(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_impl<false>( + "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v), + std::move(predicate)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class Predicate> +auto find_if_not(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_impl<false>(label, ex, KE::begin(v), KE::end(v), + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8a2f90e82bf3faedef85cc21610259b98b80a3d2 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp @@ -0,0 +1,95 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_HPP +#define KOKKOS_STD_ALGORITHMS_FOR_EACH_HPP + +#include "impl/Kokkos_ForEachForEachN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class UnaryFunctorType> +UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { + return Impl::for_each_impl(label, ex, first, last, std::move(functor)); +} + +template <class ExecutionSpace, class IteratorType, class UnaryFunctorType> +UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, + IteratorType last, UnaryFunctorType functor) { + return Impl::for_each_impl("Kokkos::for_each_iterator_api_default", ex, first, + last, std::move(functor)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class UnaryFunctorType> +UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_impl(label, ex, KE::begin(v), KE::end(v), + std::move(functor)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class UnaryFunctorType> +UnaryFunctorType for_each(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_impl("Kokkos::for_each_view_api_default", ex, + KE::begin(v), KE::end(v), std::move(functor)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dd917a33e89790fa70384d4cc0b16905d37c6a32 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp @@ -0,0 +1,96 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_N_HPP +#define KOKKOS_STD_ALGORITHMS_FOR_EACH_N_HPP + +#include "impl/Kokkos_ForEachForEachN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType> +IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex, + IteratorType first, SizeType n, + UnaryFunctorType functor) { + return Impl::for_each_n_impl(label, ex, first, n, std::move(functor)); +} + +template <class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType> +IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first, + SizeType n, UnaryFunctorType functor) { + return Impl::for_each_n_impl("Kokkos::for_each_n_iterator_api_default", ex, + first, n, std::move(functor)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class SizeType, class UnaryFunctorType> +auto for_each_n(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, SizeType n, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_n_impl(label, ex, KE::begin(v), n, std::move(functor)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class SizeType, class UnaryFunctorType> +auto for_each_n(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, SizeType n, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_n_impl("Kokkos::for_each_n_view_api_default", ex, + KE::begin(v), n, std::move(functor)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp new file mode 100644 index 0000000000000000000000000000000000000000..955cb42d4b18c4818316d995492f6d99b922a219 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp @@ -0,0 +1,91 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_GENERATE_HPP +#define KOKKOS_STD_ALGORITHMS_GENERATE_HPP + +#include "impl/Kokkos_GenerateGenerateN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class Generator> +void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last, + Generator g) { + Impl::generate_impl("Kokkos::generate_iterator_api_default", ex, first, last, + std::move(g)); +} + +template <class ExecutionSpace, class IteratorType, class Generator> +void generate(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, Generator g) { + Impl::generate_impl(label, ex, first, last, std::move(g)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class Generator> +void generate(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + Impl::generate_impl("Kokkos::generate_view_api_default", ex, begin(view), + end(view), std::move(g)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class Generator> +void generate(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + Impl::generate_impl(label, ex, begin(view), end(view), std::move(g)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp new file mode 100644 index 0000000000000000000000000000000000000000..470edb15966fb08adbc131df3347d7aac82a087d --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp @@ -0,0 +1,93 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_GENERATE_N_HPP +#define KOKKOS_STD_ALGORITHMS_GENERATE_N_HPP + +#include "impl/Kokkos_GenerateGenerateN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class Size, class Generator> +IteratorType generate_n(const ExecutionSpace& ex, IteratorType first, + Size count, Generator g) { + Impl::generate_n_impl("Kokkos::generate_n_iterator_api_default", ex, first, + count, std::move(g)); + return first + count; +} + +template <class ExecutionSpace, class IteratorType, class Size, class Generator> +IteratorType generate_n(const std::string& label, const ExecutionSpace& ex, + IteratorType first, Size count, Generator g) { + Impl::generate_n_impl(label, ex, first, count, std::move(g)); + return first + count; +} + +template <class ExecutionSpace, class DataType, class... 
Properties, class Size, + class Generator> +auto generate_n(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, Size count, + Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::generate_n_impl("Kokkos::generate_n_view_api_default", ex, + begin(view), count, std::move(g)); +} + +template <class ExecutionSpace, class DataType, class... Properties, class Size, + class Generator> +auto generate_n(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, Size count, + Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::generate_n_impl(label, ex, begin(view), count, std::move(g)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c34b5f43c9d426b91be882b315e3675b2b6872ca --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp @@ -0,0 +1,223 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_HPP +#define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_HPP + +#include "impl/Kokkos_InclusiveScan.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1 +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_impl( + "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last, + first_dest); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +inclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_impl(label, ex, first, last, + first_dest); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto inclusive_scan( + const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_default_op_impl( + "Kokkos::inclusive_scan_default_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto inclusive_scan( + const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest)); +} + +// overload set 2 (accepting custom binary op) +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOp> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_impl( + "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, + first_dest, binary_op); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOp> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + 
OutputIteratorType> +inclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_impl(label, ex, first, last, + first_dest, binary_op); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryOp> +auto inclusive_scan(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + BinaryOp binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_impl( + "Kokkos::inclusive_scan_custom_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + binary_op); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryOp> +auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + BinaryOp binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op); +} + +// overload set 3 +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOp, class ValueType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + return Impl::inclusive_scan_custom_binary_op_impl( + "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, + first_dest, binary_op, init_value); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOp, class ValueType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +inclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, BinaryOp binary_op, + ValueType init_value) { + return Impl::inclusive_scan_custom_binary_op_impl( + label, ex, first, last, first_dest, binary_op, init_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryOp, + class ValueType> +auto inclusive_scan(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + BinaryOp binary_op, ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_impl( + "Kokkos::inclusive_scan_custom_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + binary_op, init_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryOp, + class ValueType> +auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + BinaryOp binary_op, ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, init_value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8a2ca207ae6d01756498f24e8daa14fbed838a63 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_HPP +#define KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_HPP + +#include "impl/Kokkos_IsPartitioned.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class PredicateType> +bool is_partitioned(const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType p) { + return Impl::is_partitioned_impl( + "Kokkos::is_partitioned_iterator_api_default", ex, first, last, + std::move(p)); +} + +template <class ExecutionSpace, class IteratorType, class PredicateType> +bool is_partitioned(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, PredicateType p) { + return Impl::is_partitioned_impl(label, ex, first, last, std::move(p)); +} + +template <class ExecutionSpace, class PredicateType, class DataType, + class... Properties> +bool is_partitioned(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + PredicateType p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::is_partitioned_impl("Kokkos::is_partitioned_view_api_default", + ex, cbegin(v), cend(v), std::move(p)); +} + +template <class ExecutionSpace, class PredicateType, class DataType, + class... 
Properties> +bool is_partitioned(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + PredicateType p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::is_partitioned_impl(label, ex, cbegin(v), cend(v), std::move(p)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0ab466f3389564aeddd1b4f8482458c1baf644da --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp @@ -0,0 +1,131 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_HPP +#define KOKKOS_STD_ALGORITHMS_IS_SORTED_HPP + +#include "impl/Kokkos_IsSorted.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType> +bool is_sorted(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, + first, last); +} + +template <class ExecutionSpace, class IteratorType> +bool is_sorted(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::is_sorted_impl(label, ex, first, last); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +bool is_sorted(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +bool is_sorted(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view)); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, + first, last, std::move(comp)); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +bool is_sorted(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + return Impl::is_sorted_impl(label, ex, first, last, std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class ComparatorType> +bool is_sorted(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(ex); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view), + std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class ComparatorType> +bool is_sorted(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(ex); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view), + std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c480d9ee5a2e4335add84889862617c98cc5b9cf --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp @@ -0,0 +1,134 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_HPP +#define KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_HPP + +#include "impl/Kokkos_IsSortedUntil.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType> +IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::is_sorted_until_impl( + "Kokkos::is_sorted_until_iterator_api_default", ex, first, last); +} + +template <class ExecutionSpace, class IteratorType> +IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::is_sorted_until_impl(label, ex, first, last); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +auto is_sorted_until(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", + ex, KE::begin(view), KE::end(view)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view)); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + return Impl::is_sorted_until_impl( + "Kokkos::is_sorted_until_iterator_api_default", ex, first, last, + std::move(comp)); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::is_sorted_until_impl(label, ex, first, last, std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class ComparatorType> +auto is_sorted_until(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(ex); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", + ex, KE::begin(view), KE::end(view), + std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class ComparatorType> +auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(ex); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view), + std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp similarity index 79% rename from packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp rename to packages/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp index f8ca3456e5060cdc370fa2720936fdb3136b8738..1174740a5b1ac1dca656a3663ce889bf773305fc 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp @@ -42,38 +42,17 @@ //@HEADER */ -#ifndef KOKKOS_MODIFYING_OPERATIONS_HPP -#define KOKKOS_MODIFYING_OPERATIONS_HPP +#ifndef KOKKOS_STD_ALGORITHMS_ITER_SWAP_HPP +#define KOKKOS_STD_ALGORITHMS_ITER_SWAP_HPP #include <Kokkos_Core.hpp> -#include "Kokkos_BeginEnd.hpp" -#include "Kokkos_Constraints.hpp" +#include "impl/Kokkos_Constraints.hpp" +#include "Kokkos_Swap.hpp" 
namespace Kokkos { namespace Experimental { - -// move -template <typename T> -KOKKOS_INLINE_FUNCTION std::remove_reference_t<T>&& move(T&& t) { - return static_cast<std::remove_reference_t<T>&&>(t); -} - -// swap -template <class T> -KOKKOS_INLINE_FUNCTION void swap(T& a, T& b) noexcept { - static_assert( - std::is_move_assignable<T>::value && std::is_move_constructible<T>::value, - "Kokkos::Experimental::swap arguments must be move assignable " - "and move constructible"); - - T tmp = std::move(a); - a = std::move(b); - b = std::move(tmp); -} - -//---------------------------------------------------------------------------- -// this is here because we use the swap function above namespace Impl { + template <class IteratorType1, class IteratorType2> struct StdIterSwapFunctor { IteratorType1 m_a; diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4704a9ec5664e3bc7d18559e65ee5d5f90157c71 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp @@ -0,0 +1,154 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_HPP +#define KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_HPP + +#include "impl/Kokkos_LexicographicalCompare.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2) { + return Impl::lexicographical_compare_impl( + "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, + first2, last2); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, + last2); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +bool lexicographical_compare( + const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + ::Kokkos::View<DataType2, Properties2...>& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_impl( + "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +bool lexicographical_compare( + const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + ::Kokkos::View<DataType2, Properties2...>& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class ComparatorType> +bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2, ComparatorType comp) { + return Impl::lexicographical_compare_impl( + "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, + first2, last2, comp); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class ComparatorType> +bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + ComparatorType comp) { + return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, + last2, comp); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class ComparatorType> +bool lexicographical_compare( + const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + ::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_impl( + "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ComparatorType> +bool lexicographical_compare( + const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + ::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5f6e5cbf62e28bae181663f14e44bcf2ee16bdab --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp @@ -0,0 +1,132 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MAX_ELEMENT_HPP +#define KOKKOS_STD_ALGORITHMS_MAX_ELEMENT_HPP + +#include "impl/Kokkos_MinMaxMinmaxElement.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType> +auto max_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::min_or_max_element_impl<MaxFirstLoc>( + "Kokkos::max_element_iterator_api_default", ex, first, last); +} + +template <class ExecutionSpace, class IteratorType> +auto max_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_impl<MaxFirstLoc>(label, ex, first, last); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +auto max_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>( + "Kokkos::max_element_iterator_api_default", ex, first, last, + std::move(comp)); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +auto max_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>( + label, ex, first, last, std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +auto max_element(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_impl<MaxFirstLoc>( + "Kokkos::max_element_view_api_default", ex, begin(v), end(v)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto max_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_impl<MaxFirstLoc>(label, ex, begin(v), + end(v)); +} + +template <class ExecutionSpace, class DataType, class ComparatorType, + class... Properties> +auto max_element(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>( + "Kokkos::max_element_view_api_default", ex, begin(v), end(v), + std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class ComparatorType, + class... 
Properties> +auto max_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>( + label, ex, begin(v), end(v), std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp new file mode 100644 index 0000000000000000000000000000000000000000..63cc5489607a6f5b5f525ee9aa61dfe1ec8a1962 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp @@ -0,0 +1,132 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MIN_ELEMENT_HPP +#define KOKKOS_STD_ALGORITHMS_MIN_ELEMENT_HPP + +#include "impl/Kokkos_MinMaxMinmaxElement.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType> +auto min_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::min_or_max_element_impl<MinFirstLoc>( + "Kokkos::min_element_iterator_api_default", ex, first, last); +} + +template <class ExecutionSpace, class IteratorType> +auto min_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_impl<MinFirstLoc>(label, ex, first, last); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +auto min_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>( + 
"Kokkos::min_element_iterator_api_default", ex, first, last, + std::move(comp)); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +auto min_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>( + label, ex, first, last, std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto min_element(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_impl<MinFirstLoc>( + "Kokkos::min_element_view_api_default", ex, begin(v), end(v)); +} + +template <class ExecutionSpace, class DataType, class ComparatorType, + class... Properties> +auto min_element(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>( + "Kokkos::min_element_view_api_default", ex, begin(v), end(v), + std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto min_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_impl<MinFirstLoc>(label, ex, begin(v), + end(v)); +} + +template <class ExecutionSpace, class DataType, class ComparatorType, + class... 
Properties> +auto min_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>( + label, ex, begin(v), end(v), std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp new file mode 100644 index 0000000000000000000000000000000000000000..07cdefcc05658ab5bd4edc8d47d592308a56aede --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp @@ -0,0 +1,133 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MINMAX_ELEMENT_HPP +#define KOKKOS_STD_ALGORITHMS_MINMAX_ELEMENT_HPP + +#include "impl/Kokkos_MinMaxMinmaxElement.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType> +auto minmax_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::minmax_element_impl<MinMaxFirstLastLoc>( + "Kokkos::minmax_element_iterator_api_default", ex, first, last); +} + +template <class ExecutionSpace, class IteratorType> +auto minmax_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::minmax_element_impl<MinMaxFirstLastLoc>(label, ex, first, last); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +auto minmax_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>( + 
"Kokkos::minmax_element_iterator_api_default", ex, first, last, + std::move(comp)); +} + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +auto minmax_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>( + label, ex, first, last, std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto minmax_element(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::minmax_element_impl<MinMaxFirstLastLoc>( + "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto minmax_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::minmax_element_impl<MinMaxFirstLastLoc>(label, ex, begin(v), + end(v)); +} + +template <class ExecutionSpace, class DataType, class ComparatorType, + class... Properties> +auto minmax_element(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>( + "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v), + std::move(comp)); +} + +template <class ExecutionSpace, class DataType, class ComparatorType, + class... 
Properties> +auto minmax_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>( + label, ex, begin(v), end(v), std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp deleted file mode 100644 index aa8f5ba3760c7569fb7cb31fd80fd1fe76eda197..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp +++ /dev/null @@ -1,409 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_STD_MIN_MAX_ELEMENT_OPERATIONS_HPP -#define KOKKOS_STD_MIN_MAX_ELEMENT_OPERATIONS_HPP - -#include <Kokkos_Core.hpp> -#include "Kokkos_BeginEnd.hpp" -#include "Kokkos_Constraints.hpp" -#include "Kokkos_Distance.hpp" -#include "Kokkos_ModifyingOperations.hpp" - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template <class IteratorType, class ReducerType> -struct StdMinOrMaxElemFunctor { - using index_type = typename IteratorType::difference_type; - using red_value_type = typename ReducerType::value_type; - - IteratorType m_first; - ReducerType m_reducer; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& red_value) const { - m_reducer.join(red_value, red_value_type{m_first[i], i}); - } - - KOKKOS_FUNCTION - StdMinOrMaxElemFunctor(IteratorType first, ReducerType reducer) - : m_first(std::move(first)), m_reducer(std::move(reducer)) {} -}; - -template <class IteratorType, class ReducerType> -struct StdMinMaxElemFunctor { - using index_type = typename IteratorType::difference_type; - using red_value_type 
= typename ReducerType::value_type; - IteratorType m_first; - ReducerType m_reducer; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& red_value) const { - const auto& my_value = m_first[i]; - m_reducer.join(red_value, red_value_type{my_value, my_value, i, i}); - } - - KOKKOS_FUNCTION - StdMinMaxElemFunctor(IteratorType first, ReducerType reducer) - : m_first(std::move(first)), m_reducer(std::move(reducer)) {} -}; - -// ------------------------------------------ -// min_or_max_element_impl -// ------------------------------------------ -template <template <class... Args> class ReducerType, class ExecutionSpace, - class IteratorType, class... Args> -IteratorType min_or_max_element_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, IteratorType last, - Args&&... args) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - if (first == last) { - return last; - } - - // aliases - using index_type = typename IteratorType::difference_type; - using value_type = typename IteratorType::value_type; - using reducer_type = ReducerType<value_type, index_type, Args...>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = StdMinOrMaxElemFunctor<IteratorType, reducer_type>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result, std::forward<Args>(args)...); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, reducer), reducer); - - // fence not needed because reducing into scalar - - // return - return first + red_result.loc; -} - -// ------------------------------------------ -// minmax_element_impl -// ------------------------------------------ -template <template <class... Args> class ReducerType, class ExecutionSpace, - class IteratorType, class... 
Args> -::Kokkos::pair<IteratorType, IteratorType> minmax_element_impl( - const std::string& label, const ExecutionSpace& ex, IteratorType first, - IteratorType last, Args&&... args) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - if (first == last) { - return {first, first}; - } - - // aliases - using index_type = typename IteratorType::difference_type; - using value_type = typename IteratorType::value_type; - using reducer_type = ReducerType<value_type, index_type, Args...>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = StdMinMaxElemFunctor<IteratorType, reducer_type>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result, std::forward<Args>(args)...); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, reducer), reducer); - - // fence not needed because reducing into scalar - - // return - return {first + red_result.min_loc, first + red_result.max_loc}; -} - -} // end namespace Impl - -// ---------------------- -// min_element public API -// ---------------------- -template <class ExecutionSpace, class IteratorType> -auto min_element(const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::min_or_max_element_impl<MinFirstLoc>( - "Kokkos::min_element_iterator_api_default", ex, first, last); -} - -template <class ExecutionSpace, class IteratorType> -auto min_element(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl<MinFirstLoc>(label, ex, first, last); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -auto min_element(const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - - return 
Impl::min_or_max_element_impl<MinFirstLocCustomComparator>( - "Kokkos::min_element_iterator_api_default", ex, first, last, - std::move(comp)); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -auto min_element(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>( - label, ex, first, last, std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto min_element(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - return Impl::min_or_max_element_impl<MinFirstLoc>( - "Kokkos::min_element_view_api_default", ex, begin(v), end(v)); -} - -template <class ExecutionSpace, class DataType, class ComparatorType, - class... Properties> -auto min_element(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>( - "Kokkos::min_element_view_api_default", ex, begin(v), end(v), - std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto min_element(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - return Impl::min_or_max_element_impl<MinFirstLoc>(label, ex, begin(v), - end(v)); -} - -template <class ExecutionSpace, class DataType, class ComparatorType, - class... 
Properties> -auto min_element(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::min_or_max_element_impl<MinFirstLocCustomComparator>( - label, ex, begin(v), end(v), std::move(comp)); -} - -// ---------------------- -// max_element public API -// ---------------------- -template <class ExecutionSpace, class IteratorType> -auto max_element(const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::min_or_max_element_impl<MaxFirstLoc>( - "Kokkos::max_element_iterator_api_default", ex, first, last); -} - -template <class ExecutionSpace, class IteratorType> -auto max_element(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl<MaxFirstLoc>(label, ex, first, last); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -auto max_element(const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>( - "Kokkos::max_element_iterator_api_default", ex, first, last, - std::move(comp)); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -auto max_element(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>( - label, ex, first, last, std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties> -auto max_element(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - return Impl::min_or_max_element_impl<MaxFirstLoc>( - "Kokkos::max_element_view_api_default", ex, begin(v), end(v)); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto max_element(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - return Impl::min_or_max_element_impl<MaxFirstLoc>(label, ex, begin(v), - end(v)); -} - -template <class ExecutionSpace, class DataType, class ComparatorType, - class... Properties> -auto max_element(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>( - "Kokkos::max_element_view_api_default", ex, begin(v), end(v), - std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class ComparatorType, - class... 
Properties> -auto max_element(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::min_or_max_element_impl<MaxFirstLocCustomComparator>( - label, ex, begin(v), end(v), std::move(comp)); -} - -// ------------------------- -// minmax_element public API -// ------------------------- -template <class ExecutionSpace, class IteratorType> -auto minmax_element(const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::minmax_element_impl<MinMaxFirstLastLoc>( - "Kokkos::minmax_element_iterator_api_default", ex, first, last); -} - -template <class ExecutionSpace, class IteratorType> -auto minmax_element(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { - return Impl::minmax_element_impl<MinMaxFirstLastLoc>(label, ex, first, last); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -auto minmax_element(const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>( - "Kokkos::minmax_element_iterator_api_default", ex, first, last, - std::move(comp)); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -auto minmax_element(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>( - label, ex, first, last, std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties> -auto minmax_element(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - return Impl::minmax_element_impl<MinMaxFirstLastLoc>( - "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v)); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto minmax_element(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - return Impl::minmax_element_impl<MinMaxFirstLastLoc>(label, ex, begin(v), - end(v)); -} - -template <class ExecutionSpace, class DataType, class ComparatorType, - class... Properties> -auto minmax_element(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>( - "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v), - std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class ComparatorType, - class... 
Properties> -auto minmax_element(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::minmax_element_impl<MinMaxFirstLastLocCustomComparator>( - label, ex, begin(v), end(v), std::move(comp)); -} - -} // namespace Experimental -} // namespace Kokkos - -#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3418e048a94f15a6ff01c5497f3281660049812e --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp @@ -0,0 +1,160 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MISMATCH_HPP +#define KOKKOS_STD_ALGORITHMS_MISMATCH_HPP + +#include "impl/Kokkos_Mismatch.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// FIXME: add mismatch overloads accepting 3 iterators. +// An overload consistent with other algorithms: +// +// auto mismatch(const ExecSpace& ex, It1 first1, It1 last1, It2 first2) {...} +// +// makes API ambiguous (with the overload accepting views). 
+ +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +::Kokkos::pair<IteratorType1, IteratorType2> mismatch(const ExecutionSpace& ex, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { + return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, + first1, last1, first2, last2); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +::Kokkos::pair<IteratorType1, IteratorType2> mismatch( + const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType&& predicate) { + return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, + first1, last1, first2, last2, + std::forward<BinaryPredicateType>(predicate)); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +::Kokkos::pair<IteratorType1, IteratorType2> mismatch( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { + return Impl::mismatch_impl(label, ex, first1, last1, first2, last2); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +::Kokkos::pair<IteratorType1, IteratorType2> mismatch( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType&& predicate) { + return Impl::mismatch_impl(label, ex, first1, last1, first2, last2, + std::forward<BinaryPredicateType>(predicate)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto mismatch(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + const ::Kokkos::View<DataType2, Properties2...>& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, + KE::begin(view1), KE::end(view1), KE::begin(view2), + KE::end(view2)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType> +auto mismatch(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + const ::Kokkos::View<DataType2, Properties2...>& view2, + BinaryPredicateType&& predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, + KE::begin(view1), KE::end(view1), KE::begin(view2), + KE::end(view2), + std::forward<BinaryPredicateType>(predicate)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto mismatch(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + const ::Kokkos::View<DataType2, Properties2...>& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryPredicateType> +auto mismatch(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view1, + const ::Kokkos::View<DataType2, Properties2...>& view2, + BinaryPredicateType&& predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward<BinaryPredicateType>(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c2ce4655faa4631d48eaaafaec016c6e0fa65c4b --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MOVE_HPP +#define KOKKOS_STD_ALGORITHMS_MOVE_HPP + +#include "impl/Kokkos_Move.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator move(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + return Impl::move_impl("Kokkos::move_iterator_api_default", ex, first, last, + d_first); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator move(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::move_impl(label, ex, first, last, d_first); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto move(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_impl("Kokkos::move_view_api_default", ex, begin(source), + end(source), begin(dest)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto move(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_impl(label, ex, begin(source), end(source), begin(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f7462d52d8a8cbb520b0985085b23c8e56c01879 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp @@ -0,0 +1,95 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_HPP +#define KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_HPP + +#include "impl/Kokkos_MoveBackward.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 move_backward(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + return Impl::move_backward_impl("Kokkos::move_backward_iterator_api_default", + ex, first, last, d_last); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto move_backward(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_backward_impl("Kokkos::move_backward_view_api_default", ex, + begin(source), end(source), end(dest)); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 move_backward(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 d_last) { + return Impl::move_backward_impl(label, ex, first, last, d_last); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto move_backward(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_backward_impl(label, ex, begin(source), end(source), + end(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp deleted file mode 100644 index d273f092a7413d50d021e4b8da6682aacbd1216e..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp +++ /dev/null @@ -1,2406 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_NON_MODIFYING_SEQUENCE_OPERATIONS_HPP -#define KOKKOS_NON_MODIFYING_SEQUENCE_OPERATIONS_HPP - -#include <Kokkos_Core.hpp> -#include "Kokkos_BeginEnd.hpp" -#include "Kokkos_Constraints.hpp" -#include "Kokkos_ModifyingOperations.hpp" -#include "Kokkos_HelperPredicates.hpp" -#include "Kokkos_RandomAccessIterator.hpp" -#include "Kokkos_Distance.hpp" -#include <string> - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -// ------------------------------------------ -// -// functors -// -// ------------------------------------------ - -template <bool is_find_if, class IndexType, class IteratorType, - class ReducerType, class PredicateType> -struct StdFindIfOrNotFunctor { - using red_value_type = typename ReducerType::value_type; - - IteratorType m_first; - ReducerType m_reducer; - PredicateType m_p; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - const auto& my_value = m_first[i]; - - // if doing find_if, look for when predicate is true - // if doing find_if_not, look for when predicate is false - const bool found_condition = is_find_if ? m_p(my_value) : !m_p(my_value); - - auto rv = - found_condition - ? 
red_value_type{i} - : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; - - m_reducer.join(red_value, rv); - } - - KOKKOS_FUNCTION - StdFindIfOrNotFunctor(IteratorType first, ReducerType reducer, - PredicateType p) - : m_first(std::move(first)), - m_reducer(std::move(reducer)), - m_p(std::move(p)) {} -}; - -template <class IteratorType, class UnaryFunctorType> -struct StdForEachFunctor { - using index_type = typename IteratorType::difference_type; - IteratorType m_first; - UnaryFunctorType m_functor; - - KOKKOS_FUNCTION - void operator()(index_type i) const { m_functor(m_first[i]); } - - KOKKOS_FUNCTION - StdForEachFunctor(IteratorType _first, UnaryFunctorType _functor) - : m_first(std::move(_first)), m_functor(std::move(_functor)) {} -}; - -template <class IteratorType, class Predicate> -struct StdCountIfFunctor { - using index_type = typename IteratorType::difference_type; - IteratorType m_first; - Predicate m_predicate; - - KOKKOS_FUNCTION - void operator()(index_type i, index_type& lsum) const { - if (m_predicate(m_first[i])) { - lsum++; - } - } - - KOKKOS_FUNCTION - StdCountIfFunctor(IteratorType _first, Predicate _predicate) - : m_first(std::move(_first)), m_predicate(std::move(_predicate)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2, - class ReducerType, class BinaryPredicateType> -struct StdMismatchRedFunctor { - using red_value_type = typename ReducerType::value_type; - - IteratorType1 m_first1; - IteratorType2 m_first2; - ReducerType m_reducer; - BinaryPredicateType m_predicate; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - const auto& my_value1 = m_first1[i]; - const auto& my_value2 = m_first2[i]; - - auto rv = - !m_predicate(my_value1, my_value2) - ? 
red_value_type{i} - : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; - - m_reducer.join(red_value, rv); - } - - KOKKOS_FUNCTION - StdMismatchRedFunctor(IteratorType1 first1, IteratorType2 first2, - ReducerType reducer, BinaryPredicateType predicate) - : m_first1(std::move(first1)), - m_first2(std::move(first2)), - m_reducer(std::move(reducer)), - m_predicate(std::move(predicate)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -struct StdEqualFunctor { - IteratorType1 m_first1; - IteratorType2 m_first2; - BinaryPredicateType m_predicate; - - KOKKOS_FUNCTION - void operator()(IndexType i, std::size_t& lsum) const { - if (!m_predicate(m_first1[i], m_first2[i])) { - lsum = 1; - } - } - - KOKKOS_FUNCTION - StdEqualFunctor(IteratorType1 _first1, IteratorType2 _first2, - BinaryPredicateType _predicate) - : m_first1(std::move(_first1)), - m_first2(std::move(_first2)), - m_predicate(std::move(_predicate)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2, - class ReducerType, class ComparatorType> -struct StdLexicographicalCompareFunctor { - using red_value_type = typename ReducerType::value_type; - IteratorType1 m_first1; - IteratorType2 m_first2; - ReducerType m_reducer; - ComparatorType m_comparator; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - const auto& my_value1 = m_first1[i]; - const auto& my_value2 = m_first2[i]; - - bool different = m_comparator(my_value1, my_value2) || - m_comparator(my_value2, my_value1); - auto rv = - different - ? 
red_value_type{i} - : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; - - m_reducer.join(red_value, rv); - } - - KOKKOS_FUNCTION - StdLexicographicalCompareFunctor(IteratorType1 _first1, IteratorType2 _first2, - ReducerType _reducer, ComparatorType _comp) - : m_first1(std::move(_first1)), - m_first2(std::move(_first2)), - m_reducer(std::move(_reducer)), - m_comparator(std::move(_comp)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2, - class ComparatorType> -struct StdCompareFunctor { - IteratorType1 m_it1; - IteratorType2 m_it2; - ComparatorType m_predicate; - - KOKKOS_FUNCTION - void operator()(IndexType /* i is unused */, int& lsum) const { - if (m_predicate(*m_it1, *m_it2)) { - lsum = 1; - } - } - - KOKKOS_FUNCTION - StdCompareFunctor(IteratorType1 _it1, IteratorType2 _it2, - ComparatorType _predicate) - : m_it1(std::move(_it1)), - m_it2(std::move(_it2)), - m_predicate(std::move(_predicate)) {} -}; - -template <class IndexType, class IteratorType, class ReducerType, - class PredicateType> -struct StdAdjacentFindFunctor { - using red_value_type = typename ReducerType::value_type; - - IteratorType m_first; - ReducerType m_reducer; - PredicateType m_p; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - const auto& my_value = m_first[i]; - const auto& next_value = m_first[i + 1]; - const bool are_equal = m_p(my_value, next_value); - - auto rv = - are_equal - ? 
red_value_type{i} - : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; - - m_reducer.join(red_value, rv); - } - - KOKKOS_FUNCTION - StdAdjacentFindFunctor(IteratorType first, ReducerType reducer, - PredicateType p) - : m_first(std::move(first)), - m_reducer(std::move(reducer)), - m_p(std::move(p)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2, - class ReducerType, class PredicateType> -struct StdSearchFunctor { - using red_value_type = typename ReducerType::value_type; - - IteratorType1 m_first; - IteratorType1 m_last; - IteratorType2 m_s_first; - IteratorType2 m_s_last; - ReducerType m_reducer; - PredicateType m_p; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - namespace KE = ::Kokkos::Experimental; - auto myit = m_first + i; - bool found = true; - - const auto search_count = KE::distance(m_s_first, m_s_last); - for (IndexType k = 0; k < search_count; ++k) { - // note that we add this EXPECT to check if we are in a valid range - // but I think we can remove this beceause the guarantee we don't go - // out of bounds is taken care of at the calling site - // where we launch the par-reduce. - KOKKOS_EXPECTS((myit + k) < m_last); - - if (!m_p(myit[k], m_s_first[k])) { - found = false; - break; - } - } - - const auto rv = - found ? 
red_value_type{i} - : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; - - m_reducer.join(red_value, rv); - } - - KOKKOS_FUNCTION - StdSearchFunctor(IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - ReducerType reducer, PredicateType p) - : m_first(std::move(first)), - m_last(std::move(last)), - m_s_first(std::move(s_first)), - m_s_last(std::move(s_last)), - m_reducer(std::move(reducer)), - m_p(std::move(p)) {} -}; - -template <class IndexType, class IteratorType, class SizeType, class ValueType, - class ReducerType, class PredicateType> -struct StdSearchNFunctor { - using red_value_type = typename ReducerType::value_type; - - IteratorType m_first; - IteratorType m_last; - SizeType m_count; - ValueType m_value; - ReducerType m_reducer; - PredicateType m_p; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - namespace KE = ::Kokkos::Experimental; - auto myit = m_first + i; - bool found = true; - - for (SizeType k = 0; k < m_count; ++k) { - // note that we add this EXPECT to check if we are in a valid range - // but I think we can remove this beceause the guarantee we don't go - // out of bounds is taken care of at the calling site - // where we launch the par-reduce. - KOKKOS_EXPECTS((myit + k) < m_last); - - if (!m_p(myit[k], m_value)) { - found = false; - break; - } - } - - const auto rv = - found ? 
red_value_type{i} - : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; - - m_reducer.join(red_value, rv); - } - - KOKKOS_FUNCTION - StdSearchNFunctor(IteratorType first, IteratorType last, SizeType count, - ValueType value, ReducerType reducer, PredicateType p) - : m_first(std::move(first)), - m_last(std::move(last)), - m_count(std::move(count)), - m_value(std::move(value)), - m_reducer(std::move(reducer)), - m_p(std::move(p)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2, - class ReducerType, class PredicateType> -struct StdFindFirstOfFunctor { - using red_value_type = typename ReducerType::value_type; - - IteratorType1 m_first; - IteratorType2 m_s_first; - IteratorType2 m_s_last; - ReducerType m_reducer; - PredicateType m_p; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - namespace KE = ::Kokkos::Experimental; - const auto& myvalue = m_first[i]; - bool found = false; - - const auto search_count = KE::distance(m_s_first, m_s_last); - for (IndexType k = 0; k < search_count; ++k) { - if (m_p(myvalue, m_s_first[k])) { - found = true; - break; - } - } - - const auto rv = - found ? 
red_value_type{i} - : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; - - m_reducer.join(red_value, rv); - } - - KOKKOS_FUNCTION - StdFindFirstOfFunctor(IteratorType1 first, IteratorType2 s_first, - IteratorType2 s_last, ReducerType reducer, - PredicateType p) - : m_first(std::move(first)), - m_s_first(std::move(s_first)), - m_s_last(std::move(s_last)), - m_reducer(std::move(reducer)), - m_p(std::move(p)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2, - class ReducerType, class PredicateType> -struct StdFindEndFunctor { - using red_value_type = typename ReducerType::value_type; - - IteratorType1 m_first; - IteratorType1 m_last; - IteratorType2 m_s_first; - IteratorType2 m_s_last; - ReducerType m_reducer; - PredicateType m_p; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - namespace KE = ::Kokkos::Experimental; - auto myit = m_first + i; - bool found = true; - - const auto search_count = KE::distance(m_s_first, m_s_last); - for (IndexType k = 0; k < search_count; ++k) { - // note that we add this EXPECT to check if we are in a valid range - // but I think we can remvoe this beceause the guarantee we don't go - // out of bounds is taken care of at the calling site - // where we launch the par-reduce. - KOKKOS_EXPECTS((myit + k) < m_last); - - if (!m_p(myit[k], m_s_first[k])) { - found = false; - break; - } - } - - const auto rv = - found ? 
red_value_type{i} - : red_value_type{::Kokkos::reduction_identity<IndexType>::max()}; - - m_reducer.join(red_value, rv); - } - - KOKKOS_FUNCTION - StdFindEndFunctor(IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - ReducerType reducer, PredicateType p) - : m_first(std::move(first)), - m_last(std::move(last)), - m_s_first(std::move(s_first)), - m_s_last(std::move(s_last)), - m_reducer(std::move(reducer)), - m_p(std::move(p)) {} -}; - -// ------------------------------------------ -// find_if_or_not_impl -// ------------------------------------------ -template <bool is_find_if, class ExecutionSpace, class IteratorType, - class PredicateType> -IteratorType find_if_or_not_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType pred) { - // checks - Impl::static_assert_random_access_and_accessible( - ex, first); // only need one It per type - Impl::expect_valid_range(first, last); - - if (first == last) { - return last; - } - - // aliases - using index_type = typename IteratorType::difference_type; - using reducer_type = FirstLoc<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = StdFindIfOrNotFunctor<is_find_if, index_type, IteratorType, - reducer_type, PredicateType>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, reducer, pred), reducer); - - // fence not needed because reducing into scalar - - // decide and return - if (red_result.min_loc_true == - ::Kokkos::reduction_identity<index_type>::min()) { - // here, it means a valid loc has not been found, - return last; - } else { - // a location has been found - return first + red_result.min_loc_true; - } -} - -// ------------------------------------------ -// find_impl -// 
------------------------------------------ -template <class ExecutionSpace, class InputIterator, class T> -InputIterator find_impl(const std::string& label, ExecutionSpace ex, - InputIterator first, InputIterator last, - const T& value) { - return find_if_or_not_impl<true>( - label, ex, first, last, - ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate<T>(value)); -} - -// ------------------------------------------ -// for_each_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class UnaryFunctorType> -UnaryFunctorType for_each_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor)); - ex.fence("Kokkos::for_each: fence after operation"); - - return functor; -} - -// ------------------------------------------ -// for_each_n_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class SizeType, - class UnaryFunctorType> -IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, - UnaryFunctorType functor) { - auto last = first + n; - Impl::static_assert_random_access_and_accessible(ex, first, last); - Impl::expect_valid_range(first, last); - - if (n == 0) { - return first; - } - - for_each_impl(label, ex, first, last, std::move(functor)); - // no neeed to fence since for_each_impl fences already - - return last; -} - -// ------------------------------------------ -// count_if_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, 
class Predicate> -typename IteratorType::difference_type count_if_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - Predicate predicate) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - // aliases - using func_t = StdCountIfFunctor<IteratorType, Predicate>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - typename IteratorType::difference_type count = 0; - ::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, predicate), count); - ex.fence("Kokkos::count_if: fence after operation"); - - return count; -} - -// ------------------------------------------ -// count_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class T> -auto count_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { - return count_if_impl( - label, ex, first, last, - ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate<T>(value)); -} - -// ------------------------------------------ -// mismatch_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -::Kokkos::pair<IteratorType1, IteratorType2> mismatch_impl( - const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first1, first2); - Impl::static_assert_iterators_have_matching_difference_type(first1, first2); - Impl::expect_valid_range(first1, last1); - Impl::expect_valid_range(first2, last2); - - // aliases - using return_type = ::Kokkos::pair<IteratorType1, IteratorType2>; - using index_type = typename IteratorType1::difference_type; 
- using reducer_type = FirstLoc<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using functor_type = - StdMismatchRedFunctor<index_type, IteratorType1, IteratorType2, - reducer_type, BinaryPredicateType>; - - // trivial case: note that this is important, - // for OpenMPTarget, omitting special handling of - // the trivial case was giving all sorts of strange stuff. - const auto num_e1 = last1 - first1; - const auto num_e2 = last2 - first2; - if (num_e1 == 0 || num_e2 == 0) { - return return_type(first1, first2); - } - - // run - const auto num_elemen_par_reduce = (num_e1 <= num_e2) ? num_e1 : num_e2; - reduction_value_type red_result; - reducer_type reducer(red_result); - ::Kokkos::parallel_reduce( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elemen_par_reduce), - functor_type(first1, first2, reducer, std::move(predicate)), reducer); - - // fence not needed because reducing into scalar - - // decide and return - constexpr auto red_min = ::Kokkos::reduction_identity<index_type>::min(); - if (red_result.min_loc_true == red_min) { - // in here means mismatch has not been found - if (num_e1 == num_e2) { - return return_type(last1, last2); - } else if (num_e1 < num_e2) { - return return_type(last1, first2 + num_e1); - } else { - return return_type(first1 + num_e2, last2); - } - } else { - // in here means mismatch has been found - return return_type(first1 + red_result.min_loc_true, - first2 + red_result.min_loc_true); - } -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -::Kokkos::pair<IteratorType1, IteratorType2> mismatch_impl( - const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - using value_type1 = typename IteratorType1::value_type; - using value_type2 = typename IteratorType2::value_type; - using pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; - return mismatch_impl(label, ex, first1, last1, 
first2, last2, pred_t()); -} - -// ------------------------------------------ -// all_of_impl, any_of_impl, none_of_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator, class Predicate> -bool all_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl<false>(label, ex, first, last, predicate) == - last); -} - -template <class ExecutionSpace, class InputIterator, class Predicate> -bool any_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl<true>(label, ex, first, last, predicate) != last); -} - -template <class ExecutionSpace, class IteratorType, class Predicate> -bool none_of_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Predicate predicate) { - return (find_if_or_not_impl<true>(label, ex, first, last, predicate) == last); -} - -// ------------------------------------------ -// equal_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first1, first2); - Impl::static_assert_iterators_have_matching_difference_type(first1, first2); - Impl::expect_valid_range(first1, last1); - - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = StdEqualFunctor<index_type, IteratorType1, IteratorType2, - BinaryPredicateType>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first1, last1); - std::size_t different = 0; - ::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 
0, num_elements), - func_t(first1, first2, predicate), different); - ex.fence("Kokkos::equal: fence after operation"); - - return !different; -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { - using value_type1 = typename IteratorType1::value_type; - using value_type2 = typename IteratorType2::value_type; - using pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; - return equal_impl(label, ex, first1, last1, first2, pred_t()); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2, BinaryPredicateType predicate) { - const auto d1 = ::Kokkos::Experimental::distance(first1, last1); - const auto d2 = ::Kokkos::Experimental::distance(first2, last2); - if (d1 != d2) { - return false; - } - - return equal_impl(label, ex, first1, last1, first2, predicate); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2) { - Impl::expect_valid_range(first1, last1); - Impl::expect_valid_range(first2, last2); - - using value_type1 = typename IteratorType1::value_type; - using value_type2 = typename IteratorType2::value_type; - using pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; - return equal_impl(label, ex, first1, last1, first2, last2, pred_t()); -} - -// ------------------------------------------ -// lexicographical_compare_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class ComparatorType> -bool 
lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - ComparatorType comp) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first1, first2); - Impl::static_assert_iterators_have_matching_difference_type(first1, first2); - Impl::expect_valid_range(first1, last1); - Impl::expect_valid_range(first2, last2); - - // aliases - using index_type = typename IteratorType1::difference_type; - using reducer_type = FirstLoc<index_type>; - using reduction_value_type = typename reducer_type::value_type; - - // run - const auto d1 = Kokkos::Experimental::distance(first1, last1); - const auto d2 = Kokkos::Experimental::distance(first2, last2); - const auto range = Kokkos::Experimental::min(d1, d2); - reduction_value_type red_result; - reducer_type reducer(red_result); - using func1_t = - StdLexicographicalCompareFunctor<index_type, IteratorType1, IteratorType2, - reducer_type, ComparatorType>; - - ::Kokkos::parallel_reduce(label, RangePolicy<ExecutionSpace>(ex, 0, range), - func1_t(first1, first2, reducer, comp), reducer); - - // fence not needed because reducing into scalar - // no mismatch - if (red_result.min_loc_true == - ::Kokkos::reduction_identity<index_type>::min()) { - auto new_last1 = first1 + range; - auto new_last2 = first2 + range; - bool is_prefix = (new_last1 == last1) && (new_last2 != last2); - return is_prefix; - } - - // check mismatched - int less = 0; - auto it1 = first1 + red_result.min_loc_true; - auto it2 = first2 + red_result.min_loc_true; - using func2_t = StdCompareFunctor<index_type, IteratorType1, IteratorType2, - ComparatorType>; - ::Kokkos::parallel_reduce(label, RangePolicy<ExecutionSpace>(ex, 0, 1), - func2_t(it1, it2, comp), less); - - // fence not needed because reducing into scalar - return static_cast<bool>(less); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -bool 
lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { - using value_type_1 = typename IteratorType1::value_type; - using value_type_2 = typename IteratorType2::value_type; - using predicate_t = - Impl::StdAlgoLessThanBinaryPredicate<value_type_1, value_type_2>; - return lexicographical_compare_impl(label, ex, first1, last1, first2, last2, - predicate_t()); -} - -// ------------------------------------------ -// adjacent_find_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class PredicateType> -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType pred) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - const auto num_elements = Kokkos::Experimental::distance(first, last); - - if (num_elements <= 1) { - return last; - } - - using index_type = typename IteratorType::difference_type; - using reducer_type = FirstLoc<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = StdAdjacentFindFunctor<index_type, IteratorType, reducer_type, - PredicateType>; - - reduction_value_type red_result; - reducer_type reducer(red_result); - - // note that we use below num_elements-1 because - // each index i in the reduction checks i and (i+1). 
- ::Kokkos::parallel_reduce( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements - 1), - func_t(first, reducer, pred), reducer); - - // fence not needed because reducing into scalar - if (red_result.min_loc_true == - ::Kokkos::reduction_identity<index_type>::min()) { - return last; - } else { - return first + red_result.min_loc_true; - } -} - -template <class ExecutionSpace, class IteratorType> -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - using value_type = typename IteratorType::value_type; - using default_pred_t = StdAlgoEqualBinaryPredicate<value_type>; - return adjacent_find_impl(label, ex, first, last, default_pred_t()); -} - -// ------------------------------------------ -// search_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 search_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - const BinaryPredicateType& pred) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, s_first); - Impl::static_assert_iterators_have_matching_difference_type(first, s_first); - Impl::expect_valid_range(first, last); - Impl::expect_valid_range(s_first, s_last); - - // the target sequence should not be larger than the range [first, last) - namespace KE = ::Kokkos::Experimental; - const auto num_elements = KE::distance(first, last); - const auto s_count = KE::distance(s_first, s_last); - KOKKOS_EXPECTS(num_elements >= s_count); - (void)s_count; // needed when macro above is a no-op - - if (s_first == s_last) { - return first; - } - - if (first == last) { - return last; - } - - // special case where the two ranges have equal size - if (num_elements == s_count) { - const auto equal_result = equal_impl(label, ex, first, last, s_first, pred); - return 
(equal_result) ? first : last; - } else { - using index_type = typename IteratorType1::difference_type; - using reducer_type = FirstLoc<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = StdSearchFunctor<index_type, IteratorType1, IteratorType2, - reducer_type, BinaryPredicateType>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result); - - // decide the size of the range policy of the par_red: - // note that the last feasible index to start looking is the index - // whose distance from the "last" is equal to the sequence count. - // the +1 is because we need to include that location too. - const auto range_size = num_elements - s_count + 1; - - // run par reduce - ::Kokkos::parallel_reduce( - label, RangePolicy<ExecutionSpace>(ex, 0, range_size), - func_t(first, last, s_first, s_last, reducer, pred), reducer); - - // fence not needed because reducing into scalar - - // decide and return - if (red_result.min_loc_true == - ::Kokkos::reduction_identity<index_type>::min()) { - // location has not been found - return last; - } else { - // location has been found - return first + red_result.min_loc_true; - } - } -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 search_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { - using value_type1 = typename IteratorType1::value_type; - using value_type2 = typename IteratorType2::value_type; - using predicate_type = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; - return search_impl(label, ex, first, last, s_first, s_last, predicate_type()); -} - -// ------------------------------------------ -// search_n_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class SizeType, - class ValueType, class BinaryPredicateType> -IteratorType search_n_impl(const 
std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - SizeType count, const ValueType& value, - const BinaryPredicateType& pred) { - // checks - static_assert_random_access_and_accessible(ex, first); - expect_valid_range(first, last); - KOKKOS_EXPECTS((std::ptrdiff_t)count >= 0); - - // count should not be larger than the range [first, last) - namespace KE = ::Kokkos::Experimental; - const auto num_elements = KE::distance(first, last); - // cast things to avoid compiler warning - KOKKOS_EXPECTS((std::size_t)num_elements >= (std::size_t)count); - - if (first == last) { - return first; - } - - // special case where num elements in [first, last) == count - if ((std::size_t)num_elements == (std::size_t)count) { - using equal_to_value = StdAlgoEqualsValUnaryPredicate<ValueType>; - const auto satisfies = - all_of_impl(label, ex, first, last, equal_to_value(value)); - return (satisfies) ? first : last; - } else { - // aliases - using index_type = typename IteratorType::difference_type; - using reducer_type = FirstLoc<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = - StdSearchNFunctor<index_type, IteratorType, SizeType, ValueType, - reducer_type, BinaryPredicateType>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result); - - // decide the size of the range policy of the par_red: - // the last feasible index to start looking is the index - // whose distance from the "last" is equal to count. - // the +1 is because we need to include that location too. 
- const auto range_size = num_elements - count + 1; - - // run par reduce - ::Kokkos::parallel_reduce( - label, RangePolicy<ExecutionSpace>(ex, 0, range_size), - func_t(first, last, count, value, reducer, pred), reducer); - - // fence not needed because reducing into scalar - - // decide and return - if (red_result.min_loc_true == - ::Kokkos::reduction_identity<index_type>::min()) { - // location has not been found - return last; - } else { - // location has been found - return first + red_result.min_loc_true; - } - } -} - -template <class ExecutionSpace, class IteratorType, class SizeType, - class ValueType> -IteratorType search_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - SizeType count, const ValueType& value) { - using iter_value_type = typename IteratorType::value_type; - using predicate_type = - StdAlgoEqualBinaryPredicate<iter_value_type, ValueType>; - - /* above we use <iter_value_type, ValueType> for the predicate_type - to be consistent with the standard, which says: - - " - The signature of the predicate function should be equivalent to: - - bool pred(const Type1 &a, const Type2 &b); - - The type Type1 must be such that an object of type ForwardIt can be - dereferenced and then implicitly converted to Type1. The type Type2 must be - such that an object of type T can be implicitly converted to Type2. - " - - In our case, IteratorType = ForwardIt, and ValueType = T. 
- */ - - return search_n_impl(label, ex, first, last, count, value, predicate_type()); -} - -// ------------------------------------------ -// find_first_of_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last, - const BinaryPredicateType& pred) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, s_first); - Impl::static_assert_iterators_have_matching_difference_type(first, s_first); - Impl::expect_valid_range(first, last); - Impl::expect_valid_range(s_first, s_last); - - if ((s_first == s_last) || (first == last)) { - return last; - } - - using index_type = typename IteratorType1::difference_type; - using reducer_type = FirstLoc<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = StdFindFirstOfFunctor<index_type, IteratorType1, IteratorType2, - reducer_type, BinaryPredicateType>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, s_first, s_last, reducer, pred), reducer); - - // fence not needed because reducing into scalar - - // decide and return - if (red_result.min_loc_true == - ::Kokkos::reduction_identity<index_type>::min()) { - // if here, nothing found - return last; - } else { - // a location has been found - return first + red_result.min_loc_true; - } -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last) { - 
using value_type1 = typename IteratorType1::value_type; - using value_type2 = typename IteratorType2::value_type; - using predicate_type = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; - return find_first_of_impl(label, ex, first, last, s_first, s_last, - predicate_type()); -} - -// ------------------------------------------ -// find_end_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - const BinaryPredicateType& pred) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, s_first); - Impl::static_assert_iterators_have_matching_difference_type(first, s_first); - Impl::expect_valid_range(first, last); - Impl::expect_valid_range(s_first, s_last); - - // the target sequence should not be larger than the range [first, last) - namespace KE = ::Kokkos::Experimental; - const auto num_elements = KE::distance(first, last); - const auto s_count = KE::distance(s_first, s_last); - KOKKOS_EXPECTS(num_elements >= s_count); - (void)s_count; // needed when macro above is a no-op - - if (s_first == s_last) { - return last; - } - - if (first == last) { - return last; - } - - // special case where the two ranges have equal size - if (num_elements == s_count) { - const auto equal_result = equal_impl(label, ex, first, last, s_first, pred); - return (equal_result) ? 
first : last; - } else { - using index_type = typename IteratorType1::difference_type; - using reducer_type = LastLoc<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = StdFindEndFunctor<index_type, IteratorType1, IteratorType2, - reducer_type, BinaryPredicateType>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result); - - // decide the size of the range policy of the par_red: - // note that the last feasible index to start looking is the index - // whose distance from the "last" is equal to the sequence count. - // the +1 is because we need to include that location too. - const auto range_size = num_elements - s_count + 1; - - // run par reduce - ::Kokkos::parallel_reduce( - label, RangePolicy<ExecutionSpace>(ex, 0, range_size), - func_t(first, last, s_first, s_last, reducer, pred), reducer); - - // fence not needed because reducing into scalar - - // decide and return - if (red_result.max_loc_true == - ::Kokkos::reduction_identity<index_type>::max()) { - // if here, a subrange has not been found - return last; - } else { - // a location has been found - return first + red_result.max_loc_true; - } - } -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { - using value_type1 = typename IteratorType1::value_type; - using value_type2 = typename IteratorType2::value_type; - using predicate_type = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; - return find_end_impl(label, ex, first, last, s_first, s_last, - predicate_type()); -} - -} // namespace Impl - -// ---------------------------------- -// find public API -// ---------------------------------- -template <class ExecutionSpace, class InputIterator, class T> -InputIterator find(const ExecutionSpace& ex, InputIterator first, - InputIterator 
last, const T& value) { - return Impl::find_impl("Kokkos::find_iterator_api_default", ex, first, last, - value); -} - -template <class ExecutionSpace, class InputIterator, class T> -InputIterator find(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl(label, ex, first, last, value); -} - -template <class ExecutionSpace, class DataType, class... Properties, class T> -auto find(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, const T& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_impl("Kokkos::find_view_api_default", ex, KE::begin(view), - KE::end(view), value); -} - -template <class ExecutionSpace, class DataType, class... Properties, class T> -auto find(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, const T& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_impl(label, ex, KE::begin(view), KE::end(view), value); -} - -// ------------------- -// find_if public API -// ------------------- -template <class ExecutionSpace, class IteratorType, class PredicateType> -IteratorType find_if(const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl<true>("Kokkos::find_if_iterator_api_default", - ex, first, last, std::move(predicate)); -} - -template <class ExecutionSpace, class IteratorType, class PredicateType> -IteratorType find_if(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - PredicateType predicate) { - return Impl::find_if_or_not_impl<true>(label, ex, first, last, - std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class Predicate> -auto find_if(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl<true>("Kokkos::find_if_view_api_default", ex, - KE::begin(v), KE::end(v), - std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class Predicate> -auto find_if(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl<true>(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); -} - -// ---------------------------------- -// find_if_not public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType, class Predicate> -IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first, - IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl<false>( - "Kokkos::find_if_not_iterator_api_default", ex, first, last, - std::move(predicate)); -} - -template <class ExecutionSpace, class IteratorType, class Predicate> -IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - Predicate predicate) { - return Impl::find_if_or_not_impl<false>(label, ex, first, last, - std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class Predicate> -auto find_if_not(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl<false>( - "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v), - std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class Predicate> -auto find_if_not(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl<false>(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); -} - -// ---------------------------------- -// for_each public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType, class UnaryFunctorType> -UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - UnaryFunctorType functor) { - return Impl::for_each_impl(label, ex, first, last, std::move(functor)); -} - -template <class ExecutionSpace, class IteratorType, class UnaryFunctorType> -UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_impl("Kokkos::for_each_iterator_api_default", ex, first, - last, std::move(functor)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class UnaryFunctorType> -UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - UnaryFunctorType functor) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl(label, ex, KE::begin(v), KE::end(v), - std::move(functor)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class UnaryFunctorType> -UnaryFunctorType for_each(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - UnaryFunctorType functor) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl("Kokkos::for_each_view_api_default", ex, - KE::begin(v), KE::end(v), std::move(functor)); -} - -// ---------------------------------- -// for_each_n public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType, class SizeType, - class UnaryFunctorType> -IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, - UnaryFunctorType functor) { - return Impl::for_each_n_impl(label, ex, first, n, std::move(functor)); -} - -template <class ExecutionSpace, class IteratorType, class SizeType, - class UnaryFunctorType> -IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first, - SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl("Kokkos::for_each_n_iterator_api_default", ex, - first, n, std::move(functor)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class SizeType, class UnaryFunctorType> -auto for_each_n(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, SizeType n, - UnaryFunctorType functor) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl(label, ex, KE::begin(v), n, std::move(functor)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class SizeType, class UnaryFunctorType> -auto for_each_n(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, SizeType n, - UnaryFunctorType functor) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl("Kokkos::for_each_n_view_api_default", ex, - KE::begin(v), n, std::move(functor)); -} - -// ---------------------------------- -// count_if public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType, class Predicate> -typename IteratorType::difference_type count_if(const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - Predicate predicate) { - return Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first, - last, std::move(predicate)); -} - -template <class ExecutionSpace, class IteratorType, class Predicate> -typename IteratorType::difference_type count_if(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - Predicate predicate) { - return Impl::count_if_impl(label, ex, first, last, std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class Predicate> -auto count_if(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class Predicate> -auto count_if(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); -} - -// ---------------------------------- -// count public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType, class T> -typename IteratorType::difference_type count(const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - const T& value) { - return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last, - value); -} - -template <class ExecutionSpace, class IteratorType, class T> -typename IteratorType::difference_type count(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - const T& value) { - return Impl::count_impl(label, ex, first, last, value); -} - -template <class ExecutionSpace, class DataType, class... Properties, class T> -auto count(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, const T& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::count_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v), - KE::cend(v), value); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, class T> -auto count(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, const T& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::count_impl(label, ex, KE::cbegin(v), KE::cend(v), value); -} - -// ---------------------------------- -// mismatch public API -// ---------------------------------- -// FIXME: add mismatch overloads accepting 3 iterators. -// An overload consistent with other algorithms: -// -// auto mismatch(const ExecSpace& ex, It1 first1, It1 last1, It2 first2) {...} -// -// makes API ambiguous (with the overload accepting views). - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -::Kokkos::pair<IteratorType1, IteratorType2> mismatch(const ExecutionSpace& ex, - IteratorType1 first1, - IteratorType1 last1, - IteratorType2 first2, - IteratorType2 last2) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -::Kokkos::pair<IteratorType1, IteratorType2> mismatch( - const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType&& predicate) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2, - std::forward<BinaryPredicateType>(predicate)); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -::Kokkos::pair<IteratorType1, IteratorType2> mismatch( - const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> 
-::Kokkos::pair<IteratorType1, IteratorType2> mismatch( - const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType&& predicate) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2, - std::forward<BinaryPredicateType>(predicate)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto mismatch(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - const ::Kokkos::View<DataType2, Properties2...>& view2) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicateType> -auto mismatch(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - const ::Kokkos::View<DataType2, Properties2...>& view2, - BinaryPredicateType&& predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2), - std::forward<BinaryPredicateType>(predicate)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto mismatch(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - const ::Kokkos::View<DataType2, Properties2...>& view2) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicateType> -auto mismatch(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - const ::Kokkos::View<DataType2, Properties2...>& view2, - BinaryPredicateType&& predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2), - std::forward<BinaryPredicateType>(predicate)); -} - -// ---------------------------------- -// all_of public API -// ---------------------------------- -template <class ExecutionSpace, class InputIterator, class Predicate> -bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, - Predicate predicate) { - return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first, - last, predicate); -} - -template <class ExecutionSpace, class InputIterator, class Predicate> -bool all_of(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl(label, ex, first, last, predicate); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class Predicate> -bool all_of(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class Predicate> -bool all_of(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); -} - -// ---------------------------------- -// any_of public API -// ---------------------------------- -template <class ExecutionSpace, class InputIterator, class Predicate> -bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, - Predicate predicate) { - return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, first, last, - predicate); -} - -template <class ExecutionSpace, class InputIterator, class Predicate> -bool any_of(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return Impl::any_of_impl(label, ex, first, last, predicate); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class Predicate> -bool any_of(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class Predicate> -bool any_of(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); -} - -// ---------------------------------- -// none_of public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType, class Predicate> -bool none_of(const ExecutionSpace& ex, IteratorType first, IteratorType last, - Predicate predicate) { - return Impl::none_of_impl("Kokkos::none_of_iterator_api_default", ex, first, - last, predicate); -} - -template <class ExecutionSpace, class IteratorType, class Predicate> -bool none_of(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Predicate predicate) { - return Impl::none_of_impl(label, ex, first, last, predicate); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class Predicate> -bool none_of(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl("Kokkos::none_of_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class Predicate> -bool none_of(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - Predicate predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); -} - -// ---------------------------------- -// equal public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2) { - return Impl::equal_impl(label, ex, first1, last1, first2); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, std::move(predicate)); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, 
IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, - std::move(predicate)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -bool equal(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - ::Kokkos::View<DataType2, Properties2...>& view2) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -bool equal(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - ::Kokkos::View<DataType2, Properties2...>& view2) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class BinaryPredicateType> -bool equal(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - ::Kokkos::View<DataType2, Properties2...>& view2, - BinaryPredicateType predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), - std::move(predicate)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicateType> -bool equal(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - ::Kokkos::View<DataType2, Properties2...>& view2, - BinaryPredicateType predicate) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2), std::move(predicate)); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl(label, ex, 
first1, last1, first2, last2); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2, std::move(predicate)); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2, - std::move(predicate)); -} - -// ---------------------------------- -// lexicographical_compare public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2) { - return Impl::lexicographical_compare_impl( - "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, - first2, last2); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, - last2); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -bool lexicographical_compare( - const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - ::Kokkos::View<DataType2, Properties2...>& view2) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( - "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -bool lexicographical_compare( - const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - ::Kokkos::View<DataType2, Properties2...>& view2) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2)); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class ComparatorType> -bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl( - "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, - first2, last2, comp); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class ComparatorType> -bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - ComparatorType comp) { - return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, - last2, comp); -} - -template 
<class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class ComparatorType> -bool lexicographical_compare( - const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - ::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( - "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class ComparatorType> -bool lexicographical_compare( - const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view1, - ::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); - - namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2), comp); -} - -// ---------------------------------- -// adjacent_find -// ---------------------------------- -// overload set1 -template <class ExecutionSpace, class IteratorType> -IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl(label, ex, first, last); -} - -template <class 
ExecutionSpace, class DataType, class... Properties> -auto adjacent_find(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v)); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto adjacent_find(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v)); -} - -// overload set2 -template <class ExecutionSpace, class IteratorType, class BinaryPredicateType> -IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, - IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last, pred); -} - -template <class ExecutionSpace, class IteratorType, class BinaryPredicateType> -IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - BinaryPredicateType pred) { - return Impl::adjacent_find_impl(label, ex, first, last, pred); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class BinaryPredicateType> -auto adjacent_find(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - BinaryPredicateType pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v), pred); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class BinaryPredicateType> -auto adjacent_find(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - BinaryPredicateType pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred); -} - -// ---------------------------------- -// search -// ---------------------------------- -// overload set 1: no binary predicate passed -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 search(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl(label, ex, first, last, s_first, s_last); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto search(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto search(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); -} - -// overload set 2: binary predicate passed -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last, pred); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 search(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - const BinaryPredicateType& pred) { - return Impl::search_impl(label, ex, first, last, s_first, s_last, pred); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class BinaryPredicateType> -auto search(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view, - const BinaryPredicateType& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicateType> -auto search(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view, - const BinaryPredicateType& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); -} - -// ---------------------------------- -// find_first_of -// ---------------------------------- -// overload set 1: no binary predicate passed -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl(label, 
ex, first, last, s_first, s_last); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto find_first_of(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto find_first_of(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); -} - -// overload set 2: binary predicate passed -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last, - const BinaryPredicateType& pred) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last, pred); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - 
const BinaryPredicateType& pred) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last, - pred); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicateType> -auto find_first_of(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view, - const BinaryPredicateType& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicateType> -auto find_first_of(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view, - const BinaryPredicateType& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); -} - -// ---------------------------------- -// search_n -// ---------------------------------- -// overload set 1: no binary predicate passed -template <class ExecutionSpace, class IteratorType, class SizeType, - class ValueType> -IteratorType search_n(const ExecutionSpace& ex, IteratorType first, - IteratorType last, SizeType count, - const ValueType& value) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value); -} - -template <class ExecutionSpace, class 
IteratorType, class SizeType, - class ValueType> -IteratorType search_n(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, SizeType count, - const ValueType& value) { - return Impl::search_n_impl(label, ex, first, last, count, value); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class SizeType, class ValueType> -auto search_n(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - SizeType count, const ValueType& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class SizeType, class ValueType> -auto search_n(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - SizeType count, const ValueType& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value); -} - -// overload set 2: binary predicate passed -template <class ExecutionSpace, class IteratorType, class SizeType, - class ValueType, class BinaryPredicateType> -IteratorType search_n(const ExecutionSpace& ex, IteratorType first, - IteratorType last, SizeType count, const ValueType& value, - const BinaryPredicateType& pred) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value, pred); -} - -template <class ExecutionSpace, class IteratorType, class SizeType, - class ValueType, class BinaryPredicateType> -IteratorType search_n(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, SizeType count, - const ValueType& value, const BinaryPredicateType& pred) { - return 
Impl::search_n_impl(label, ex, first, last, count, value, pred); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class SizeType, class ValueType, class BinaryPredicateType> -auto search_n(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - SizeType count, const ValueType& value, - const BinaryPredicateType& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value, - pred); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class SizeType, class ValueType, class BinaryPredicateType> -auto search_n(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - SizeType count, const ValueType& value, - const BinaryPredicateType& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value, pred); -} - -// ---------------------------------- -// find_end -// ---------------------------------- -// overload set 1: no binary predicate passed -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last) { - return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last); -} - -template <class ExecutionSpace, class DataType1, 
class... Properties1, - class DataType2, class... Properties2> -auto find_end(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto find_end(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); -} - -// overload set 2: binary predicate passed -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last, pred); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class BinaryPredicateType> -IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - const BinaryPredicateType& pred) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last, pred); -} - -template <class 
ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicateType> -auto find_end(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view, - const BinaryPredicateType& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicateType> -auto find_end(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ::Kokkos::View<DataType2, Properties2...>& s_view, - const BinaryPredicateType& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); - - namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); -} - -} // namespace Experimental -} // namespace Kokkos - -#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..30ffb52442e39f1fe33c3258194ebae5f249f060 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_NONE_OF_HPP +#define KOKKOS_STD_ALGORITHMS_NONE_OF_HPP + +#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class Predicate> +bool none_of(const ExecutionSpace& ex, IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::none_of_impl("Kokkos::none_of_iterator_api_default", ex, first, + last, predicate); +} + +template <class ExecutionSpace, class IteratorType, class Predicate> +bool none_of(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, Predicate predicate) { + return Impl::none_of_impl(label, ex, first, last, predicate); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class Predicate> +bool none_of(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::none_of_impl("Kokkos::none_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), std::move(predicate)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class Predicate> +bool none_of(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::none_of_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5b00669fd1bcc90cbcfeaf2dcaf609514cc566d8 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp @@ -0,0 +1,110 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_PARTITION_COPY_HPP +#define KOKKOS_STD_ALGORITHMS_PARTITION_COPY_HPP + +#include "impl/Kokkos_PartitionCopy.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorTrueType, class OutputIteratorFalseType, + class PredicateType> +::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType> partition_copy( + const ExecutionSpace& ex, InputIteratorType from_first, + InputIteratorType from_last, OutputIteratorTrueType to_first_true, + OutputIteratorFalseType to_first_false, PredicateType p) { + return Impl::partition_copy_impl( + "Kokkos::partition_copy_iterator_api_default", ex, from_first, from_last, + to_first_true, to_first_false, std::move(p)); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorTrueType, class OutputIteratorFalseType, + class PredicateType> +::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType> partition_copy( + const std::string& label, const 
ExecutionSpace& ex, + InputIteratorType from_first, InputIteratorType from_last, + OutputIteratorTrueType to_first_true, + OutputIteratorFalseType to_first_false, PredicateType p) { + return Impl::partition_copy_impl(label, ex, from_first, from_last, + to_first_true, to_first_false, std::move(p)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class DataType3, + class... Properties3, class PredicateType> +auto partition_copy( + const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest_true, + const ::Kokkos::View<DataType3, Properties3...>& view_dest_false, + PredicateType p) { + return Impl::partition_copy_impl("Kokkos::partition_copy_view_api_default", + ex, cbegin(view_from), cend(view_from), + begin(view_dest_true), + begin(view_dest_false), std::move(p)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class DataType3, + class... 
Properties3, class PredicateType> +auto partition_copy( + const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest_true, + const ::Kokkos::View<DataType3, Properties3...>& view_dest_false, + PredicateType p) { + return Impl::partition_copy_impl(label, ex, cbegin(view_from), + cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b714d5a27163ff1d7fecbe6f2ad973cfd2d33da0 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp @@ -0,0 +1,91 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_PARTITION_POINT_HPP +#define KOKKOS_STD_ALGORITHMS_PARTITION_POINT_HPP + +#include "impl/Kokkos_PartitionPoint.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType, class UnaryPredicate> +IteratorType partition_point(const ExecutionSpace& ex, IteratorType first, + IteratorType last, UnaryPredicate p) { + return Impl::partition_point_impl( + "Kokkos::partitioned_point_iterator_api_default", ex, first, last, + std::move(p)); +} + +template <class ExecutionSpace, class IteratorType, class UnaryPredicate> +IteratorType partition_point(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + UnaryPredicate p) { + return Impl::partition_point_impl(label, ex, first, last, std::move(p)); +} + +template <class ExecutionSpace, class UnaryPredicate, class DataType, + class... 
Properties> +auto partition_point(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + UnaryPredicate p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::partition_point_impl(label, ex, begin(v), end(v), std::move(p)); +} + +template <class ExecutionSpace, class UnaryPredicate, class DataType, + class... Properties> +auto partition_point(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& v, + UnaryPredicate p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::partition_point_impl("Kokkos::partition_point_view_api_default", + ex, begin(v), end(v), std::move(p)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitioningOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitioningOperations.hpp deleted file mode 100644 index 9806084fc0e53d45d42e3f88d6ccf95a1fcc5748..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_PartitioningOperations.hpp +++ /dev/null @@ -1,491 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_STD_PARTITIONING_OPERATIONS_HPP -#define KOKKOS_STD_PARTITIONING_OPERATIONS_HPP - -#include <Kokkos_Core.hpp> -#include "Kokkos_BeginEnd.hpp" -#include "Kokkos_Constraints.hpp" -#include "Kokkos_ModifyingOperations.hpp" -#include "Kokkos_NonModifyingSequenceOperations.hpp" - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -// ------------------------- -// -// functors -// -// ------------------------- - -template <class IteratorType, class ReducerType, class PredicateType> -struct StdIsPartitionedFunctor { - using red_value_type = typename ReducerType::value_type; - using index_type = typename IteratorType::difference_type; - - IteratorType m_first; - ReducerType m_reducer; - PredicateType m_p; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& redValue) const { - const auto predicate_value = m_p(m_first[i]); - constexpr index_type m_red_id_min = - ::Kokkos::reduction_identity<index_type>::min(); - constexpr index_type m_red_id_max = - ::Kokkos::reduction_identity<index_type>::max(); - auto rv = predicate_value ? red_value_type{i, m_red_id_min} - : red_value_type{m_red_id_max, i}; - - m_reducer.join(redValue, rv); - } - - KOKKOS_FUNCTION - StdIsPartitionedFunctor(IteratorType first, ReducerType reducer, - PredicateType p) - : m_first(std::move(first)), - m_reducer(std::move(reducer)), - m_p(std::move(p)) {} -}; - -template <class IteratorType, class ReducerType, class PredicateType> -struct StdPartitionPointFunctor { - using red_value_type = typename ReducerType::value_type; - using index_type = typename IteratorType::difference_type; - - IteratorType m_first; - ReducerType m_reducer; - PredicateType m_p; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& redValue) const { - const auto predicate_value = m_p(m_first[i]); - auto rv = - predicate_value - ? 
red_value_type{::Kokkos::reduction_identity<index_type>::min()} - : red_value_type{i}; - m_reducer.join(redValue, rv); - } - - KOKKOS_FUNCTION - StdPartitionPointFunctor(IteratorType first, ReducerType reducer, - PredicateType p) - : m_first(std::move(first)), - m_reducer(std::move(reducer)), - m_p(std::move(p)) {} -}; - -template <class ValueType> -struct StdPartitionCopyScalar { - ValueType true_count_; - ValueType false_count_; - - // Here we implement the copy assignment operators explicitly for consistency - // with how the Scalar structs are implemented inside - // Kokkos_Parallel_Reduce.hpp. - KOKKOS_FUNCTION - void operator=(const StdPartitionCopyScalar& other) { - true_count_ = other.true_count_; - false_count_ = other.false_count_; - } - - KOKKOS_FUNCTION - void operator=(const volatile StdPartitionCopyScalar& other) volatile { - true_count_ = other.true_count_; - false_count_ = other.false_count_; - } - - // this is needed for - // OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp:699:21: error: no viable - // overloaded '=' m_returnvalue = 0; - // - KOKKOS_FUNCTION - void operator=(const ValueType value) { - true_count_ = value; - false_count_ = value; - } -}; - -template <class IndexType, class FirstFrom, class FirstDestTrue, - class FirstDestFalse, class PredType> -struct StdPartitionCopyFunctor { - using value_type = StdPartitionCopyScalar<IndexType>; - - FirstFrom m_first_from; - FirstDestTrue m_first_dest_true; - FirstDestFalse m_first_dest_false; - PredType m_pred; - - KOKKOS_FUNCTION - StdPartitionCopyFunctor(FirstFrom first_from, FirstDestTrue first_dest_true, - FirstDestFalse first_dest_false, PredType pred) - : m_first_from(std::move(first_from)), - m_first_dest_true(std::move(first_dest_true)), - m_first_dest_false(std::move(first_dest_false)), - m_pred(std::move(pred)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - const auto& myval = m_first_from[i]; - if (final_pass) { - if 
(m_pred(myval)) { - m_first_dest_true[update.true_count_] = myval; - } else { - m_first_dest_false[update.false_count_] = myval; - } - } - - if (m_pred(myval)) { - update.true_count_ += 1; - } else { - update.false_count_ += 1; - } - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.true_count_ = 0; - update.false_count_ = 0; - } - - KOKKOS_FUNCTION - void join(volatile value_type& update, - volatile const value_type& input) const { - update.true_count_ += input.true_count_; - update.false_count_ += input.false_count_; - } -}; - -// ------------------------------------------ -// is_partitioned_impl -// ------------------------------------------ - -template <class ExecutionSpace, class IteratorType, class PredicateType> -bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - PredicateType pred) { - // true if all elements in the range [first, last) that satisfy - // the predicate "pred" appear before all elements that don't. - // Also returns true if [first, last) is empty. - // also true if all elements satisfy the predicate. 
- - // we implement it by finding: - // - the max location where predicate is true (max_loc_true) - // - the min location where predicate is false (min_loc_false) - // so the range is partitioned if max_loc_true < (min_loc_false) - - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - // trivial case - if (first == last) { - return true; - } - - // aliases - using index_type = typename IteratorType::difference_type; - using reducer_type = StdIsPartitioned<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = - StdIsPartitionedFunctor<IteratorType, reducer_type, PredicateType>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, reducer, pred), reducer); - - // fence not needed because reducing into scalar - - // decide and return - constexpr index_type red_id_min = - ::Kokkos::reduction_identity<index_type>::min(); - constexpr index_type red_id_max = - ::Kokkos::reduction_identity<index_type>::max(); - - if (red_result.max_loc_true != red_id_max && - red_result.min_loc_false != red_id_min) { - return red_result.max_loc_true < red_result.min_loc_false; - } else if (first + red_result.max_loc_true == --last) { - return true; - } else { - return false; - } -} - -// ------------------------------------------ -// partition_point_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class PredicateType> -IteratorType partition_point_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType pred) { - // locates the end of the first partition, that is, the first - // element that does not satisfy p or last if all elements satisfy p. 
- // Implementation below finds the first location where p is false. - - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - if (first == last) { - return first; - } - - // aliases - using index_type = typename IteratorType::difference_type; - using reducer_type = StdPartitionPoint<index_type>; - using reduction_value_type = typename reducer_type::value_type; - using func_t = - StdPartitionPointFunctor<IteratorType, reducer_type, PredicateType>; - - // run - reduction_value_type red_result; - reducer_type reducer(red_result); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, reducer, pred), reducer); - - // fence not needed because reducing into scalar - - // decide and return - if (red_result.min_loc_false == - ::Kokkos::reduction_identity<index_type>::min()) { - // if all elements are true, return last - return last; - } else { - return first + red_result.min_loc_false; - } -} - -// ------------------------------------------ -// partition_copy_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorTrueType, class OutputIteratorFalseType, - class PredicateType> -::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType> -partition_copy_impl(const std::string& label, const ExecutionSpace& ex, - InputIteratorType from_first, InputIteratorType from_last, - OutputIteratorTrueType to_first_true, - OutputIteratorFalseType to_first_false, - PredicateType pred) { - // impl uses a scan, this is similar how we implemented copy_if - - // checks - Impl::static_assert_random_access_and_accessible( - ex, from_first, to_first_true, to_first_false); - Impl::static_assert_iterators_have_matching_difference_type( - from_first, to_first_true, to_first_false); - Impl::expect_valid_range(from_first, from_last); - - 
if (from_first == from_last) { - return {to_first_true, to_first_false}; - } - - // aliases - using index_type = typename InputIteratorType::difference_type; - using func_type = - StdPartitionCopyFunctor<index_type, InputIteratorType, - OutputIteratorTrueType, OutputIteratorFalseType, - PredicateType>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(from_first, from_last); - typename func_type::value_type counts{0, 0}; - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(from_first, to_first_true, to_first_false, pred), counts); - - // fence not needed here because of the scan into counts - - return {to_first_true + counts.true_count_, - to_first_false + counts.false_count_}; -} - -} // end namespace Impl - -// ---------------------- -// is_partitioned public API -// ---------------------- -template <class ExecutionSpace, class IteratorType, class PredicateType> -bool is_partitioned(const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl( - "Kokkos::is_partitioned_iterator_api_default", ex, first, last, - std::move(p)); -} - -template <class ExecutionSpace, class IteratorType, class PredicateType> -bool is_partitioned(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl(label, ex, first, last, std::move(p)); -} - -template <class ExecutionSpace, class PredicateType, class DataType, - class... Properties> -bool is_partitioned(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - PredicateType p) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - return Impl::is_partitioned_impl("Kokkos::is_partitioned_view_api_default", - ex, cbegin(v), cend(v), std::move(p)); -} - -template <class ExecutionSpace, class PredicateType, class DataType, - class... 
Properties> -bool is_partitioned(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - PredicateType p) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - - return Impl::is_partitioned_impl(label, ex, cbegin(v), cend(v), std::move(p)); -} - -// ---------------------- -// partition_copy -// ---------------------- -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorTrueType, class OutputIteratorFalseType, - class PredicateType> -::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType> partition_copy( - const ExecutionSpace& ex, InputIteratorType from_first, - InputIteratorType from_last, OutputIteratorTrueType to_first_true, - OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl( - "Kokkos::partition_copy_iterator_api_default", ex, from_first, from_last, - to_first_true, to_first_false, std::move(p)); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorTrueType, class OutputIteratorFalseType, - class PredicateType> -::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType> partition_copy( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType from_first, InputIteratorType from_last, - OutputIteratorTrueType to_first_true, - OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, from_first, from_last, - to_first_true, to_first_false, std::move(p)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class DataType3, - class... 
Properties3, class PredicateType> -auto partition_copy( - const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest_true, - const ::Kokkos::View<DataType3, Properties3...>& view_dest_false, - PredicateType p) { - return Impl::partition_copy_impl("Kokkos::partition_copy_view_api_default", - ex, cbegin(view_from), cend(view_from), - begin(view_dest_true), - begin(view_dest_false), std::move(p)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class DataType3, - class... Properties3, class PredicateType> -auto partition_copy( - const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest_true, - const ::Kokkos::View<DataType3, Properties3...>& view_dest_false, - PredicateType p) { - return Impl::partition_copy_impl(label, ex, cbegin(view_from), - cend(view_from), begin(view_dest_true), - begin(view_dest_false), std::move(p)); -} - -// ---------------------- -// partition_point -// ---------------------- -template <class ExecutionSpace, class IteratorType, class UnaryPredicate> -IteratorType partition_point(const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryPredicate p) { - return Impl::partition_point_impl( - "Kokkos::partitioned_point_iterator_api_default", ex, first, last, - std::move(p)); -} - -template <class ExecutionSpace, class IteratorType, class UnaryPredicate> -IteratorType partition_point(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - UnaryPredicate p) { - return Impl::partition_point_impl(label, ex, first, last, std::move(p)); -} - -template <class ExecutionSpace, class UnaryPredicate, class DataType, - class... 
Properties> -auto partition_point(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - UnaryPredicate p) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl(label, ex, begin(v), end(v), std::move(p)); -} - -template <class ExecutionSpace, class UnaryPredicate, class DataType, - class... Properties> -auto partition_point(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& v, - UnaryPredicate p) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl("Kokkos::partition_point_view_api_default", - ex, begin(v), end(v), std::move(p)); -} - -} // namespace Experimental -} // namespace Kokkos - -#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_Reduce.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp similarity index 70% rename from packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_Reduce.hpp rename to packages/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp index bf03f6e98f83b388462f4c26864e43dbc285d109..3cf9153202a3339c9b96bd054c5fcf6ceeb79d3d 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_Reduce.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp @@ -42,119 +42,14 @@ //@HEADER */ -#ifndef KOKKOS_STD_NUMERICS_REDUCE_HPP -#define KOKKOS_STD_NUMERICS_REDUCE_HPP +#ifndef KOKKOS_STD_ALGORITHMS_REDUCE_HPP +#define KOKKOS_STD_ALGORITHMS_REDUCE_HPP -#include <Kokkos_Core.hpp> -#include "../Kokkos_BeginEnd.hpp" -#include "../Kokkos_Constraints.hpp" -#include "../Kokkos_Distance.hpp" -#include "../Kokkos_ModifyingOperations.hpp" -#include "../Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp" +#include "impl/Kokkos_Reduce.hpp" +#include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { -namespace Impl { - -template <class ValueType> -struct StdReduceDefaultJoinFunctor { - 
KOKKOS_FUNCTION - constexpr ValueType operator()(const ValueType& a, const ValueType& b) const { - return a + b; - } - - KOKKOS_FUNCTION - constexpr ValueType operator()(const volatile ValueType& a, - const volatile ValueType& b) const { - return a + b; - } -}; - -template <class IteratorType, class ReducerType> -struct StdReduceFunctor { - using red_value_type = typename ReducerType::value_type; - using index_type = typename IteratorType::difference_type; - - const IteratorType m_first; - const ReducerType m_reducer; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& red_value) const { - auto tmp_wrapped_value = red_value_type{m_first[i], false}; - - if (red_value.is_initial) { - red_value = tmp_wrapped_value; - } else { - m_reducer.join(red_value, tmp_wrapped_value); - } - } - - KOKKOS_FUNCTION - StdReduceFunctor(IteratorType first, ReducerType reducer) - : m_first(std::move(first)), m_reducer(std::move(reducer)) {} -}; - -//------------------------------ -// reduce_custom_functors_impl -//------------------------------ -template <class ExecutionSpace, class IteratorType, class ValueType, - class JoinerType> -ValueType reduce_custom_functors_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ValueType init_reduction_value, - JoinerType joiner) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::static_assert_is_not_openmptarget(ex); - Impl::expect_valid_range(first, last); - - if (first == last) { - // init is returned, unmodified - return init_reduction_value; - } - - // aliases - using reducer_type = - ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>; - using functor_type = StdReduceFunctor<IteratorType, reducer_type>; - using reduction_value_type = typename reducer_type::value_type; - - // run - reduction_value_type result; - reducer_type reducer(result, joiner); - const auto num_elements = Kokkos::Experimental::distance(first, last); - 
::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - functor_type(first, reducer), reducer); - - // fence not needed since reducing into scalar - return joiner(result.val, init_reduction_value); -} - -template <class ExecutionSpace, class IteratorType, class ValueType> -ValueType reduce_default_functors_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ValueType init_reduction_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::static_assert_is_not_openmptarget(ex); - Impl::expect_valid_range(first, last); - - using value_type = Kokkos::Impl::remove_cvref_t<ValueType>; - using joiner_type = Impl::StdReduceDefaultJoinFunctor<value_type>; - return reduce_custom_functors_impl( - label, ex, first, last, std::move(init_reduction_value), joiner_type()); -} - -} // end namespace Impl - -/////////////////////////////// -// -// reduce public API -// -/////////////////////////////// // // overload set 1 diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d8d7c999b6edc70d9e4d6bf702cc92c6fadfa358 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp @@ -0,0 +1,91 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REMOVE_HPP +#define KOKKOS_STD_ALGORITHMS_REMOVE_HPP + +#include "impl/Kokkos_RemoveAllVariants.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class Iterator, class ValueType> +Iterator remove(const ExecutionSpace& ex, Iterator first, Iterator last, + const ValueType& value) { + return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, first, + last, value); +} + +template <class ExecutionSpace, class Iterator, class ValueType> +Iterator remove(const std::string& label, const ExecutionSpace& ex, + Iterator first, Iterator last, const ValueType& value) { + return Impl::remove_impl(label, ex, first, last, value); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class ValueType> +auto remove(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class ValueType> +auto remove(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::remove_impl(label, ex, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7d5c163af95c0b8f804a064af4b13d1dd1940a97 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp @@ -0,0 +1,106 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REMOVE_COPY_HPP +#define KOKKOS_STD_ALGORITHMS_REMOVE_COPY_HPP + +#include "impl/Kokkos_RemoveAllVariants.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class ValueType> +OutputIterator remove_copy(const ExecutionSpace& ex, InputIterator first_from, + InputIterator last_from, OutputIterator first_dest, + const ValueType& value) { + return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, + first_from, last_from, first_dest, value); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class ValueType> +OutputIterator remove_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first_from, InputIterator last_from, + OutputIterator first_dest, const ValueType& value) { + return Impl::remove_copy_impl(label, ex, first_from, last_from, first_dest, + value); +} + +template <class ExecutionSpace, class DataType1, class... 
Properties1, + class DataType2, class... Properties2, class ValueType> +auto remove_copy(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, + ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), + value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ValueType> +auto remove_copy(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_impl( + label, ex, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8a9a3e4c14cc441c3aa5d92843ad5e939b585aa3 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp @@ -0,0 +1,110 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REMOVE_COPY_IF_HPP +#define KOKKOS_STD_ALGORITHMS_REMOVE_COPY_IF_HPP + +#include "impl/Kokkos_RemoveAllVariants.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class UnaryPredicate> +OutputIterator remove_copy_if(const ExecutionSpace& ex, + InputIterator first_from, InputIterator last_from, + OutputIterator first_dest, + const UnaryPredicate& pred) { + return Impl::remove_copy_if_impl( + "Kokkos::remove_copy_if_iterator_api_default", ex, first_from, last_from, + first_dest, pred); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class UnaryPredicate> +OutputIterator remove_copy_if(const std::string& label, + const ExecutionSpace& ex, + InputIterator first_from, InputIterator last_from, + OutputIterator first_dest, + const UnaryPredicate& pred) { + return Impl::remove_copy_if_impl(label, ex, first_from, last_from, first_dest, + pred); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class UnaryPredicate> +auto remove_copy_if(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + const UnaryPredicate& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_if_impl( + "Kokkos::remove_copy_if_iterator_api_default", ex, + ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), pred); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class UnaryPredicate> +auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + const UnaryPredicate& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_if_impl( + label, ex, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e4171ca917855b3b45d35c9896035d0994a428af --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REMOVE_IF_HPP +#define KOKKOS_STD_ALGORITHMS_REMOVE_IF_HPP + +#include "impl/Kokkos_RemoveAllVariants.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class Iterator, class UnaryPredicate> +Iterator remove_if(const ExecutionSpace& ex, Iterator first, Iterator last, + UnaryPredicate pred) { + return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, + first, last, pred); +} + +template <class ExecutionSpace, class Iterator, class UnaryPredicate> +Iterator remove_if(const std::string& label, const ExecutionSpace& ex, + Iterator first, Iterator last, UnaryPredicate pred) { + return Impl::remove_if_impl(label, ex, first, last, pred); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class UnaryPredicate> +auto remove_if(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + UnaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class UnaryPredicate> +auto remove_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + UnaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::remove_if_impl(label, ex, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..10ca46af255d1f0e59347188e1901c11d54133d9 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp @@ -0,0 +1,93 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REPLACE_HPP +#define KOKKOS_STD_ALGORITHMS_REPLACE_HPP + +#include "impl/Kokkos_Replace.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class Iterator, class ValueType> +void replace(const ExecutionSpace& ex, Iterator first, Iterator last, + const ValueType& old_value, const ValueType& new_value) { + return Impl::replace_impl("Kokkos::replace_iterator_api", ex, first, last, + old_value, new_value); +} + +template <class ExecutionSpace, class Iterator, class ValueType> +void replace(const std::string& label, const ExecutionSpace& ex, Iterator first, + Iterator last, const ValueType& old_value, + const ValueType& new_value) { + return Impl::replace_impl(label, ex, first, last, old_value, new_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class ValueType> +void replace(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_impl("Kokkos::replace_view_api", ex, KE::begin(view), + KE::end(view), old_value, new_value); +} + +template <class ExecutionSpace, class DataType1, class... 
Properties1, + class ValueType> +void replace(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_impl(label, ex, KE::begin(view), KE::end(view), + old_value, new_value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f5136eb438f4dc2b65aaa1f6fd8791f58ad6a6b3 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp @@ -0,0 +1,107 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_HPP +#define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_HPP + +#include "impl/Kokkos_ReplaceCopy.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class ValueType> +OutputIterator replace_copy(const ExecutionSpace& ex, InputIterator first_from, + InputIterator last_from, OutputIterator first_dest, + const ValueType& old_value, + const ValueType& new_value) { + return Impl::replace_copy_impl("Kokkos::replace_copy_iterator_api", ex, + first_from, last_from, first_dest, old_value, + new_value); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class ValueType> +OutputIterator replace_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first_from, InputIterator last_from, + OutputIterator first_dest, + const ValueType& old_value, + const ValueType& new_value) { + return Impl::replace_copy_impl(label, ex, first_from, last_from, first_dest, + old_value, 
new_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ValueType> +auto replace_copy(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_impl("Kokkos::replace_copy_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), old_value, new_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ValueType> +auto replace_copy(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_impl(label, ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), + old_value, new_value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a3f3fe69abaf7cce17b72f69ca0ffdada733de6a --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp @@ -0,0 +1,111 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_HPP +#define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_HPP + +#include "impl/Kokkos_ReplaceCopyIf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class PredicateType, class ValueType> +OutputIterator replace_copy_if(const ExecutionSpace& ex, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, PredicateType pred, + const ValueType& new_value) { + return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_iterator_api", ex, + first_from, last_from, first_dest, pred, + new_value); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class PredicateType, class ValueType> +OutputIterator replace_copy_if(const std::string& label, + const ExecutionSpace& ex, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, PredicateType pred, + const ValueType& new_value) { + return Impl::replace_copy_if_impl(label, ex, first_from, last_from, + first_dest, pred, new_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class PredicateType, + class ValueType> +auto replace_copy_if(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + PredicateType pred, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), pred, new_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class PredicateType, + class ValueType> +auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + PredicateType pred, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_if_impl(label, ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), + pred, new_value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bdb59f28af1cee6be97d822f7d4b41ae47321f44 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp @@ -0,0 +1,96 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REPLACE_IF_HPP +#define KOKKOS_STD_ALGORITHMS_REPLACE_IF_HPP + +#include "impl/Kokkos_ReplaceIf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class Predicate, + class ValueType> +void replace_if(const ExecutionSpace& ex, InputIterator first, + InputIterator last, Predicate pred, + const ValueType& new_value) { + return Impl::replace_if_impl("Kokkos::replace_if_iterator_api", ex, first, + last, pred, new_value); +} + +template <class ExecutionSpace, class InputIterator, class Predicate, + class ValueType> +void replace_if(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, Predicate pred, + const ValueType& new_value) { + return Impl::replace_if_impl(label, ex, first, last, pred, new_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class Predicate, class ValueType> +void replace_if(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + Predicate pred, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_if_impl("Kokkos::replace_if_view_api", ex, + KE::begin(view), KE::end(view), pred, new_value); +} + +template <class ExecutionSpace, class DataType1, class... 
Properties1, + class Predicate, class ValueType> +void replace_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + Predicate pred, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_if_impl(label, ex, KE::begin(view), KE::end(view), pred, + new_value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4848b20f66973f22292da27ce97a68f886430870 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp @@ -0,0 +1,87 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REVERSE_HPP +#define KOKKOS_STD_ALGORITHMS_REVERSE_HPP + +#include "impl/Kokkos_Reverse.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator> +void reverse(const ExecutionSpace& ex, InputIterator first, + InputIterator last) { + return Impl::reverse_impl("Kokkos::reverse_iterator_api_default", ex, first, + last); +} + +template <class ExecutionSpace, class InputIterator> +void reverse(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last) { + return Impl::reverse_impl(label, ex, first, last); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +void reverse(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::reverse_impl("Kokkos::reverse_view_api_default", ex, + KE::begin(view), KE::end(view)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +void reverse(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::reverse_impl(label, ex, KE::begin(view), KE::end(view)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bb4462bf41d55a43fe5358e6c0ea3538c0fb35ab --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp @@ -0,0 +1,95 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REVERSE_COPY_HPP +#define KOKKOS_STD_ALGORITHMS_REVERSE_COPY_HPP + +#include "impl/Kokkos_ReverseCopy.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator reverse_copy(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + return Impl::reverse_copy_impl("Kokkos::reverse_copy_iterator_api_default", + ex, first, last, d_first); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator reverse_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::reverse_copy_impl(label, ex, first, last, d_first); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto reverse_copy(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::reverse_copy_impl("Kokkos::reverse_copy_view_api_default", ex, + cbegin(source), cend(source), begin(dest)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto reverse_copy(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::reverse_copy_impl(label, ex, cbegin(source), cend(source), + begin(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp new file mode 100644 index 0000000000000000000000000000000000000000..39975811a47c1b3124084e3f12ddb678b92d848f --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ROTATE_HPP +#define KOKKOS_STD_ALGORITHMS_ROTATE_HPP + +#include "impl/Kokkos_Rotate.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType> +IteratorType rotate(const ExecutionSpace& ex, IteratorType first, + IteratorType n_first, IteratorType last) { + return Impl::rotate_impl("Kokkos::rotate_iterator_api_default", ex, first, + n_first, last); +} + +template <class ExecutionSpace, class IteratorType> +IteratorType rotate(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType n_first, + IteratorType last) { + return Impl::rotate_impl(label, ex, first, n_first, last); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +auto rotate(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + std::size_t n_location) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::rotate_impl("Kokkos::rotate_view_api_default", ex, begin(view), + begin(view) + n_location, end(view)); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto rotate(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + std::size_t n_location) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::rotate_impl(label, ex, begin(view), begin(view) + n_location, + end(view)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f98686ab631ba031ab121cd0f7e224557c5d265c --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp @@ -0,0 +1,100 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ROTATE_COPY_HPP +#define KOKKOS_STD_ALGORITHMS_ROTATE_COPY_HPP + +#include "impl/Kokkos_RotateCopy.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator rotate_copy(const ExecutionSpace& ex, InputIterator first, + InputIterator n_first, InputIterator last, + OutputIterator d_first) { + return Impl::rotate_copy_impl("Kokkos::rotate_copy_iterator_api_default", ex, + first, n_first, last, d_first); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator rotate_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator n_first, + InputIterator last, OutputIterator d_first) { + return Impl::rotate_copy_impl(label, ex, first, n_first, last, d_first); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto rotate_copy(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + std::size_t n_location, + const ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::rotate_copy_impl("Kokkos::rotate_copy_view_api_default", ex, + cbegin(source), cbegin(source) + n_location, + cend(source), begin(dest)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto rotate_copy(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + std::size_t n_location, + const ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::rotate_copy_impl(label, ex, cbegin(source), + cbegin(source) + n_location, cend(source), + begin(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ce656da31c8610fca42f4ec3c8e6405c1ea1c0d6 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp @@ -0,0 +1,148 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_SEARCH_HPP +#define KOKKOS_STD_ALGORITHMS_SEARCH_HPP + +#include "impl/Kokkos_Search.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1: no binary predicate passed +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, + last, s_first, s_last); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType1 search(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + return Impl::search_impl(label, ex, first, last, s_first, s_last); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto search(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), KE::begin(s_view), + KE::end(s_view)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto search(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, + last, s_first, s_last, pred); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +IteratorType1 search(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::search_impl(label, ex, first, last, s_first, s_last, pred); +} + +template <class ExecutionSpace, class DataType1, class... 
Properties1, + class DataType2, class... Properties2, class BinaryPredicateType> +auto search(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType> +auto search(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view, + const ::Kokkos::View<DataType2, Properties2...>& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp new file mode 100644 index 0000000000000000000000000000000000000000..854d911e7602c4917133582d197b2a9d6fb18bdc --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp @@ -0,0 +1,144 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_SEARCH_N_HPP +#define KOKKOS_STD_ALGORITHMS_SEARCH_N_HPP + +#include "impl/Kokkos_SearchN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1: no binary predicate passed +template <class ExecutionSpace, class IteratorType, class SizeType, + class ValueType> +IteratorType search_n(const ExecutionSpace& ex, IteratorType first, + IteratorType last, SizeType count, + const ValueType& value) { + return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, + last, count, value); +} + +template <class ExecutionSpace, class IteratorType, class SizeType, + class ValueType> +IteratorType search_n(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, SizeType count, + const ValueType& value) { + return Impl::search_n_impl(label, ex, first, last, count, value); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class SizeType, class ValueType> +auto search_n(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + SizeType count, const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, + KE::begin(view), KE::end(view), count, value); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class SizeType, class ValueType> +auto search_n(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + SizeType count, const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, + value); +} + +// overload set 2: binary predicate passed +template <class ExecutionSpace, class IteratorType, class SizeType, + class ValueType, class BinaryPredicateType> +IteratorType search_n(const ExecutionSpace& ex, IteratorType first, + IteratorType last, SizeType count, const ValueType& value, + const BinaryPredicateType& pred) { + return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, + last, count, value, pred); +} + +template <class ExecutionSpace, class IteratorType, class SizeType, + class ValueType, class BinaryPredicateType> +IteratorType search_n(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, SizeType count, + const ValueType& value, const BinaryPredicateType& pred) { + return Impl::search_n_impl(label, ex, first, last, count, value, pred); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class SizeType, class ValueType, class BinaryPredicateType> +auto search_n(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + SizeType count, const ValueType& value, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, + KE::begin(view), KE::end(view), count, value, + pred); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class SizeType, class ValueType, class BinaryPredicateType> +auto search_n(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + SizeType count, const ValueType& value, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, + value, pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cee111af98a0d3e759b29feeb55db278b7ed20d3 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_HPP +#define KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_HPP + +#include "impl/Kokkos_ShiftLeft.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType> +IteratorType shift_left(const ExecutionSpace& ex, IteratorType first, + IteratorType last, + typename IteratorType::difference_type n) { + return Impl::shift_left_impl("Kokkos::shift_left_iterator_api_default", ex, + first, last, n); +} + +template <class ExecutionSpace, class IteratorType> +IteratorType shift_left(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + typename IteratorType::difference_type n) { + return Impl::shift_left_impl(label, ex, first, last, n); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +auto shift_left(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_left_impl("Kokkos::shift_left_view_api_default", ex, + begin(view), end(view), n); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto shift_left(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_left_impl(label, ex, begin(view), end(view), n); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f104d2bd7a67836ee826c7f0e44b0107c0443ee2 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_HPP +#define KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_HPP + +#include "impl/Kokkos_ShiftRight.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType> +IteratorType shift_right(const ExecutionSpace& ex, IteratorType first, + IteratorType last, + typename IteratorType::difference_type n) { + return Impl::shift_right_impl("Kokkos::shift_right_iterator_api_default", ex, + first, last, n); +} + +template <class ExecutionSpace, class IteratorType> +IteratorType shift_right(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + typename IteratorType::difference_type n) { + return Impl::shift_right_impl(label, ex, first, last, n); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto shift_right(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_right_impl("Kokkos::shift_right_view_api_default", ex, + begin(view), end(view), n); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +auto shift_right(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_right_impl(label, ex, begin(view), end(view), n); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SortingOperations.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SortingOperations.hpp deleted file mode 100644 index bcc38fb38cb4b905e7ece4538ad18009e62a6884..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SortingOperations.hpp +++ /dev/null @@ -1,378 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_STD_SORTING_OPERATIONS_HPP -#define KOKKOS_STD_SORTING_OPERATIONS_HPP - -#include <Kokkos_Core.hpp> -#include "Kokkos_BeginEnd.hpp" -#include "Kokkos_Constraints.hpp" -#include "Kokkos_NonModifyingSequenceOperations.hpp" -#include "Kokkos_HelperPredicates.hpp" -#include <string> - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -// ------------------ -// -// functors -// -// ------------------ - -template <class IteratorType, class IndicatorViewType, class ComparatorType> -struct StdIsSortedUntilFunctor { - using index_type = typename IteratorType::difference_type; - IteratorType m_first; - IndicatorViewType m_indicator; - ComparatorType m_comparator; - - KOKKOS_FUNCTION - void operator()(const index_type i, int& update, const bool final) const { - const auto& val_i = m_first[i]; - const auto& val_ip1 = m_first[i + 1]; - - if (m_comparator(val_ip1, val_i)) { - ++update; - } - - if (final) { - m_indicator(i) = update; - } - } - - KOKKOS_FUNCTION - StdIsSortedUntilFunctor(IteratorType _first1, IndicatorViewType indicator, - 
ComparatorType comparator) - : m_first(std::move(_first1)), - m_indicator(std::move(indicator)), - m_comparator(std::move(comparator)) {} -}; - -template <class IteratorType, class ComparatorType> -struct StdIsSortedFunctor { - using index_type = typename IteratorType::difference_type; - IteratorType m_first; - ComparatorType m_comparator; - - KOKKOS_FUNCTION - void operator()(const index_type i, std::size_t& update) const { - const auto& val_i = m_first[i]; - const auto& val_ip1 = m_first[i + 1]; - - if (m_comparator(val_ip1, val_i)) { - ++update; - } - } - - KOKKOS_FUNCTION - StdIsSortedFunctor(IteratorType _first1, ComparatorType comparator) - : m_first(std::move(_first1)), m_comparator(std::move(comparator)) {} -}; - -// ------------------------------------------ -// is_sorted_until_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class ComparatorType> -IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - const auto num_elements = Kokkos::Experimental::distance(first, last); - - // trivial case - if (num_elements <= 1) { - return last; - } - - /* - use scan and a helper "indicator" view - such that we scan the data and fill the indicator with - partial sum that is always 0 unless we find a pair that - breaks the sorting, so in that case the indicator will - have a 1 starting at the location where the sorting breaks. - So finding that 1 means finding the location we want. 
- */ - - // aliases - using indicator_value_type = std::size_t; - using indicator_view_type = - ::Kokkos::View<indicator_value_type*, ExecutionSpace>; - using functor_type = - StdIsSortedUntilFunctor<IteratorType, indicator_view_type, - ComparatorType>; - - // do scan - // use num_elements-1 because each index handles i and i+1 - const auto num_elements_minus_one = num_elements - 1; - indicator_view_type indicator("is_sorted_until_indicator_helper", - num_elements_minus_one); - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_minus_one), - functor_type(first, indicator, std::move(comp))); - - // try to find the first sentinel value, which indicates - // where the sorting condition breaks - namespace KE = ::Kokkos::Experimental; - constexpr indicator_value_type sentinel_value = 1; - auto r = - KE::find(ex, KE::cbegin(indicator), KE::cend(indicator), sentinel_value); - const auto shift = r - ::Kokkos::Experimental::cbegin(indicator); - - return first + (shift + 1); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - using value_type = typename IteratorType::value_type; - using pred_t = Impl::StdAlgoLessThanBinaryPredicate<value_type>; - return is_sorted_until_impl(label, ex, first, last, pred_t()); -} - -// ------------------------------------------ -// is_sorted_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class ComparatorType> -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ComparatorType comp) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - const auto num_elements = Kokkos::Experimental::distance(first, last); - if (num_elements <= 1) { - return true; - } - - // use num_elements-1 because each 
index handles i and i+1 - const auto num_elements_minus_one = num_elements - 1; - using functor_type = StdIsSortedFunctor<IteratorType, ComparatorType>; - - // result is incremented by one if sorting breaks at index i - std::size_t result = 0; - ::Kokkos::parallel_reduce( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_minus_one), - functor_type(first, std::move(comp)), result); - - return result == 0; -} - -template <class ExecutionSpace, class IteratorType> -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { - using value_type = typename IteratorType::value_type; - using pred_t = Impl::StdAlgoLessThanBinaryPredicate<value_type>; - return is_sorted_impl(label, ex, first, last, pred_t()); -} - -} // namespace Impl - -// ---------------------------------- -// is_sorted_until public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType> -IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::is_sorted_until_impl( - "Kokkos::is_sorted_until_iterator_api_default", ex, first, last); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl(label, ex, first, last); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto is_sorted_until(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties> -auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view)); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl( - "Kokkos::is_sorted_until_iterator_api_default", ex, first, last, - std::move(comp)); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::is_sorted_until_impl(label, ex, first, last, std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class ComparatorType> -auto is_sorted_until(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_not_openmptarget(ex); - - namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view), - std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class ComparatorType> -auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_not_openmptarget(ex); - - namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view), - std::move(comp)); -} - -// ---------------------------------- -// is_sorted public API -// ---------------------------------- -template <class ExecutionSpace, class IteratorType> -bool is_sorted(const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last); -} - -template <class ExecutionSpace, class IteratorType> -bool is_sorted(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { - return Impl::is_sorted_impl(label, ex, first, last); -} - -template <class ExecutionSpace, class DataType, class... Properties> -bool is_sorted(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties> -bool is_sorted(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view)); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last, - ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last, std::move(comp)); -} - -template <class ExecutionSpace, class IteratorType, class ComparatorType> -bool is_sorted(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, ComparatorType comp) { - Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl(label, ex, first, last, std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class ComparatorType> -bool is_sorted(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_not_openmptarget(ex); - - namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view), - std::move(comp)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class ComparatorType> -bool is_sorted(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - ComparatorType comp) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::static_assert_is_not_openmptarget(ex); - - namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view), - std::move(comp)); -} - -} // namespace Experimental -} // namespace Kokkos - -#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Swap.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Swap.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9006aa9167dd326d312b8c31cc0790d511245918 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Swap.hpp @@ -0,0 +1,69 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_SWAP_HPP +#define KOKKOS_STD_ALGORITHMS_SWAP_HPP + +#include <Kokkos_Core.hpp> + +namespace Kokkos { +namespace Experimental { + +// swap +template <class T> +KOKKOS_INLINE_FUNCTION void swap(T& a, T& b) noexcept { + static_assert( + std::is_move_assignable<T>::value && std::is_move_constructible<T>::value, + "Kokkos::Experimental::swap arguments must be move assignable " + "and move constructible"); + + T tmp = std::move(a); + a = std::move(b); + b = std::move(tmp); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2997cdab4b5fa58c47bc65eb4ed3d4804868a492 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp @@ -0,0 +1,97 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_SWAP_RANGES_HPP +#define KOKKOS_STD_ALGORITHMS_SWAP_RANGES_HPP + +#include "impl/Kokkos_SwapRanges.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 swap_ranges(const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2) { + return Impl::swap_ranges_impl("Kokkos::swap_ranges_iterator_api_default", ex, + first1, last1, first2); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto swap_ranges(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + assert(source.extent(0) == dest.extent(0)); + return Impl::swap_ranges_impl("Kokkos::swap_ranges_view_api_default", ex, + begin(source), end(source), begin(dest)); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 swap_ranges(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::swap_ranges_impl(label, ex, first1, last1, first2); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto swap_ranges(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + assert(source.extent(0) == dest.extent(0)); + return Impl::swap_ranges_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6dfb83a8c0ea531f76f9ce4460d1ee37ae0cc37b --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp @@ -0,0 +1,166 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_HPP +#define KOKKOS_STD_ALGORITHMS_TRANSFORM_HPP + +#include "impl/Kokkos_Transform.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class UnaryOperation> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + InputIterator, OutputIterator>::value, + OutputIterator> +transform(const ExecutionSpace& ex, InputIterator first1, InputIterator last1, + OutputIterator d_first, UnaryOperation unary_op) { + return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, + first1, last1, d_first, std::move(unary_op)); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class UnaryOperation> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + InputIterator, OutputIterator>::value, + OutputIterator> +transform(const std::string& label, const ExecutionSpace& ex, + InputIterator first1, InputIterator last1, OutputIterator d_first, + UnaryOperation 
unary_op) { + return Impl::transform_impl(label, ex, first1, last1, d_first, + std::move(unary_op)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class UnaryOperation> +auto transform(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest, + UnaryOperation unary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_impl("Kokkos::transform_view_api_default", ex, + begin(source), end(source), begin(dest), + std::move(unary_op)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class UnaryOperation> +auto transform(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + ::Kokkos::View<DataType2, Properties2...>& dest, + UnaryOperation unary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_impl(label, ex, begin(source), end(source), + begin(dest), std::move(unary_op)); +} + +template <class ExecutionSpace, class InputIterator1, class InputIterator2, + class OutputIterator, class BinaryOperation> +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + InputIterator1, InputIterator2, OutputIterator>::value, + OutputIterator> +transform(const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, + first1, last1, first2, d_first, + std::move(binary_op)); +} + +template <class ExecutionSpace, class InputIterator1, class InputIterator2, + class OutputIterator, class BinaryOperation> 
+std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + InputIterator1, InputIterator2, OutputIterator>::value, + OutputIterator> +transform(const std::string& label, const ExecutionSpace& ex, + InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, + OutputIterator d_first, BinaryOperation binary_op) { + return Impl::transform_impl(label, ex, first1, last1, first2, d_first, + std::move(binary_op)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class DataType3, + class... Properties3, class BinaryOperation> +auto transform(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source1, + const ::Kokkos::View<DataType2, Properties2...>& source2, + ::Kokkos::View<DataType3, Properties3...>& dest, + BinaryOperation binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_impl("Kokkos::transform_view_api_default", ex, + begin(source1), end(source1), begin(source2), + begin(dest), std::move(binary_op)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class DataType3, + class... 
Properties3, class BinaryOperation> +auto transform(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source1, + const ::Kokkos::View<DataType2, Properties2...>& source2, + ::Kokkos::View<DataType3, Properties3...>& dest, + BinaryOperation binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_impl(label, ex, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d0073599b02eef5ed289a7afe3afbb46313297c5 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp @@ -0,0 +1,131 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_TRASFORM_EXCLUSIVE_SCAN_HPP +#define KOKKOS_STD_ALGORITHMS_TRASFORM_EXCLUSIVE_SCAN_HPP + +#include "impl/Kokkos_TransformExclusiveScan.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType, class BinaryOpType, + class UnaryOpType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +transform_exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType binary_op, + UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(ex); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move 
constructible."); + return Impl::transform_exclusive_scan_impl( + "Kokkos::transform_exclusive_scan_custom_functors_iterator_api", ex, + first, last, first_dest, init_value, binary_op, unary_op); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType, class BinaryOpType, + class UnaryOpType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +transform_exclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(ex); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + return Impl::transform_exclusive_scan_impl(label, ex, first, last, first_dest, + init_value, binary_op, unary_op); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class ValueType, + class BinaryOpType, class UnaryOpType> +auto transform_exclusive_scan( + const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_exclusive_scan_impl( + "Kokkos::transform_exclusive_scan_custom_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + init_value, binary_op, unary_op); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ValueType, + class BinaryOpType, class UnaryOpType> +auto transform_exclusive_scan( + const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible<ValueType>::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_exclusive_scan_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), init_value, binary_op, unary_op); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git 
a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..088e162adba77d514d157099f1f06663d515db4f --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp @@ -0,0 +1,190 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_HPP +#define KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_HPP + +#include "impl/Kokkos_TransformInclusiveScan.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1 (no init value) +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOpType, class UnaryOpType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::transform_inclusive_scan_impl( + "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, + first, last, first_dest, binary_op, unary_op); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOpType, class UnaryOpType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, + 
InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, BinaryOpType binary_op, + UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, + binary_op, unary_op); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryOpType, + class UnaryOpType> +auto transform_inclusive_scan( + const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_impl( + "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + binary_op, unary_op); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryOpType, + class UnaryOpType> +auto transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op); +} + +// overload set 2 (init value) +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOpType, class UnaryOpType, + class ValueType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, + ValueType init_value) { + Impl::static_assert_is_not_openmptarget(ex); + return Impl::transform_inclusive_scan_impl( + "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, + first, last, first_dest, binary_op, unary_op, init_value); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOpType, class UnaryOpType, + class ValueType> +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, BinaryOpType binary_op, + UnaryOpType unary_op, ValueType init_value) { + 
Impl::static_assert_is_not_openmptarget(ex); + return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, + binary_op, unary_op, init_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryOpType, + class UnaryOpType, class ValueType> +auto transform_inclusive_scan( + const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_impl( + "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + binary_op, unary_op, init_value); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryOpType, + class UnaryOpType, class ValueType> +auto transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& view_from, + const ::Kokkos::View<DataType2, Properties2...>& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op, init_value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_TransformReduce.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp similarity index 62% rename from packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_TransformReduce.hpp rename to packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp index 846166d322557a9872b0c74cfcef707e64fcf6f5..5caced59172b47fa69686ca93d65171addf41d94 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_TransformReduce.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp @@ -42,219 +42,14 @@ //@HEADER */ -#ifndef KOKKOS_STD_NUMERICS_TRANSFORM_REDUCE_HPP -#define KOKKOS_STD_NUMERICS_TRANSFORM_REDUCE_HPP +#ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_HPP +#define KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_HPP -#include <Kokkos_Core.hpp> -#include "../Kokkos_Constraints.hpp" -#include "../Kokkos_Distance.hpp" -#include "../Kokkos_ModifyingOperations.hpp" -#include "../Kokkos_BeginEnd.hpp" -#include "../Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp" +#include "impl/Kokkos_TransformReduce.hpp" +#include 
"Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { -namespace Impl { - -// -// helper functors -// -template <class ValueType> -struct StdTranformReduceDefaultBinaryTransformFunctor { - KOKKOS_FUNCTION - constexpr ValueType operator()(const ValueType& a, const ValueType& b) const { - return (a * b); - } -}; - -template <class ValueType> -struct StdTranformReduceDefaultJoinFunctor { - KOKKOS_FUNCTION - constexpr ValueType operator()(const ValueType& a, const ValueType& b) const { - return a + b; - } - - KOKKOS_FUNCTION - constexpr ValueType operator()(const volatile ValueType& a, - const volatile ValueType& b) const { - return a + b; - } -}; - -template <class IteratorType, class ReducerType, class TransformType> -struct StdTransformReduceSingleIntervalFunctor { - using red_value_type = typename ReducerType::value_type; - using index_type = typename IteratorType::difference_type; - - const IteratorType m_first; - const ReducerType m_reducer; - const TransformType m_transform; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& red_value) const { - auto tmp_wrapped_value = red_value_type{m_transform(m_first[i]), false}; - if (red_value.is_initial) { - red_value = tmp_wrapped_value; - } else { - m_reducer.join(red_value, tmp_wrapped_value); - } - } - - KOKKOS_FUNCTION - StdTransformReduceSingleIntervalFunctor(IteratorType first, - ReducerType reducer, - TransformType transform) - : m_first(std::move(first)), - m_reducer(std::move(reducer)), - m_transform(std::move(transform)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2, - class ReducerType, class TransformType> -struct StdTransformReduceTwoIntervalsFunctor { - using red_value_type = typename ReducerType::value_type; - - const IteratorType1 m_first1; - const IteratorType2 m_first2; - const ReducerType m_reducer; - const TransformType m_transform; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - auto 
tmp_wrapped_value = - red_value_type{m_transform(m_first1[i], m_first2[i]), false}; - - if (red_value.is_initial) { - red_value = tmp_wrapped_value; - } else { - m_reducer.join(red_value, tmp_wrapped_value); - } - } - - KOKKOS_FUNCTION - StdTransformReduceTwoIntervalsFunctor(IteratorType1 first1, - IteratorType2 first2, - ReducerType reducer, - TransformType transform) - : m_first1(std::move(first1)), - m_first2(std::move(first2)), - m_reducer(std::move(reducer)), - m_transform(std::move(transform)) {} -}; - -//------------------------------ -// -// impl functions -// -//------------------------------ - -template <class ExecutionSpace, class IteratorType, class ValueType, - class JoinerType, class UnaryTransformerType> -ValueType transform_reduce_custom_functors_impl( - const std::string& label, const ExecutionSpace& ex, IteratorType first, - IteratorType last, ValueType init_reduction_value, JoinerType joiner, - UnaryTransformerType transformer) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::static_assert_is_not_openmptarget(ex); - Impl::expect_valid_range(first, last); - - if (first == last) { - // init is returned, unmodified - return init_reduction_value; - } - - // aliases - using reducer_type = - ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>; - using functor_type = - StdTransformReduceSingleIntervalFunctor<IteratorType, reducer_type, - UnaryTransformerType>; - using reduction_value_type = typename reducer_type::value_type; - - // run - reduction_value_type result; - reducer_type reducer(result, joiner); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - functor_type(first, reducer, transformer), reducer); - - // fence not needed since reducing into scalar - - // as per standard, transform is not applied to the init value - // https://en.cppreference.com/w/cpp/algorithm/transform_reduce - 
return joiner(result.val, init_reduction_value); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class ValueType, class JoinerType, class BinaryTransformerType> -ValueType transform_reduce_custom_functors_impl( - const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, - JoinerType joiner, BinaryTransformerType transformer) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first1, first2); - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_iterators_have_matching_difference_type(first1, first2); - Impl::expect_valid_range(first1, last1); - - if (first1 == last1) { - // init is returned, unmodified - return init_reduction_value; - } - - // aliases - using index_type = typename IteratorType1::difference_type; - using reducer_type = - ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>; - using functor_type = - StdTransformReduceTwoIntervalsFunctor<index_type, IteratorType1, - IteratorType2, reducer_type, - BinaryTransformerType>; - using reduction_value_type = typename reducer_type::value_type; - - // run - reduction_value_type result; - reducer_type reducer(result, joiner); - - const auto num_elements = Kokkos::Experimental::distance(first1, last1); - ::Kokkos::parallel_reduce( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - functor_type(first1, first2, reducer, transformer), reducer); - - // fence not needed since reducing into scalar - return joiner(result.val, init_reduction_value); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2, - class ValueType> -ValueType transform_reduce_default_functors_impl( - const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first1, first2); - 
Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_iterators_have_matching_difference_type(first1, first2); - Impl::expect_valid_range(first1, last1); - - // aliases - using transformer_type = - Impl::StdTranformReduceDefaultBinaryTransformFunctor<ValueType>; - using joiner_type = Impl::StdTranformReduceDefaultJoinFunctor<ValueType>; - - return transform_reduce_custom_functors_impl( - label, ex, first1, last1, first2, std::move(init_reduction_value), - joiner_type(), transformer_type()); -} - -} // end namespace Impl - -/////////////////////////////// -// -// transform_reduce public API -// -/////////////////////////////// // ---------------------------- // overload set1: diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp new file mode 100644 index 0000000000000000000000000000000000000000..aeb54a6ffe6fceaa7e8787e73527491e536b0686 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_HPP +#define KOKKOS_STD_ALGORITHMS_UNIQUE_HPP + +#include "impl/Kokkos_Unique.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// note: the enable_if below is to avoid "call to ... 
is ambiguous" +// for example in the unit test when using a variadic function + +// overload set1 +template <class ExecutionSpace, class IteratorType> +std::enable_if_t<!::Kokkos::is_view<IteratorType>::value, IteratorType> unique( + const ExecutionSpace& ex, IteratorType first, IteratorType last) { + return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, + last); +} + +template <class ExecutionSpace, class IteratorType> +std::enable_if_t<!::Kokkos::is_view<IteratorType>::value, IteratorType> unique( + const std::string& label, const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::unique_impl(label, ex, first, last); +} + +template <class ExecutionSpace, class DataType, class... Properties> +auto unique(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return ::Kokkos::Experimental::unique("Kokkos::unique_view_api_default", ex, + begin(view), end(view)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties> +auto unique(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return ::Kokkos::Experimental::unique(label, ex, begin(view), end(view)); +} + +// overload set2 +template <class ExecutionSpace, class IteratorType, class BinaryPredicate> +IteratorType unique(const ExecutionSpace& ex, IteratorType first, + IteratorType last, BinaryPredicate pred) { + return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, + last, pred); +} + +template <class ExecutionSpace, class IteratorType, class BinaryPredicate> +IteratorType unique(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + BinaryPredicate pred) { + return Impl::unique_impl(label, ex, first, last, pred); +} + +template <class ExecutionSpace, class DataType, class... Properties, + class BinaryPredicate> +auto unique(const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + BinaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::unique_impl("Kokkos::unique_view_api_default", ex, begin(view), + end(view), std::move(pred)); +} + +template <class ExecutionSpace, class DataType, class... 
Properties, + class BinaryPredicate> +auto unique(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType, Properties...>& view, + BinaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::unique_impl(label, ex, begin(view), end(view), std::move(pred)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..632b560fa8e2da17acf3eda9b16fc74b416f2362 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp @@ -0,0 +1,143 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_HPP +#define KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_HPP + +#include "impl/Kokkos_UniqueCopy.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set1 +template <class ExecutionSpace, class InputIterator, class OutputIterator> +std::enable_if_t<!::Kokkos::is_view<InputIterator>::value, OutputIterator> +unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, + first, last, d_first); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +std::enable_if_t<!::Kokkos::is_view<InputIterator>::value, OutputIterator> +unique_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, OutputIterator d_first) { + return Impl::unique_copy_impl(label, ex, first, last, d_first); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2> +auto unique_copy(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + const ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return ::Kokkos::Experimental::unique_copy( + "Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source), + begin(dest)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2> +auto unique_copy(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + const ::Kokkos::View<DataType2, Properties2...>& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return ::Kokkos::Experimental::unique_copy(label, ex, cbegin(source), + cend(source), begin(dest)); +} + +// overload set2 +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class BinaryPredicate> +OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first, + BinaryPredicate pred) { + return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, + first, last, d_first, pred); +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class BinaryPredicate> +OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first, BinaryPredicate pred) { + return Impl::unique_copy_impl(label, ex, first, last, d_first, pred); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class BinaryPredicate> +auto unique_copy(const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + const ::Kokkos::View<DataType2, Properties2...>& dest, + BinaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_impl("Kokkos::unique_copy_view_api_default", ex, + cbegin(source), cend(source), begin(dest), + std::move(pred)); +} + +template <class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicate> +auto unique_copy(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View<DataType1, Properties1...>& source, + const ::Kokkos::View<DataType2, Properties2...>& dest, + BinaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_impl(label, ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp new file mode 100644 index 0000000000000000000000000000000000000000..35c78b86bf257255bf239552f9b41b7c34592532 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp @@ -0,0 +1,135 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class ValueType1, class ValueType2, class RetType = ValueType2> +struct StdAdjacentDifferenceDefaultBinaryOpFunctor { + KOKKOS_FUNCTION + constexpr RetType operator()(const ValueType1& a, const ValueType2& b) const { + return a - b; + } +}; + +template <class InputIteratorType, class OutputIteratorType, + class BinaryOperator> +struct StdAdjacentDiffFunctor { + using index_type = typename InputIteratorType::difference_type; + + const InputIteratorType m_first_from; + const OutputIteratorType m_first_dest; + BinaryOperator m_op; + + KOKKOS_FUNCTION + void operator()(const index_type i) const { + const auto& my_value = m_first_from[i]; + if (i == 0) { + m_first_dest[i] = my_value; + } else { + const auto& left_value = m_first_from[i - 1]; + m_first_dest[i] = m_op(my_value, left_value); + } + } + + KOKKOS_FUNCTION + StdAdjacentDiffFunctor(InputIteratorType first_from, + OutputIteratorType first_dest, BinaryOperator op) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_op(std::move(op)) {} +}; + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOp> +OutputIteratorType adjacent_difference_impl(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); +
Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + if (first_from == last_from) { + return first_dest; + } + + // aliases + // (no scratch-view aliases: the functor reads the source range and writes + // the destination range directly, so no temporary storage is needed) + using functor_t = + StdAdjacentDiffFunctor<InputIteratorType, OutputIteratorType, BinaryOp>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + // launch over [0, num_elements); element 0 is copied, the rest use bin_op + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + functor_t(first_from, first_dest, bin_op)); + ex.fence("Kokkos::adjacent_difference: fence after operation"); + + // return + return first_dest + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp new file mode 100644 index 0000000000000000000000000000000000000000..155f6c7bb80c6879eb9bbf1d86fad4e222b69ee5 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp @@ -0,0 +1,140 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class IteratorType, class ReducerType, + class PredicateType> +struct StdAdjacentFindFunctor { + using red_value_type = typename ReducerType::value_type; + + IteratorType m_first; + ReducerType m_reducer; + PredicateType m_p; + + KOKKOS_FUNCTION + void operator()(const IndexType i, red_value_type& red_value) const { + const auto& my_value = m_first[i]; + const auto& next_value = m_first[i + 1]; + const bool are_equal = m_p(my_value, next_value); + + auto rv = + are_equal + ? red_value_type{i} + : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; + + m_reducer.join(red_value, rv); + } + + KOKKOS_FUNCTION + StdAdjacentFindFunctor(IteratorType first, ReducerType reducer, + PredicateType p) + : m_first(std::move(first)), + m_reducer(std::move(reducer)), + m_p(std::move(p)) {} +}; + +template <class ExecutionSpace, class IteratorType, class PredicateType> +IteratorType adjacent_find_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + + if (num_elements <= 1) { + return last; + } + + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc<index_type>; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdAdjacentFindFunctor<index_type, 
IteratorType, reducer_type, + PredicateType>; + + reduction_value_type red_result; + reducer_type reducer(red_result); + + // note that we use below num_elements-1 because + // each index i in the reduction checks i and (i+1). + ::Kokkos::parallel_reduce( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements - 1), + func_t(first, reducer, pred), reducer); + + // fence not needed because reducing into scalar + if (red_result.min_loc_true == + ::Kokkos::reduction_identity<index_type>::min()) { + return last; + } else { + return first + red_result.min_loc_true; + } +} + +template <class ExecutionSpace, class IteratorType> +IteratorType adjacent_find_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + using value_type = typename IteratorType::value_type; + using default_pred_t = StdAlgoEqualBinaryPredicate<value_type>; + return adjacent_find_impl(label, ex, first, last, default_pred_t()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dd8ae4f5b4d992f3b2d4b9894fd113889dd68df0 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp @@ -0,0 +1,77 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_ANY_OF_NONE_OF_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_ALL_OF_ANY_OF_NONE_OF_IMPL_HPP + +#include "Kokkos_FindIfOrNot.hpp" + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class ExecutionSpace, class InputIterator, class Predicate> +bool all_of_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, Predicate predicate) { + return (find_if_or_not_impl<false>(label, ex, first, last, predicate) == + last); +} + +template <class ExecutionSpace, class InputIterator, class Predicate> +bool any_of_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, Predicate predicate) { + return (find_if_or_not_impl<true>(label, ex, first, last, predicate) != last); +} + +template <class ExecutionSpace, class IteratorType, class Predicate> +bool none_of_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, Predicate predicate) { + return (find_if_or_not_impl<true>(label, ex, first, last, predicate) == last); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp similarity index 100% rename from packages/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp rename to packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp new file mode 100644 index 0000000000000000000000000000000000000000..18f614094cff0588ee045e8d61c27aade705f374 --- /dev/null +++ 
b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp @@ -0,0 +1,103 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class IteratorType1, class IteratorType2> +struct StdCopyBackwardFunctor { + static_assert(std::is_signed<IndexType>::value, + "Kokkos: StdCopyBackwardFunctor requires signed index type"); + + IteratorType1 m_last; + IteratorType2 m_dest_last; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } + + KOKKOS_FUNCTION + StdCopyBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) + : m_last(std::move(_last)), m_dest_last(std::move(_dest_last)) {} +}; + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 copy_backward_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first, d_last); + Impl::static_assert_iterators_have_matching_difference_type(first, d_last); + Impl::expect_valid_range(first, last); + + // aliases + using index_type = typename IteratorType1::difference_type; + using func_t = + StdCopyBackwardFunctor<index_type, IteratorType1, IteratorType2>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(last, d_last)); + ex.fence("Kokkos::copy_backward: fence after operation"); + + // return + return d_last - num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff 
--git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp new file mode 100644 index 0000000000000000000000000000000000000000..03b6fc6ecacc0fca8d9d457f4e6b2193a2fc9c81 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp @@ -0,0 +1,116 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class InputIterator, class OutputIterator> +struct StdCopyFunctor { + InputIterator m_first; + OutputIterator m_dest_first; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { m_dest_first[i] = m_first[i]; } + + KOKKOS_FUNCTION + StdCopyFunctor(InputIterator _first, OutputIterator _dest_first) + : m_first(std::move(_first)), m_dest_first(std::move(_dest_first)) {} +}; + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // aliases + using index_type = typename InputIterator::difference_type; + using func_t = StdCopyFunctor<index_type, InputIterator, 
OutputIterator>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, d_first)); + ex.fence("Kokkos::copy: fence after operation"); + + // return + return d_first + num_elements; +} + +template <class ExecutionSpace, class InputIterator, class Size, + class OutputIterator> +OutputIterator copy_n_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first_from, Size count, + OutputIterator first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + + if (count > 0) { + return copy_impl(label, ex, first_from, first_from + count, first_dest); + } else { + return first_dest; + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..aebb5a9a46111a3012e6d304d312ad77f14f42a3 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -0,0 +1,142 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_IF_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class FirstFrom, class FirstDest, class PredType> +struct StdCopyIfFunctor { + FirstFrom m_first_from; + FirstDest m_first_dest; + PredType m_pred; + + KOKKOS_FUNCTION + StdCopyIfFunctor(FirstFrom first_from, FirstDest first_dest, PredType pred) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_pred(std::move(pred)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, IndexType& update, + const bool final_pass) const { + const auto& myval = m_first_from[i]; + if (final_pass) { + if (m_pred(myval)) { + m_first_dest[update] = myval; + } + } + + if (m_pred(myval)) { + update += 1; + } + } +}; + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class PredicateType> +OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first, PredicateType pred) { + /* + To explain the impl, suppose that our data is: + + | 1 | 1 | 2 | 2 | 3 | -2 | 4 | 4 | 4 | 5 | 7 | -10 | + + and we want to copy only the even entries, + We can use an exclusive scan where the "update" + is incremented only for the elements that satisfy the predicate. 
+ This way, the update allows us to track where in the destination + we need to copy the elements: + + In this case, counting only the even entries, the exlusive scan + during the final pass would yield: + + | 0 | 0 | 0 | 1 | 2 | 2 | 3 | 4 | 5 | 6 | 6 | 6 | + * * * * * * * + + which provides the indexing in the destination where + each starred (*) element needs to be copied to since + the starred elements are those that satisfy the predicate. + */ + + // checks + Impl::static_assert_random_access_and_accessible(ex, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return d_first; + } else { + // aliases + using index_type = typename InputIterator::difference_type; + using func_type = StdCopyIfFunctor<index_type, InputIterator, + OutputIterator, PredicateType>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + index_type count = 0; + ::Kokkos::parallel_scan(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(first, d_first, pred), count); + + // fence not needed because of the scan accumulating into count + return d_first + count; + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..982ac4046426ceb20de1899e6c0fe700aa8382e0 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp @@ -0,0 +1,112 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_COUNT_IF_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IteratorType, class Predicate> +struct StdCountIfFunctor { + using index_type = typename IteratorType::difference_type; + IteratorType m_first; + Predicate m_predicate; + + KOKKOS_FUNCTION + void operator()(index_type i, index_type& lsum) const { + if (m_predicate(m_first[i])) { + lsum++; + } + } + + KOKKOS_FUNCTION + StdCountIfFunctor(IteratorType _first, Predicate _predicate) + : m_first(std::move(_first)), m_predicate(std::move(_predicate)) {} +}; + +template <class ExecutionSpace, class IteratorType, class Predicate> +typename IteratorType::difference_type count_if_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + Predicate predicate) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + // aliases + using func_t = StdCountIfFunctor<IteratorType, Predicate>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + typename IteratorType::difference_type count = 0; + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, predicate), count); + ex.fence("Kokkos::count_if: fence after operation"); + + return count; +} + +template <class ExecutionSpace, class IteratorType, class T> +auto count_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, const T& value) { + return count_if_impl( + label, ex, first, last, + 
::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate<T>(value)); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9482917abb996498af8161b2ac3feb8300f3f5e2 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp @@ -0,0 +1,147 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_EQUAL_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_EQUAL_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +struct StdEqualFunctor { + IteratorType1 m_first1; + IteratorType2 m_first2; + BinaryPredicateType m_predicate; + + KOKKOS_FUNCTION + void operator()(IndexType i, std::size_t& lsum) const { + if (!m_predicate(m_first1[i], m_first2[i])) { + lsum = 1; + } + } + + KOKKOS_FUNCTION + StdEqualFunctor(IteratorType1 _first1, IteratorType2 _first2, + BinaryPredicateType _predicate) + : m_first1(std::move(_first1)), + m_first2(std::move(_first2)), + m_predicate(std::move(_predicate)) {} +}; + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +bool equal_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + BinaryPredicateType predicate) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first1, first2); + 
Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + + // aliases + using index_type = typename IteratorType1::difference_type; + using func_t = StdEqualFunctor<index_type, IteratorType1, IteratorType2, + BinaryPredicateType>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first1, last1); + std::size_t different = 0; + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first1, first2, predicate), different); + ex.fence("Kokkos::equal: fence after operation"); + + return !different; +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +bool equal_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; + return equal_impl(label, ex, first1, last1, first2, pred_t()); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +bool equal_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2, BinaryPredicateType predicate) { + const auto d1 = ::Kokkos::Experimental::distance(first1, last1); + const auto d2 = ::Kokkos::Experimental::distance(first2, last2); + if (d1 != d2) { + return false; + } + + return equal_impl(label, ex, first1, last1, first2, predicate); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +bool equal_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2) { + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + using value_type1 = typename 
IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; + return equal_impl(label, ex, first1, last1, first2, last2, pred_t()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0ae4651c6afa9beec90cf09b7cd7ccbc1b0c71ab --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp @@ -0,0 +1,232 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_ValueWrapperForNoNeutralElement.hpp" +#include "Kokkos_IdentityReferenceUnaryFunctor.hpp" +#include <std_algorithms/Kokkos_TransformExclusiveScan.hpp> +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, + class FirstDest> +struct ExclusiveScanDefaultFunctorForKnownNeutralElement { + using execution_space = ExeSpace; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + if (final_pass) m_first_dest[i] = update + m_init_value; + update += m_first_from[i]; + } +}; + +template <class ExeSpace, class IndexType, class 
ValueType, class FirstFrom, + class FirstDest> +struct ExclusiveScanDefaultFunctor { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement<ValueType>; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctor(ValueType init, FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = update.val + m_init_value; + } + } + + const auto tmp = value_type{m_first_from[i], false}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (update.is_initial) { + update.val = input.val; + update.is_initial = false; + } else { + update.val = update.val + input.val; + } + } +}; + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType, class BinaryOpType> +OutputIteratorType exclusive_scan_custom_op_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>; + using func_type = + 
TransformExclusiveScanFunctor<ExecutionSpace, index_type, ValueType, + InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(init_value, first_from, first_dest, bop, unary_op_type())); + ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); + + // return + return first_dest + num_elements; +} + +template <typename ValueType> +using ex_scan_has_reduction_identity_sum_t = + decltype(Kokkos::reduction_identity<ValueType>::sum()); + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType> +OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // does it make sense to do this static_assert too? + // using input_iterator_value_type = typename InputIteratorType::value_type; + // static_assert + // (std::is_convertible<std::remove_cv_t<input_iterator_value_type>, + // ValueType>::value, + // "exclusive_scan: InputIteratorType::value_type not convertible to + // ValueType"); + + // we are unnecessarily duplicating code, but this is on purpose + // so that we can use the default_op for OpenMPTarget. + // Originally, I had this implemented as: + // ''' + // using bop_type = StdExclusiveScanDefaultJoinFunctor<ValueType>; + // call exclusive_scan_custom_op_impl(..., bop_type()); + // ''' + // which avoids duplicating the functors, but for OpenMPTarget + // I cannot use a custom binary op. 
+ // This is the same problem that occurs for reductions. + + // aliases + using index_type = typename InputIteratorType::difference_type; + using func_type = std::conditional_t< + ::Kokkos::is_detected<ex_scan_has_reduction_identity_sum_t, + ValueType>::value, + ExclusiveScanDefaultFunctorForKnownNeutralElement< + ExecutionSpace, index_type, ValueType, InputIteratorType, + OutputIteratorType>, + ExclusiveScanDefaultFunctor<ExecutionSpace, index_type, ValueType, + InputIteratorType, OutputIteratorType>>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(init_value, first_from, first_dest)); + + ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + + return first_dest + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp similarity index 52% rename from packages/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp rename to packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp index bd29a0b18ae3951c30d54b1a897d42cff0805397..843771b6b13c481b2859e8981a5b21e11ce6b229 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp @@ -42,83 +42,65 @@ //@HEADER */ -#include <Kokkos_Macros.hpp> -#if defined(KOKKOS_ENABLE_THREADS) +#ifndef KOKKOS_STD_ALGORITHMS_FILL_AND_FILL_N_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FILL_AND_FILL_N_IMPL_HPP -#include <Kokkos_Core_fwd.hpp> - -/* Standard C++ libraries */ - -#include <cstdlib> +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> #include <string> -#include <iostream> 
-#include <stdexcept> -#include <thread> -#include <mutex> - -#include <Kokkos_Threads.hpp> - -//---------------------------------------------------------------------------- namespace Kokkos { +namespace Experimental { namespace Impl { -namespace { - -std::mutex host_internal_cppthread_mutex; - -// std::thread compatible driver. -// Recovery from an exception would require constant intra-thread health -// verification; which would negatively impact runtime. As such simply -// abort the process. - -void internal_cppthread_driver() { - try { - ThreadsExec::driver(); - } catch (const std::exception& x) { - std::cerr << "Exception thrown from worker thread: " << x.what() - << std::endl; - std::cerr.flush(); - std::abort(); - } catch (...) { - std::cerr << "Exception thrown from worker thread" << std::endl; - std::cerr.flush(); - std::abort(); - } -} -} // namespace - -//---------------------------------------------------------------------------- -// Spawn a thread - -void ThreadsExec::spawn() { - std::thread t(internal_cppthread_driver); - t.detach(); +template <class InputIterator, class T> +struct StdFillFunctor { + using index_type = typename InputIterator::difference_type; + InputIterator m_first; + T m_value; + + KOKKOS_FUNCTION + void operator()(index_type i) const { m_first[i] = m_value; } + + KOKKOS_FUNCTION + StdFillFunctor(InputIterator _first, T _value) + : m_first(std::move(_first)), m_value(std::move(_value)) {} +}; + +template <class ExecutionSpace, class IteratorType, class T> +void fill_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, const T& value) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + StdFillFunctor<IteratorType, T>(first, value)); + ex.fence("Kokkos::fill: 
fence after operation"); } -//---------------------------------------------------------------------------- - -bool ThreadsExec::is_process() { - static const std::thread::id master_pid = std::this_thread::get_id(); - - return master_pid == std::this_thread::get_id(); -} +template <class ExecutionSpace, class IteratorType, class SizeType, class T> +IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, SizeType n, const T& value) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); -void ThreadsExec::global_lock() { host_internal_cppthread_mutex.lock(); } - -void ThreadsExec::global_unlock() { host_internal_cppthread_mutex.unlock(); } - -//---------------------------------------------------------------------------- - -void ThreadsExec::wait_yield(volatile int& flag, const int value) { - while (value == flag) { - std::this_thread::yield(); + if (n <= 0) { + return first; } + + fill_impl(label, ex, first, last, value); + return last; } } // namespace Impl +} // namespace Experimental } // namespace Kokkos -#else -void KOKKOS_CORE_SRC_THREADS_EXEC_BASE_PREVENT_LINK_ERROR() {} -#endif /* end #if defined( KOKKOS_ENABLE_THREADS ) */ +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp new file mode 100644 index 0000000000000000000000000000000000000000..35a6c4b4ac9b650655333e21579be707f72774f0 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp @@ -0,0 +1,191 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_END_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_END_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class IteratorType1, class IteratorType2, + class ReducerType, class PredicateType> +struct StdFindEndFunctor { + using red_value_type = typename ReducerType::value_type; + + IteratorType1 m_first; + IteratorType1 m_last; + IteratorType2 m_s_first; + IteratorType2 m_s_last; + ReducerType m_reducer; + PredicateType m_p; + + KOKKOS_FUNCTION + void operator()(const IndexType i, red_value_type& red_value) const { + namespace KE = ::Kokkos::Experimental; + auto myit = m_first + i; + bool found = true; + + const auto search_count = KE::distance(m_s_first, m_s_last); + for (IndexType k = 0; k < search_count; ++k) { + // note that we add this EXPECT to check if we are in a valid range + // but I think we can remvoe this beceause the guarantee we don't go + // out of bounds is taken care of at the calling site + // where we launch the par-reduce. + KOKKOS_EXPECTS((myit + k) < m_last); + + if (!m_p(myit[k], m_s_first[k])) { + found = false; + break; + } + } + + const auto rv = + found ? 
red_value_type{i} + : red_value_type{::Kokkos::reduction_identity<IndexType>::max()}; + + m_reducer.join(red_value, rv); + } + + KOKKOS_FUNCTION + StdFindEndFunctor(IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last, + ReducerType reducer, PredicateType p) + : m_first(std::move(first)), + m_last(std::move(last)), + m_s_first(std::move(s_first)), + m_s_last(std::move(s_last)), + m_reducer(std::move(reducer)), + m_p(std::move(p)) {} +}; + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); + + // the target sequence should not be larger than the range [first, last) + namespace KE = ::Kokkos::Experimental; + const auto num_elements = KE::distance(first, last); + const auto s_count = KE::distance(s_first, s_last); + KOKKOS_EXPECTS(num_elements >= s_count); + (void)s_count; // needed when macro above is a no-op + + if (s_first == s_last) { + return last; + } + + if (first == last) { + return last; + } + + // special case where the two ranges have equal size + if (num_elements == s_count) { + const auto equal_result = equal_impl(label, ex, first, last, s_first, pred); + return (equal_result) ? 
first : last; + } else { + using index_type = typename IteratorType1::difference_type; + using reducer_type = LastLoc<index_type>; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindEndFunctor<index_type, IteratorType1, IteratorType2, + reducer_type, BinaryPredicateType>; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + + // decide the size of the range policy of the par_red: + // note that the last feasible index to start looking is the index + // whose distance from the "last" is equal to the sequence count. + // the +1 is because we need to include that location too. + const auto range_size = num_elements - s_count + 1; + + // run par reduce + ::Kokkos::parallel_reduce( + label, RangePolicy<ExecutionSpace>(ex, 0, range_size), + func_t(first, last, s_first, s_last, reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + if (red_result.max_loc_true == + ::Kokkos::reduction_identity<index_type>::max()) { + // if here, a subrange has not been found + return last; + } else { + // a location has been found + return first + red_result.max_loc_true; + } + } +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; + return find_end_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp new file mode 100644 
index 0000000000000000000000000000000000000000..6907bbdbc37c77182c86382b37a8d346c8174988 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp @@ -0,0 +1,161 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class IteratorType1, class IteratorType2, + class ReducerType, class PredicateType> +struct StdFindFirstOfFunctor { + using red_value_type = typename ReducerType::value_type; + + IteratorType1 m_first; + IteratorType2 m_s_first; + IteratorType2 m_s_last; + ReducerType m_reducer; + PredicateType m_p; + + KOKKOS_FUNCTION + void operator()(const IndexType i, red_value_type& red_value) const { + namespace KE = ::Kokkos::Experimental; + const auto& myvalue = m_first[i]; + bool found = false; + + const auto search_count = KE::distance(m_s_first, m_s_last); + for (IndexType k = 0; k < search_count; ++k) { + if (m_p(myvalue, m_s_first[k])) { + found = true; + break; + } + } + + const auto rv = + found ? 
red_value_type{i} + : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; + + m_reducer.join(red_value, rv); + } + + KOKKOS_FUNCTION + StdFindFirstOfFunctor(IteratorType1 first, IteratorType2 s_first, + IteratorType2 s_last, ReducerType reducer, + PredicateType p) + : m_first(std::move(first)), + m_s_first(std::move(s_first)), + m_s_last(std::move(s_last)), + m_reducer(std::move(reducer)), + m_p(std::move(p)) {} +}; + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +IteratorType1 find_first_of_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); + + if ((s_first == s_last) || (first == last)) { + return last; + } + + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc<index_type>; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindFirstOfFunctor<index_type, IteratorType1, IteratorType2, + reducer_type, BinaryPredicateType>; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, s_first, s_last, reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity<index_type>::min()) { + // if here, nothing found + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template <class ExecutionSpace, 
class IteratorType1, class IteratorType2> +IteratorType1 find_first_of_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; + return find_first_of_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c79c4b5216bed0d8b386ec4c0630834537d1c0e8 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp @@ -0,0 +1,146 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_AND_FIND_IF_NOT_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_IF_AND_FIND_IF_NOT_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <bool is_find_if, class IndexType, class IteratorType, + class ReducerType, class PredicateType> +struct StdFindIfOrNotFunctor { + using red_value_type = typename ReducerType::value_type; + + IteratorType m_first; + ReducerType m_reducer; + PredicateType m_p; + + KOKKOS_FUNCTION + void operator()(const IndexType i, red_value_type& red_value) const { + const auto& my_value = m_first[i]; + + // if doing find_if, look for when predicate is true + // if doing find_if_not, look for when predicate is false 
+ const bool found_condition = is_find_if ? m_p(my_value) : !m_p(my_value); + + auto rv = + found_condition + ? red_value_type{i} + : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; + + m_reducer.join(red_value, rv); + } + + KOKKOS_FUNCTION + StdFindIfOrNotFunctor(IteratorType first, ReducerType reducer, + PredicateType p) + : m_first(std::move(first)), + m_reducer(std::move(reducer)), + m_p(std::move(p)) {} +}; + +template <bool is_find_if, class ExecutionSpace, class IteratorType, + class PredicateType> +IteratorType find_if_or_not_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible( + ex, first); // only need one It per type + Impl::expect_valid_range(first, last); + + if (first == last) { + return last; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc<index_type>; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindIfOrNotFunctor<is_find_if, index_type, IteratorType, + reducer_type, PredicateType>; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity<index_type>::min()) { + // here, it means a valid loc has not been found, + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template <class ExecutionSpace, class InputIterator, class T> +InputIterator find_impl(const std::string& label, ExecutionSpace ex, + InputIterator first, InputIterator last, + const T& value) { + return 
find_if_or_not_impl<true>( + label, ex, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate<T>(value)); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8bd37b13bc9a19f0b8cbe457913b3a34d0320c6b --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FOR_EACH_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IteratorType, class UnaryFunctorType> +struct StdForEachFunctor { + using index_type = typename IteratorType::difference_type; + IteratorType m_first; + UnaryFunctorType m_functor; + + KOKKOS_FUNCTION + void operator()(index_type i) const { m_functor(m_first[i]); } + + KOKKOS_FUNCTION + StdForEachFunctor(IteratorType _first, UnaryFunctorType _functor) + : m_first(std::move(_first)), m_functor(std::move(_functor)) {} +}; + +template <class ExecutionSpace, class IteratorType, class UnaryFunctorType> +UnaryFunctorType for_each_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, UnaryFunctorType functor) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + 
StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor)); + ex.fence("Kokkos::for_each: fence after operation"); + + return functor; +} + +template <class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType> +IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, SizeType n, + UnaryFunctorType functor) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(ex, first, last); + Impl::expect_valid_range(first, last); + + if (n == 0) { + return first; + } + + for_each_impl(label, ex, first, last, std::move(functor)); + // no neeed to fence since for_each_impl fences already + + return last; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f01d9bfb55ebf3f2dd9fdd795e34b57ff8057a24 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp @@ -0,0 +1,105 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_GENERATE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_GENERATE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IteratorType, class Generator> +struct StdGenerateFunctor { + using index_type = typename IteratorType::difference_type; + IteratorType m_first; + Generator m_generator; + + KOKKOS_FUNCTION + void operator()(index_type i) const { m_first[i] = m_generator(); } + + KOKKOS_FUNCTION + StdGenerateFunctor(IteratorType _first, Generator _g) + : m_first(std::move(_first)), m_generator(std::move(_g)) {} +}; + +template <class ExecutionSpace, class IteratorType, class Generator> +void generate_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, Generator g) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + // aliases + using func_t = StdGenerateFunctor<IteratorType, Generator>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, g)); + ex.fence("Kokkos::generate: fence after operation"); +} + +template <class ExecutionSpace, class IteratorType, class Size, class Generator> +IteratorType generate_n_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, Size count, Generator g) { + if (count <= 0) { + return first; + } + + generate_impl(label, ex, first, first + count, g); + return first + count; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git 
a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_HelperPredicates.hpp similarity index 97% rename from packages/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp rename to packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_HelperPredicates.hpp index 18d5dadd539e1d19897c2268954637542b17eea5..244bce48e436b3c8af3a7d70823baa436d9f9958 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_HelperPredicates.hpp @@ -42,8 +42,8 @@ //@HEADER */ -#ifndef KOKKOS_STD_HELPER_PREDICATES_HPP -#define KOKKOS_STD_HELPER_PREDICATES_HPP +#ifndef KOKKOS_STD_ALGORITHMS_HELPER_PREDICATES_HPP +#define KOKKOS_STD_ALGORITHMS_HELPER_PREDICATES_HPP #include <Kokkos_Macros.hpp> diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_IdentityReferenceUnaryFunctor.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp similarity index 93% rename from packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_IdentityReferenceUnaryFunctor.hpp rename to packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp index d43a161fcd469d430a82e067634a81cb147dd05f..f41e567c9b09d43550fd1dd4d1f12da9b6589597 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_IdentityReferenceUnaryFunctor.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp @@ -42,8 +42,8 @@ //@HEADER */ -#ifndef KOKKOS_STD_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP -#define KOKKOS_STD_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP +#ifndef KOKKOS_STD_ALGORITHMS_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP +#define KOKKOS_STD_ALGORITHMS_NUMERIC_IDENTITY_REFERENCE_UNARY_FUNCTOR_HPP #include <Kokkos_Macros.hpp> diff --git 
a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2088ebd439325bff2273c0a5b5ee635590f40532 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -0,0 +1,243 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_TransformInclusiveScan.hpp> +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <typename ValueType> +using in_scan_has_reduction_identity_sum_t = + decltype(Kokkos::reduction_identity<ValueType>::sum()); + +template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, + class FirstDest> +struct InclusiveScanDefaultFunctorForKnownIdentityElement { + using execution_space = ExeSpace; + + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + InclusiveScanDefaultFunctorForKnownIdentityElement(FirstFrom first_from, + FirstDest first_dest) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + update += m_first_from[i]; + + if (final_pass) { + m_first_dest[i] = update; + } + } +}; + +template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, + class FirstDest> +struct 
InclusiveScanDefaultFunctor { + using execution_space = ExeSpace; + using value_type = ValueWrapperForNoNeutralElement<ValueType>; + + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + InclusiveScanDefaultFunctor(FirstFrom first_from, FirstDest first_dest) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + const auto tmp = value_type{m_first_from[i], false}; + this->join(update, tmp); + + if (final_pass) { + m_first_dest[i] = update.val; + } + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (update.is_initial) { + update.val = input.val; + } else { + update.val = update.val + input.val; + } + update.is_initial = false; + } +}; + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType> +OutputIteratorType inclusive_scan_default_op_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using value_type = + std::remove_const_t<typename InputIteratorType::value_type>; + using func_type = std::conditional_t< + ::Kokkos::is_detected<in_scan_has_reduction_identity_sum_t, + value_type>::value, + InclusiveScanDefaultFunctorForKnownIdentityElement< + ExecutionSpace, index_type, value_type, InputIteratorType, + OutputIteratorType>, + InclusiveScanDefaultFunctor<ExecutionSpace, index_type, value_type, + InputIteratorType, 
OutputIteratorType>>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(first_from, first_dest)); + ex.fence("Kokkos::inclusive_scan_default_op: fence after operation"); + + // return + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl +// ------------------------------------------------------------- +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOpType> +OutputIteratorType inclusive_scan_custom_binary_op_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOpType binary_op) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using value_type = + std::remove_const_t<typename InputIteratorType::value_type>; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>; + using func_type = TransformInclusiveScanNoInitValueFunctor< + ExecutionSpace, index_type, value_type, InputIteratorType, + OutputIteratorType, BinaryOpType, unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(first_from, first_dest, binary_op, unary_op_type())); + ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); + + // return + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// 
inclusive_scan_custom_binary_op_impl with init_value +// ------------------------------------------------------------- +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOpType, class ValueType> +OutputIteratorType inclusive_scan_custom_binary_op_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOpType binary_op, + ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>; + using func_type = TransformInclusiveScanWithInitValueFunctor< + ExecutionSpace, index_type, ValueType, InputIteratorType, + OutputIteratorType, BinaryOpType, unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(first_from, first_dest, binary_op, + unary_op_type(), init_value)); + ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); + + // return + return first_dest + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0f00bebb6ddd148b23bf266c7a12dddbc9abda19 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp @@ -0,0 +1,148 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IteratorType, class ReducerType, class PredicateType> +struct StdIsPartitionedFunctor { + using red_value_type = typename ReducerType::value_type; + using index_type = typename IteratorType::difference_type; + + IteratorType m_first; + ReducerType m_reducer; + PredicateType m_p; + + KOKKOS_FUNCTION + void operator()(const index_type i, red_value_type& redValue) const { + const auto predicate_value = m_p(m_first[i]); + constexpr index_type m_red_id_min = + ::Kokkos::reduction_identity<index_type>::min(); + constexpr index_type m_red_id_max = + ::Kokkos::reduction_identity<index_type>::max(); + auto rv = predicate_value ? red_value_type{i, m_red_id_min} + : red_value_type{m_red_id_max, i}; + + m_reducer.join(redValue, rv); + } + + KOKKOS_FUNCTION + StdIsPartitionedFunctor(IteratorType first, ReducerType reducer, + PredicateType p) + : m_first(std::move(first)), + m_reducer(std::move(reducer)), + m_p(std::move(p)) {} +}; + +template <class ExecutionSpace, class IteratorType, class PredicateType> +bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { + // true if all elements in the range [first, last) that satisfy + // the predicate "pred" appear before all elements that don't. + // Also returns true if [first, last) is empty. + // also true if all elements satisfy the predicate. 
+ + // we implement it by finding: + // - the max location where predicate is true (max_loc_true) + // - the min location where predicate is false (min_loc_false) + // so the range is partitioned if max_loc_true < (min_loc_false) + + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + // trivial case + if (first == last) { + return true; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = StdIsPartitioned<index_type>; + using reduction_value_type = typename reducer_type::value_type; + using func_t = + StdIsPartitionedFunctor<IteratorType, reducer_type, PredicateType>; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + constexpr index_type red_id_min = + ::Kokkos::reduction_identity<index_type>::min(); + constexpr index_type red_id_max = + ::Kokkos::reduction_identity<index_type>::max(); + + if (red_result.max_loc_true != red_id_max && + red_result.min_loc_false != red_id_min) { + return red_result.max_loc_true < red_result.min_loc_false; + } else if (first + red_result.max_loc_true == --last) { + return true; + } else { + return false; + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4e36ae3890f578533465eca0bc224cea439d4f52 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp @@ -0,0 +1,117 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_IS_SORTED_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IteratorType, class ComparatorType> +struct StdIsSortedFunctor { + using index_type = typename IteratorType::difference_type; + IteratorType m_first; + ComparatorType m_comparator; + + KOKKOS_FUNCTION + void operator()(const index_type i, std::size_t& update) const { + const auto& val_i = m_first[i]; + const auto& val_ip1 = m_first[i + 1]; + + if (m_comparator(val_ip1, val_i)) { + ++update; + } + } + + KOKKOS_FUNCTION + StdIsSortedFunctor(IteratorType _first1, ComparatorType comparator) + : m_first(std::move(_first1)), m_comparator(std::move(comparator)) {} +}; + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + if (num_elements <= 1) { + return true; + } + + // use num_elements-1 because each index handles i and i+1 + const auto num_elements_minus_one = num_elements - 1; + using functor_type = StdIsSortedFunctor<IteratorType, ComparatorType>; + + // result is incremented by one if sorting breaks at index i + std::size_t result = 0; + ::Kokkos::parallel_reduce( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_minus_one), + functor_type(first, std::move(comp)), result); + + return result == 0; +} + +template <class ExecutionSpace, class 
IteratorType> +bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate<value_type>; + return is_sorted_impl(label, ex, first, last, pred_t()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4e99c301b26e2d8b238300945412de7038975d4b --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp @@ -0,0 +1,153 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <std_algorithms/Kokkos_Find.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IteratorType, class IndicatorViewType, class ComparatorType> +struct StdIsSortedUntilFunctor { + using index_type = typename IteratorType::difference_type; + IteratorType m_first; + IndicatorViewType m_indicator; + ComparatorType m_comparator; + + KOKKOS_FUNCTION + void operator()(const index_type i, int& update, const bool final) const { + const auto& val_i = m_first[i]; + const auto& val_ip1 = m_first[i + 1]; + + if (m_comparator(val_ip1, val_i)) { + ++update; + } + + if (final) { + m_indicator(i) = update; + } + } + + KOKKOS_FUNCTION + StdIsSortedUntilFunctor(IteratorType _first1, IndicatorViewType indicator, + ComparatorType comparator) + : 
m_first(std::move(_first1)), + m_indicator(std::move(indicator)), + m_comparator(std::move(comparator)) {} +}; + +template <class ExecutionSpace, class IteratorType, class ComparatorType> +IteratorType is_sorted_until_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + + // trivial case + if (num_elements <= 1) { + return last; + } + + /* + use scan and a helper "indicator" view + such that we scan the data and fill the indicator with + partial sum that is always 0 unless we find a pair that + breaks the sorting, so in that case the indicator will + have a 1 starting at the location where the sorting breaks. + So finding that 1 means finding the location we want. + */ + + // aliases + using indicator_value_type = std::size_t; + using indicator_view_type = + ::Kokkos::View<indicator_value_type*, ExecutionSpace>; + using functor_type = + StdIsSortedUntilFunctor<IteratorType, indicator_view_type, + ComparatorType>; + + // do scan + // use num_elements-1 because each index handles i and i+1 + const auto num_elements_minus_one = num_elements - 1; + indicator_view_type indicator("is_sorted_until_indicator_helper", + num_elements_minus_one); + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_minus_one), + functor_type(first, indicator, std::move(comp))); + + // try to find the first sentinel value, which indicates + // where the sorting condition breaks + namespace KE = ::Kokkos::Experimental; + constexpr indicator_value_type sentinel_value = 1; + auto r = + KE::find(ex, KE::cbegin(indicator), KE::cend(indicator), sentinel_value); + const auto shift = r - ::Kokkos::Experimental::cbegin(indicator); + + return first + (shift + 1); +} + +template <class ExecutionSpace, class 
IteratorType> +IteratorType is_sorted_until_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate<value_type>; + return is_sorted_until_impl(label, ex, first, last, pred_t()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c3dd13e6b5a15c54fef30c976135437fe400d0e0 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp @@ -0,0 +1,184 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class IteratorType1, class IteratorType2, + class ComparatorType> +struct StdCompareFunctor { + IteratorType1 m_it1; + IteratorType2 m_it2; + ComparatorType m_predicate; + + KOKKOS_FUNCTION + void operator()(IndexType /* i is unused */, int& lsum) const { + if (m_predicate(*m_it1, *m_it2)) { + lsum = 1; + } + } + + KOKKOS_FUNCTION + StdCompareFunctor(IteratorType1 _it1, IteratorType2 _it2, + ComparatorType _predicate) + : m_it1(std::move(_it1)), + m_it2(std::move(_it2)), + m_predicate(std::move(_predicate)) {} +}; + +template <class IndexType, class IteratorType1, class IteratorType2, + class ReducerType, class ComparatorType> +struct StdLexicographicalCompareFunctor { + 
using red_value_type = typename ReducerType::value_type; + IteratorType1 m_first1; + IteratorType2 m_first2; + ReducerType m_reducer; + ComparatorType m_comparator; + + KOKKOS_FUNCTION + void operator()(const IndexType i, red_value_type& red_value) const { + const auto& my_value1 = m_first1[i]; + const auto& my_value2 = m_first2[i]; + + bool different = m_comparator(my_value1, my_value2) || + m_comparator(my_value2, my_value1); + auto rv = + different + ? red_value_type{i} + : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; + + m_reducer.join(red_value, rv); + } + + KOKKOS_FUNCTION + StdLexicographicalCompareFunctor(IteratorType1 _first1, IteratorType2 _first2, + ReducerType _reducer, ComparatorType _comp) + : m_first1(std::move(_first1)), + m_first2(std::move(_first2)), + m_reducer(std::move(_reducer)), + m_comparator(std::move(_comp)) {} +}; + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class ComparatorType> +bool lexicographical_compare_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + // aliases + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc<index_type>; + using reduction_value_type = typename reducer_type::value_type; + + // run + const auto d1 = Kokkos::Experimental::distance(first1, last1); + const auto d2 = Kokkos::Experimental::distance(first2, last2); + const auto range = Kokkos::min(d1, d2); + reduction_value_type red_result; + reducer_type reducer(red_result); + using func1_t = + StdLexicographicalCompareFunctor<index_type, IteratorType1, IteratorType2, + reducer_type, ComparatorType>; + + 
::Kokkos::parallel_reduce(label, RangePolicy<ExecutionSpace>(ex, 0, range), + func1_t(first1, first2, reducer, comp), reducer); + + // fence not needed because reducing into scalar + // no mismatch + if (red_result.min_loc_true == + ::Kokkos::reduction_identity<index_type>::min()) { + auto new_last1 = first1 + range; + auto new_last2 = first2 + range; + bool is_prefix = (new_last1 == last1) && (new_last2 != last2); + return is_prefix; + } + + // check mismatched + int less = 0; + auto it1 = first1 + red_result.min_loc_true; + auto it2 = first2 + red_result.min_loc_true; + using func2_t = StdCompareFunctor<index_type, IteratorType1, IteratorType2, + ComparatorType>; + ::Kokkos::parallel_reduce(label, RangePolicy<ExecutionSpace>(ex, 0, 1), + func2_t(it1, it2, comp), less); + + // fence not needed because reducing into scalar + return static_cast<bool>(less); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +bool lexicographical_compare_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + using value_type_1 = typename IteratorType1::value_type; + using value_type_2 = typename IteratorType2::value_type; + using predicate_t = + Impl::StdAlgoLessThanBinaryPredicate<value_type_1, value_type_2>; + return lexicographical_compare_impl(label, ex, first1, last1, first2, last2, + predicate_t()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0a9d41b9bb62e3f4c22eb6ece4745b1033bc00d7 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp @@ -0,0 +1,167 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MIN_MAX_MINMAX_ELEMENT_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_MIN_MAX_MINMAX_ELEMENT_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IteratorType, class ReducerType> +struct StdMinOrMaxElemFunctor { + using index_type = typename IteratorType::difference_type; + using red_value_type = typename ReducerType::value_type; + + IteratorType m_first; + ReducerType m_reducer; + + KOKKOS_FUNCTION + void operator()(const index_type i, red_value_type& red_value) const { + m_reducer.join(red_value, red_value_type{m_first[i], i}); + } + + KOKKOS_FUNCTION + StdMinOrMaxElemFunctor(IteratorType first, ReducerType reducer) + : m_first(std::move(first)), m_reducer(std::move(reducer)) {} +}; + +template <class IteratorType, class ReducerType> +struct StdMinMaxElemFunctor { + using index_type = typename IteratorType::difference_type; + using red_value_type = typename ReducerType::value_type; + IteratorType m_first; + ReducerType m_reducer; + + KOKKOS_FUNCTION + void operator()(const index_type i, red_value_type& red_value) const { + const auto& my_value = m_first[i]; + m_reducer.join(red_value, red_value_type{my_value, my_value, i, i}); + } + + KOKKOS_FUNCTION + StdMinMaxElemFunctor(IteratorType first, ReducerType reducer) + : m_first(std::move(first)), m_reducer(std::move(reducer)) {} +}; + +template <template <class... Args> class ReducerType, class ExecutionSpace, + class IteratorType, class... Args> +IteratorType min_or_max_element_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Args&&... 
args) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return last; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using value_type = typename IteratorType::value_type; + using reducer_type = ReducerType<value_type, index_type, Args...>; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdMinOrMaxElemFunctor<IteratorType, reducer_type>; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result, std::forward<Args>(args)...); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, reducer), reducer); + + // fence not needed because reducing into scalar + + // return + return first + red_result.loc; +} + +template <template <class... Args> class ReducerType, class ExecutionSpace, + class IteratorType, class... Args> +::Kokkos::pair<IteratorType, IteratorType> minmax_element_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType first, + IteratorType last, Args&&... 
args) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return {first, first}; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using value_type = typename IteratorType::value_type; + using reducer_type = ReducerType<value_type, index_type, Args...>; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdMinMaxElemFunctor<IteratorType, reducer_type>; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result, std::forward<Args>(args)...); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, reducer), reducer); + + // fence not needed because reducing into scalar + + // return + return {first + red_result.min_loc, first + red_result.max_loc}; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp new file mode 100644 index 0000000000000000000000000000000000000000..180afe925c1158760d37cfb7db4ace25a8858ef5 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp @@ -0,0 +1,162 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MISMATCH_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_MISMATCH_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class IteratorType1, class IteratorType2, + class ReducerType, class BinaryPredicateType> +struct StdMismatchRedFunctor { + using red_value_type = typename ReducerType::value_type; + + IteratorType1 m_first1; + IteratorType2 m_first2; + ReducerType m_reducer; + BinaryPredicateType m_predicate; + + KOKKOS_FUNCTION + void operator()(const IndexType i, red_value_type& red_value) const { + const auto& my_value1 = m_first1[i]; + const auto& my_value2 = m_first2[i]; + + auto rv = + !m_predicate(my_value1, my_value2) + ? 
red_value_type{i} + : red_value_type{::Kokkos::reduction_identity<IndexType>::min()}; + + m_reducer.join(red_value, rv); + } + + KOKKOS_FUNCTION + StdMismatchRedFunctor(IteratorType1 first1, IteratorType2 first2, + ReducerType reducer, BinaryPredicateType predicate) + : m_first1(std::move(first1)), + m_first2(std::move(first2)), + m_reducer(std::move(reducer)), + m_predicate(std::move(predicate)) {} +}; + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType> +::Kokkos::pair<IteratorType1, IteratorType2> mismatch_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + // aliases + using return_type = ::Kokkos::pair<IteratorType1, IteratorType2>; + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc<index_type>; + using reduction_value_type = typename reducer_type::value_type; + using functor_type = + StdMismatchRedFunctor<index_type, IteratorType1, IteratorType2, + reducer_type, BinaryPredicateType>; + + // trivial case: note that this is important, + // for OpenMPTarget, omitting special handling of + // the trivial case was giving all sorts of strange stuff. + const auto num_e1 = last1 - first1; + const auto num_e2 = last2 - first2; + if (num_e1 == 0 || num_e2 == 0) { + return return_type(first1, first2); + } + + // run + const auto num_elemen_par_reduce = (num_e1 <= num_e2) ? 
num_e1 : num_e2; + reduction_value_type red_result; + reducer_type reducer(red_result); + ::Kokkos::parallel_reduce( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elemen_par_reduce), + functor_type(first1, first2, reducer, std::move(predicate)), reducer); + + // fence not needed because reducing into scalar + + // decide and return + constexpr auto red_min = ::Kokkos::reduction_identity<index_type>::min(); + if (red_result.min_loc_true == red_min) { + // in here means mismatch has not been found + if (num_e1 == num_e2) { + return return_type(last1, last2); + } else if (num_e1 < num_e2) { + return return_type(last1, first2 + num_e1); + } else { + return return_type(first1 + num_e2, last2); + } + } else { + // in here means mismatch has been found + return return_type(first1 + red_result.min_loc_true, + first2 + red_result.min_loc_true); + } +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +::Kokkos::pair<IteratorType1, IteratorType2> mismatch_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; + return mismatch_impl(label, ex, first1, last1, first2, last2, pred_t()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Move.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Move.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6b1ed1da4c17804c811e746c8d4889f7b1ed3c5b --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Move.hpp @@ -0,0 +1,100 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MOVE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_MOVE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class InputIterator, class OutputIterator> +struct StdMoveFunctor { + InputIterator m_first; + OutputIterator m_dest_first; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { + m_dest_first[i] = std::move(m_first[i]); + } + + StdMoveFunctor(InputIterator _first, OutputIterator _dest_first) + : m_first(std::move(_first)), m_dest_first(std::move(_dest_first)) {} +}; + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator move_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // aliases + using index_type = typename InputIterator::difference_type; + using func_t = StdMoveFunctor<index_type, InputIterator, OutputIterator>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, d_first)); + ex.fence("Kokkos::move: fence after operation"); + + // return + return d_first + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MoveBackward.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MoveBackward.hpp new file 
mode 100644 index 0000000000000000000000000000000000000000..c34ab679dbc927fcf42dfed420cc00add06dce58 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MoveBackward.hpp @@ -0,0 +1,104 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_MOVE_BACKWARD_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// Functor for parallel_for: for index i, move-assigns m_last[-i - 1] into +// m_dest_last[-i - 1], so both ranges are addressed backwards from their +// end iterators. The index is negated in the subscripts, which goes with +// the signed-index static_assert below. +template <class IndexType, class IteratorType1, class IteratorType2> +struct StdMoveBackwardFunctor { + static_assert(std::is_signed<IndexType>::value, + "Kokkos: StdMoveBackwardFunctor requires signed index type"); + + IteratorType1 m_last; + IteratorType2 m_dest_last; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { + m_dest_last[-i - 1] = std::move(m_last[-i - 1]); + } + + StdMoveBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) + : m_last(std::move(_last)), m_dest_last(std::move(_dest_last)) {} +}; + +// Moves the elements of [first, last) into the range that ends at d_last +// with a single parallel_for launched on `ex` under the given label, then +// fences `ex`. Returns d_last moved back by the number of elements, i.e. +// the beginning of the destination range. +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 move_backward_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first, d_last); + Impl::static_assert_iterators_have_matching_difference_type(first, d_last); +
Impl::expect_valid_range(first, last); + + // aliases + using index_type = typename IteratorType1::difference_type; + using func_t = + StdMoveBackwardFunctor<index_type, IteratorType1, IteratorType2>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(last, d_last)); + ex.fence("Kokkos::move_backward: fence after operation"); + + // return + return d_last - num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..508e4baedf60f1fb46c69bdb3ee05d725ea716ab --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp @@ -0,0 +1,180 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_PARTITION_COPY_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_PARTITION_COPY_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// Value type of the partition_copy scan: a pair of running counters, one +// for elements where the predicate was true and one for elements where +// it was false. +template <class ValueType> +struct StdPartitionCopyScalar { + ValueType true_count_; + ValueType false_count_; + + // Here we implement the copy assignment operators explicitly for consistency + // with how the Scalar structs are implemented inside + // Kokkos_Parallel_Reduce.hpp.
+ KOKKOS_FUNCTION + void operator=(const StdPartitionCopyScalar& other) { + true_count_ = other.true_count_; + false_count_ = other.false_count_; + } + + // this is needed for + // OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp:699:21: error: no viable + // overloaded '=' m_returnvalue = 0; + // + KOKKOS_FUNCTION + void operator=(const ValueType value) { + true_count_ = value; + false_count_ = value; + } +}; + +// Scan functor: every element increments true_count_ or false_count_ of the +// running value depending on m_pred. On the final pass the element is +// first copied to m_first_dest_true or m_first_dest_false at the offset +// given by the corresponding counter before that increment. +template <class IndexType, class FirstFrom, class FirstDestTrue, + class FirstDestFalse, class PredType> +struct StdPartitionCopyFunctor { + using value_type = StdPartitionCopyScalar<IndexType>; + + FirstFrom m_first_from; + FirstDestTrue m_first_dest_true; + FirstDestFalse m_first_dest_false; + PredType m_pred; + + KOKKOS_FUNCTION + StdPartitionCopyFunctor(FirstFrom first_from, FirstDestTrue first_dest_true, + FirstDestFalse first_dest_false, PredType pred) + : m_first_from(std::move(first_from)), + m_first_dest_true(std::move(first_dest_true)), + m_first_dest_false(std::move(first_dest_false)), + m_pred(std::move(pred)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + const auto& myval = m_first_from[i]; + if (final_pass) { + if (m_pred(myval)) { + m_first_dest_true[update.true_count_] = myval; + } else { + m_first_dest_false[update.false_count_] = myval; + } + } + + if (m_pred(myval)) { + update.true_count_ += 1; + } else { + update.false_count_ += 1; + } + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.true_count_ = 0; + update.false_count_ = 0; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + update.true_count_ += input.true_count_; + update.false_count_ += input.false_count_; + } +}; + +// Copies each element of [from_first, from_last) to the range starting at +// to_first_true when pred is true for it, otherwise to the range starting +// at to_first_false, using one parallel_scan on `ex`. Returns the pair of +// destination iterators advanced by the respective counts; an empty input +// returns the two destination iterators unchanged. +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorTrueType, class OutputIteratorFalseType, + class PredicateType> +::Kokkos::pair<OutputIteratorTrueType, OutputIteratorFalseType> +partition_copy_impl(const
std::string& label, const ExecutionSpace& ex, + InputIteratorType from_first, InputIteratorType from_last, + OutputIteratorTrueType to_first_true, + OutputIteratorFalseType to_first_false, + PredicateType pred) { + // impl uses a scan, this is similar to how we implemented copy_if + + // checks + Impl::static_assert_random_access_and_accessible( + ex, from_first, to_first_true, to_first_false); + Impl::static_assert_iterators_have_matching_difference_type( + from_first, to_first_true, to_first_false); + Impl::expect_valid_range(from_first, from_last); + + if (from_first == from_last) { + return {to_first_true, to_first_false}; + } + + // aliases + using index_type = typename InputIteratorType::difference_type; + using func_type = + StdPartitionCopyFunctor<index_type, InputIteratorType, + OutputIteratorTrueType, OutputIteratorFalseType, + PredicateType>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(from_first, from_last); + typename func_type::value_type counts{0, 0}; + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(from_first, to_first_true, to_first_false, pred), counts); + + // fence not needed here because of the scan into counts + + return {to_first_true + counts.true_count_, + to_first_false + counts.false_count_}; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionPoint.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionPoint.hpp new file mode 100644 index 0000000000000000000000000000000000000000..671e8d70f01d83f65d6c3173fe3753527b09c278 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_PartitionPoint.hpp @@ -0,0 +1,132 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v.
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_PARTITION_POINT_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_PARTITION_POINT_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// Reduction functor: for index i, joins into the running reduction value +// either i (when m_p is false for m_first[i]) or +// reduction_identity<index_type>::min() (when m_p is true), using the +// join of the stored reducer. +template <class IteratorType, class ReducerType, class PredicateType> +struct StdPartitionPointFunctor { + using red_value_type = typename ReducerType::value_type; + using index_type = typename IteratorType::difference_type; + + IteratorType m_first; + ReducerType m_reducer; + PredicateType m_p; + + KOKKOS_FUNCTION + void operator()(const index_type i, red_value_type& redValue) const { + const auto predicate_value = m_p(m_first[i]); + auto rv = + predicate_value + ? red_value_type{::Kokkos::reduction_identity<index_type>::min()} + : red_value_type{i}; + m_reducer.join(redValue, rv); + } + + KOKKOS_FUNCTION + StdPartitionPointFunctor(IteratorType first, ReducerType reducer, + PredicateType p) + : m_first(std::move(first)), + m_reducer(std::move(reducer)), + m_p(std::move(p)) {} +}; + +// Runs a parallel_reduce on `ex` with the StdPartitionPoint reducer and +// returns first advanced by the reduced min_loc_false, or last when that +// result still equals reduction_identity<index_type>::min(). An empty +// range returns first without launching a kernel. +template <class ExecutionSpace, class IteratorType, class PredicateType> +IteratorType partition_point_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType pred) { + // locates the end of the first partition, that is, the first + // element that does not satisfy p or last if all elements satisfy p. + // Implementation below finds the first location where p is false.
+ + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return first; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = StdPartitionPoint<index_type>; + using reduction_value_type = typename reducer_type::value_type; + using func_t = + StdPartitionPointFunctor<IteratorType, reducer_type, PredicateType>; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + if (red_result.min_loc_false == + ::Kokkos::reduction_identity<index_type>::min()) { + // if all elements are true, return last + return last; + } else { + return first + red_result.min_loc_false; + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RandomAccessIterator.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp similarity index 88% rename from packages/kokkos/algorithms/src/std_algorithms/Kokkos_RandomAccessIterator.hpp rename to packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp index 01c0d76720118198e6ee9a14bdd51ea5bdfcd229..2457d9400a24bc962bae1726d4dc238ab7d4a637 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_RandomAccessIterator.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp @@ -42,8 +42,8 @@ //@HEADER */ -#ifndef KOKKOS_RANDOM_ACCESS_ITERATOR_HPP -#define KOKKOS_RANDOM_ACCESS_ITERATOR_HPP +#ifndef KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP +#define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP #include <iterator> #include
<Kokkos_Macros.hpp> @@ -58,18 +58,16 @@ template <class T> class RandomAccessIterator; template <class DataType, class... Args> -class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > - : public std::iterator< - std::random_access_iterator_tag, - typename ::Kokkos::View<DataType, Args...>::value_type, ptrdiff_t, - typename ::Kokkos::View<DataType, Args...>::pointer_type, - typename ::Kokkos::View<DataType, Args...>::reference_type> { +class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > { public: - using view_type = ::Kokkos::View<DataType, Args...>; - using iterator_type = RandomAccessIterator<view_type>; - using difference_type = ptrdiff_t; - using value_type = typename view_type::value_type; - using reference = typename view_type::reference_type; + using view_type = ::Kokkos::View<DataType, Args...>; + using iterator_type = RandomAccessIterator<view_type>; + + using iterator_category = std::random_access_iterator_tag; + using value_type = typename view_type::value_type; + using difference_type = ptrdiff_t; + using pointer = typename view_type::pointer_type; + using reference = typename view_type::reference_type; static_assert(view_type::rank == 1 && (std::is_same<typename view_type::traits::array_layout, diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reduce.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reduce.hpp new file mode 100644 index 0000000000000000000000000000000000000000..26e0795d8b1c1214b7bcf1a3dfe30d26aaf5aa0b --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reduce.hpp @@ -0,0 +1,186 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REDUCE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_REDUCE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// Reduction functor for the default sum case: accumulates m_first[i] into +// the running value with the compound addition operator. +template <class IteratorType, class ValueType> +struct StdReduceDefaultFunctor { + using index_type = typename IteratorType::difference_type; + + const IteratorType m_first; + + KOKKOS_FUNCTION + void operator()(const index_type i, ValueType& update) const { + update += m_first[i]; + } +}; + +// Default joiner: returns the sum of its two arguments. +template <class ValueType> +struct StdReduceDefaultJoinFunctor { + KOKKOS_FUNCTION + constexpr ValueType operator()(const ValueType& a, const ValueType& b) const { + return a + b; + } +}; + +// Reduction functor used with a reducer object: wraps m_first[i] in the +// reducer's value type with its flag set to false, then either assigns it +// to the running value (while that value is still flagged is_initial) or +// joins it in through m_reducer. +template <class IteratorType, class ReducerType> +struct StdReduceFunctor { + using red_value_type = typename ReducerType::value_type; + using index_type = typename IteratorType::difference_type; + + const IteratorType m_first; + const ReducerType m_reducer; + + KOKKOS_FUNCTION + void operator()(const index_type i, red_value_type& red_value) const { + auto tmp_wrapped_value = red_value_type{m_first[i], false}; + + if (red_value.is_initial) { + red_value = tmp_wrapped_value; + } else { + m_reducer.join(red_value, tmp_wrapped_value); + } + } + + KOKKOS_FUNCTION + StdReduceFunctor(IteratorType first, ReducerType reducer) + : m_first(std::move(first)), m_reducer(std::move(reducer)) {} +}; + +//------------------------------ +// reduce_custom_functors_impl +//------------------------------ +// Reduces [first, last) with `joiner` through a parallel_reduce on `ex`, +// then returns joiner(result, init_reduction_value). An empty range +// returns init_reduction_value unmodified without launching a kernel. +template <class ExecutionSpace, class IteratorType, class ValueType, + class JoinerType> +ValueType reduce_custom_functors_impl(const std::string& label, + const ExecutionSpace& ex, +
IteratorType first, IteratorType last, + ValueType init_reduction_value, + JoinerType joiner) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::static_assert_is_not_openmptarget(ex); + Impl::expect_valid_range(first, last); + + if (first == last) { + // init is returned, unmodified + return init_reduction_value; + } + + // aliases + using reducer_type = + ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>; + using functor_type = StdReduceFunctor<IteratorType, reducer_type>; + using reduction_value_type = typename reducer_type::value_type; + + // run + reduction_value_type result; + reducer_type reducer(result, joiner); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + functor_type(first, reducer), reducer); + + // fence not needed since reducing into scalar + return joiner(result.val, init_reduction_value); +} + +// Detection alias: names the type of +// Kokkos::reduction_identity<ValueType>::sum() and is therefore only +// well-formed when that expression is valid. +template <typename ValueType> +using has_reduction_identity_sum_t = + decltype(Kokkos::reduction_identity<ValueType>::sum()); + +// Sum-reduces [first, last). When reduction_identity<value_type>::sum() is +// detected, runs a plain parallel_reduce on `ex` and adds +// init_reduction_value to the result (an empty range returns +// init_reduction_value unmodified); otherwise forwards to +// reduce_custom_functors_impl with a StdReduceDefaultJoinFunctor. +template <class ExecutionSpace, class IteratorType, class ValueType> +ValueType reduce_default_functors_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ValueType init_reduction_value) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::static_assert_is_not_openmptarget(ex); + Impl::expect_valid_range(first, last); + + using value_type = Kokkos::Impl::remove_cvref_t<ValueType>; + + if (::Kokkos::is_detected<has_reduction_identity_sum_t, value_type>::value) { + if (first == last) { + // init is returned, unmodified + return init_reduction_value; + } + + using functor_type = + Impl::StdReduceDefaultFunctor<IteratorType, value_type>; + + // run + value_type tmp; + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(label, +
RangePolicy<ExecutionSpace>(ex, 0, num_elements), + functor_type{first}, tmp); + // fence not needed since reducing into scalar + tmp += init_reduction_value; + return tmp; + } else { + using joiner_type = Impl::StdReduceDefaultJoinFunctor<value_type>; + return reduce_custom_functors_impl( + label, ex, first, last, std::move(init_reduction_value), joiner_type()); + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp similarity index 91% rename from packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp rename to packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp index dd529a25c6508ce9d07a90c77a6ec6fd5beef7d0..d8e383b859218bd33b67efdd9f11c374372b8fb5 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp @@ -42,8 +42,8 @@ //@HEADER */ -#ifndef KOKKOS_STD_ReducerWithArbitraryJoinerNoNeutralElement_hpp_ -#define KOKKOS_STD_ReducerWithArbitraryJoinerNoNeutralElement_hpp_ +#ifndef KOKKOS_STD_ALGORITHMS_REDUCER_WITH_ARBITRARY_JOINER_NONEUTRAL_ELEMENT_HPP +#define KOKKOS_STD_ALGORITHMS_REDUCER_WITH_ARBITRARY_JOINER_NONEUTRAL_ELEMENT_HPP #include <Kokkos_Core.hpp> #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" @@ -58,7 +58,7 @@ namespace Impl { template <class Scalar, class JoinerType, class Space = HostSpace> struct ReducerWithArbitraryJoinerNoNeutralElement { - using scalar_type = typename std::remove_cv<Scalar>::type; + using scalar_type = std::remove_cv_t<Scalar>; public: // Required @@ -90,11 +90,6 @@ struct
ReducerWithArbitraryJoinerNoNeutralElement { dest.val = m_joiner(dest.val, src.val); } - KOKKOS_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest.val = m_joiner(dest.val, src.val); - } - KOKKOS_FUNCTION void init(value_type& val) const { // I cannot call reduction_identity, so need to default this diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RemoveAllVariants.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RemoveAllVariants.hpp new file mode 100644 index 0000000000000000000000000000000000000000..742d4d776a8c755a76393ae38a375b8dd0d61ed6 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RemoveAllVariants.hpp @@ -0,0 +1,212 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REMOVE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_REMOVE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <std_algorithms/Kokkos_CountIf.hpp> +#include <std_algorithms/Kokkos_CopyIf.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// Stage 1 scan functor: counts the elements for which m_must_remove is +// false and, on the final pass, move-assigns each such element to +// m_first_dest at the offset given by the running count. +template <class IndexType, class FirstFrom, class FirstDest, class PredType> +struct StdRemoveIfStage1Functor { + FirstFrom m_first_from; + FirstDest m_first_dest; + PredType m_must_remove; + + KOKKOS_FUNCTION + StdRemoveIfStage1Functor(FirstFrom first_from, FirstDest first_dest, + PredType pred) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_must_remove(std::move(pred)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, IndexType& update, + const bool final_pass) const { + auto& myval = m_first_from[i]; + if (final_pass) { + if (!m_must_remove(myval)) { + // calling move here is ok because we
are inside final pass + // we are calling move assign as specified by the std + m_first_dest[update] = std::move(myval); + } + } + + if (!m_must_remove(myval)) { + update += 1; + } + } +}; + +// Stage 2 functor for parallel_for: move-assigns element i of m_first_from +// to element i of m_first_to. +template <class IndexType, class InputIteratorType, class OutputIteratorType> +struct StdRemoveIfStage2Functor { + InputIteratorType m_first_from; + OutputIteratorType m_first_to; + + KOKKOS_FUNCTION + StdRemoveIfStage2Functor(InputIteratorType first_from, + OutputIteratorType first_to) + : m_first_from(std::move(first_from)), m_first_to(std::move(first_to)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i) const { + m_first_to[i] = std::move(m_first_from[i]); + } +}; + +// Removes the elements of [first, last) that satisfy pred: the elements to +// keep are moved into a temporary view by a parallel_scan (stage 1) and +// then moved back to the front of the range by a parallel_for (stage 2), +// followed by a fence on `ex`. Returns first advanced by the number of +// kept elements; an empty range returns last. +template <class ExecutionSpace, class IteratorType, class UnaryPredicateType> +IteratorType remove_if_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + UnaryPredicateType pred) { + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return last; + } else { + // create tmp buffer to use to *move* all elements that we need to keep. + // note that the tmp buffer is just large enough to store + // all elements to keep, because ideally we do not need/want one + // as large as the original range. + // To allocate the right tmp view, we need a call to count_if. + // We could just do a "safe" allocation of a buffer as + // large as (last-first), but I think a call to count_if is more affordable.
+ + // count how many elements we need to keep + // note that the elements to remove are those that meet the predicate + const auto remove_count = + ::Kokkos::Experimental::count_if(ex, first, last, pred); + const auto keep_count = + Kokkos::Experimental::distance(first, last) - remove_count; + + // create helper tmp view + using value_type = typename IteratorType::value_type; + using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>; + tmp_view_type tmp_view("std_remove_if_tmp_view", keep_count); + using tmp_readwrite_iterator_type = decltype(begin(tmp_view)); + + // in stage 1, *move* all elements to keep from original range to tmp + // we use similar impl as copy_if except that we *move* rather than copy + using index_type = typename IteratorType::difference_type; + using func1_type = StdRemoveIfStage1Functor<index_type, IteratorType, + tmp_readwrite_iterator_type, + UnaryPredicateType>; + + const auto scan_num_elements = Kokkos::Experimental::distance(first, last); + index_type scan_count = 0; + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, scan_num_elements), + func1_type(first, begin(tmp_view), pred), scan_count); + + // scan_count should be equal to keep_count + assert(scan_count == keep_count); + (void)scan_count; // to avoid unused complaints + + // stage 2, we do parfor to move from tmp to original range + using func2_type = + StdRemoveIfStage2Functor<index_type, tmp_readwrite_iterator_type, + IteratorType>; + ::Kokkos::parallel_for( + "remove_if_stage2_parfor", + RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)), + func2_type(begin(tmp_view), first)); + ex.fence("Kokkos::remove_if: fence after stage2"); + + // return + return first + keep_count; + } +} + +// Forwards to remove_if_impl with a StdAlgoEqualsValUnaryPredicate built +// from `value`. +template <class ExecutionSpace, class IteratorType, class ValueType> +auto remove_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + const ValueType& value) { + using predicate_type =
StdAlgoEqualsValUnaryPredicate<ValueType>; + return remove_if_impl(label, ex, first, last, predicate_type(value)); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType> +auto remove_copy_impl(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, const ValueType& value) { + // this is like copy_if except that we need to *ignore* the elements + // that match the value, so we can solve this as follows: + // call copy_if with a StdAlgoNotEqualsValUnaryPredicate built from value. + + using predicate_type = StdAlgoNotEqualsValUnaryPredicate<ValueType>; + return ::Kokkos::Experimental::copy_if(label, ex, first_from, last_from, + first_dest, predicate_type(value)); +} + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class UnaryPredicate> +auto remove_copy_if_impl(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + const UnaryPredicate& pred) { + // this is like copy_if except that we need to *ignore* the elements + // satisfying the pred, so we can solve this as follows: + // call copy_if with pred wrapped in StdAlgoNegateUnaryPredicateWrapper. + + using value_type = typename InputIteratorType::value_type; + using pred_wrapper_type = + StdAlgoNegateUnaryPredicateWrapper<value_type, UnaryPredicate>; + return ::Kokkos::Experimental::copy_if(label, ex, first_from, last_from, + first_dest, pred_wrapper_type(pred)); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Replace.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Replace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..877ffa276faebd4b6368dc23f63946587751de05 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Replace.hpp @@ -0,0 +1,103 @@ +/* +//@HEADER +//
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REPLACE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_REPLACE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class InputIterator, class ValueType> +struct StdReplaceFunctor { + using index_type = typename InputIterator::difference_type; + InputIterator m_first; + ValueType m_old_value; + ValueType m_new_value; + + KOKKOS_FUNCTION + void operator()(index_type i) const { + if (m_first[i] == m_old_value) { + m_first[i] = m_new_value; + } + } + + KOKKOS_FUNCTION + StdReplaceFunctor(InputIterator first, ValueType old_value, + ValueType new_value) + : m_first(std::move(first)), + m_old_value(std::move(old_value)), + m_new_value(std::move(new_value)) {} +}; + +template <class ExecutionSpace, class IteratorType, class ValueType> +void replace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + const ValueType& old_value, const ValueType& new_value) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + // aliases + using func_t = StdReplaceFunctor<IteratorType, ValueType>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, old_value, new_value)); + ex.fence("Kokkos::replace: fence after operation"); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopy.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..b75dde9cd804836e9fb39ee41393c84f0cb1fded --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopy.hpp @@ -0,0 +1,122 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class InputIterator, class OutputIterator, class ValueType> +struct StdReplaceCopyFunctor { + using index_type = typename InputIterator::difference_type; + + InputIterator m_first_from; + OutputIterator m_first_dest; + ValueType m_old_value; + ValueType m_new_value; + + KOKKOS_FUNCTION + void operator()(index_type i) const { + const auto& myvalue_from = m_first_from[i]; + + if (myvalue_from == m_old_value) { + m_first_dest[i] = m_new_value; + } else { + m_first_dest[i] = myvalue_from; + } + } + + KOKKOS_FUNCTION + StdReplaceCopyFunctor(InputIterator first_from, OutputIterator first_dest, + ValueType old_value, ValueType new_value) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_old_value(std::move(old_value)), + m_new_value(std::move(new_value)) {} +}; + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType> +OutputIteratorType 
replace_copy_impl(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + const ValueType& old_value, + const ValueType& new_value) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using func_t = + StdReplaceCopyFunctor<InputIteratorType, OutputIteratorType, ValueType>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first_from, first_dest, old_value, new_value)); + ex.fence("Kokkos::replace_copy: fence after operation"); + + // return + return first_dest + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopyIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopyIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8f7c8140e6b9bb1099876171e4ecd07eed5530a2 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceCopyIf.hpp @@ -0,0 +1,123 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_REPLACE_COPY_IF_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class InputIterator, class OutputIterator, + class PredicateType, class ValueType> +struct StdReplaceIfCopyFunctor { + InputIterator m_first_from; + OutputIterator m_first_dest; + PredicateType m_pred; + ValueType m_new_value; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { + const auto& myvalue_from = m_first_from[i]; + + if (m_pred(myvalue_from)) { + m_first_dest[i] = m_new_value; + } else { + m_first_dest[i] = myvalue_from; + } + } + + KOKKOS_FUNCTION + StdReplaceIfCopyFunctor(InputIterator first_from, OutputIterator first_dest, + PredicateType pred, ValueType new_value) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_pred(std::move(pred)), + m_new_value(std::move(new_value)) {} +}; + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class PredicateType, class ValueType> +OutputIteratorType replace_copy_if_impl(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + PredicateType pred, + const ValueType& new_value) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using func_t = + StdReplaceIfCopyFunctor<index_type, 
InputIteratorType, OutputIteratorType, + PredicateType, ValueType>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_for( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first_from, first_dest, std::move(pred), new_value)); + ex.fence("Kokkos::replace_copy_if: fence after operation"); + + // return + return first_dest + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceIf.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceIf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6fe33019c0b6703713c4b17e5c71cbe21cc444c2 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReplaceIf.hpp @@ -0,0 +1,105 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REPLACE_IF_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_REPLACE_IF_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class InputIterator, class PredicateType, class NewValueType> +struct StdReplaceIfFunctor { + using index_type = typename InputIterator::difference_type; + + InputIterator m_first; + PredicateType m_predicate; + NewValueType m_new_value; + + KOKKOS_FUNCTION + void operator()(index_type i) const { + if (m_predicate(m_first[i])) { + m_first[i] = m_new_value; + } + } + + KOKKOS_FUNCTION + StdReplaceIfFunctor(InputIterator first, PredicateType pred, + NewValueType new_value) + : m_first(std::move(first)), + m_predicate(std::move(pred)), + m_new_value(std::move(new_value)) {} +}; + +template <class ExecutionSpace, class IteratorType, class PredicateType, + class ValueType> +void replace_if_impl(const std::string& 
label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, PredicateType pred, + const ValueType& new_value) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + // aliases + using func_t = StdReplaceIfFunctor<IteratorType, PredicateType, ValueType>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, std::move(pred), new_value)); + ex.fence("Kokkos::replace_if: fence after operation"); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f84eb2c81ad4fbf218ecc8c67de8f1f9eabc39f9 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp @@ -0,0 +1,111 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REVERSE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_REVERSE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <std_algorithms/Kokkos_Swap.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class InputIterator> +struct StdReverseFunctor { + using index_type = typename InputIterator::difference_type; + static_assert(std::is_signed<index_type>::value, + "Kokkos: StdReverseFunctor requires signed index type"); + + InputIterator m_first; + InputIterator m_last; + + KOKKOS_FUNCTION + void operator()(index_type i) const { + // the swap below is doing the same thing, but + // for Intel 18.0.5 does not work. + // But putting the impl directly here, it works. 
+#ifdef KOKKOS_COMPILER_INTEL + typename InputIterator::value_type tmp = std::move(m_first[i]); + m_first[i] = std::move(m_last[-i - 1]); + m_last[-i - 1] = std::move(tmp); +#else + ::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]); +#endif + } + + StdReverseFunctor(InputIterator first, InputIterator last) + : m_first(std::move(first)), m_last(std::move(last)) {} +}; + +template <class ExecutionSpace, class InputIterator> +void reverse_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + // aliases + using func_t = StdReverseFunctor<InputIterator>; + + // run + if (last >= first + 2) { + // only need half + const auto num_elements = Kokkos::Experimental::distance(first, last) / 2; + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first, last)); + ex.fence("Kokkos::reverse: fence after operation"); + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReverseCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReverseCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..88b6ed16b5ee326f9551aa2168884c4f52641992 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ReverseCopy.hpp @@ -0,0 +1,102 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_REVERSE_COPY_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_REVERSE_COPY_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class InputIterator, class OutputIterator> +struct StdReverseCopyFunctor { + static_assert(std::is_signed<IndexType>::value, + "Kokkos: StdReverseCopyFunctor requires signed index type"); + + InputIterator m_last; + OutputIterator m_dest_first; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { m_dest_first[i] = m_last[-1 - i]; } + + StdReverseCopyFunctor(InputIterator _last, OutputIterator _dest_first) + : m_last(std::move(_last)), m_dest_first(std::move(_dest_first)) {} +}; + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator reverse_copy_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // aliases + using index_type = typename InputIterator::difference_type; + using func_t = + StdReverseCopyFunctor<index_type, InputIterator, OutputIterator>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(last, d_first)); + ex.fence("Kokkos::reverse_copy: fence after operation"); + + // return + return d_first + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git 
a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Rotate.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Rotate.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c08cf1aecbf25f6532810ab0ff4fe138309b5eff --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Rotate.hpp @@ -0,0 +1,219 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_ROTATE_IMPL_HPP
+#define KOKKOS_STD_ALGORITHMS_ROTATE_IMPL_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_HelperPredicates.hpp"
+#include <std_algorithms/Kokkos_Move.hpp>
+#include <std_algorithms/Kokkos_Distance.hpp>
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// Rotates [first, last) so that the element at n_first becomes the first
+// element, for the case where n_first is at or before the middle of the
+// range. The larger (right) part is staged through a temporary view.
+//
+// Returns an iterator to the new position of the element that was at first,
+// i.e. first + (last - n_first). Fences ex before returning.
+template <class ExecutionSpace, class IteratorType>
+IteratorType rotate_with_pivot_in_left_half(const std::string& label,
+                                            const ExecutionSpace& ex,
+                                            IteratorType first,
+                                            IteratorType n_first,
+                                            IteratorType last) {
+  /*
+    This impl is specific for when the n_first iterator points to
+    an element that is before or equal to the middle of the range.
+
+    Example: a range of 16 elements where n_first points at index 3:
+
+      [first, n_first) = | 0 | 1 | 2 |
+      [n_first, last)  = | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | -6 | -5 | 8 | 9 | 11 |
+
+    In step 1, we create a temporary view with extent = distance(n_first, last)
+    and *move* the elements from [n_first, last) to tmp view, such that
+    tmp view becomes:
+
+      | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | -6 | -5 | 8 | 9 | 11 |
+
+    In step 2, we move the elements in [first, n_first)
+    to the new position where they are supposed to end up.
+
+    In step 3, we move the elements from the tmp view to
+    the range starting at first.
+  */
+
+  namespace KE                     = ::Kokkos::Experimental;
+  const auto num_elements_on_left  = KE::distance(first, n_first);
+  const auto num_elements_on_right = KE::distance(n_first, last);
+
+  // create helper tmp view
+  using value_type    = typename IteratorType::value_type;
+  using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+  tmp_view_type tmp_view("rotate_impl_for_pivot_in_left_half_impl",
+                         num_elements_on_right);
+  using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
+
+  // index_type is the same and needed in all steps
+  using index_type = typename IteratorType::difference_type;
+
+  // step 1: [n_first, last) -> tmp view
+  using step1_func_type =
+      StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_right),
+      step1_func_type(n_first, begin(tmp_view)));
+
+  // step 2: [first, n_first) -> range starting at first + right count.
+  // When dispatched from rotate_impl the left part is never longer than the
+  // right part, so source and destination of this parallel move are disjoint.
+  using step2_func_type =
+      StdMoveFunctor<index_type, IteratorType, IteratorType>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_left),
+      step2_func_type(first, first + num_elements_on_right));
+
+  // step 3: tmp view -> range starting at first
+  using step3_func_type =
+      StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>;
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)),
+                         step3_func_type(begin(tmp_view), first));
+
+  ex.fence("Kokkos::rotate: fence after operation");
+  return first + (last - n_first);
+}
+
+// Rotates [first, last) so that the element at n_first becomes the first
+// element, for the case where n_first is past the middle of the range.
+// The larger (left) part is staged through a temporary view.
+//
+// Returns an iterator to the new position of the element that was at first,
+// i.e. first + (last - n_first). Fences ex before returning.
+template <class ExecutionSpace, class IteratorType>
+IteratorType rotate_with_pivot_in_right_half(const std::string& label,
+                                             const ExecutionSpace& ex,
+                                             IteratorType first,
+                                             IteratorType n_first,
+                                             IteratorType last) {
+  /*
+    This impl is specific for when the n_first iterator points to
+    an element that is after the middle of the range.
+
+    Example: a range of 16 elements where n_first points at index 11:
+
+      [first, n_first) = | 0 | 1 | 2 | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 |
+      [n_first, last)  = | -6 | -5 | 8 | 9 | 11 |
+
+    In step 1, we create a temporary view with extent = distance(first, n_first)
+    and *move* the elements from [first, n_first) to tmp view,
+    such that tmp view becomes:
+
+      | 0 | 1 | 2 | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 |
+
+    In step 2, we move the elements in [n_first, last)
+    to the beginning where they are supposed to end up.
+
+    In step 3, we move the elements from the tmp view to
+    the range starting at first + distance(n_first, last).
+  */
+
+  namespace KE                     = ::Kokkos::Experimental;
+  const auto num_elements_on_left  = KE::distance(first, n_first);
+  const auto num_elements_on_right = KE::distance(n_first, last);
+
+  // create helper tmp view
+  // (label fixed: it used to carry the left-half name by copy-paste)
+  using value_type    = typename IteratorType::value_type;
+  using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+  tmp_view_type tmp_view("rotate_impl_for_pivot_in_right_half_impl",
+                         num_elements_on_left);
+  using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
+
+  // index_type is the same and needed in all steps
+  using index_type = typename IteratorType::difference_type;
+
+  // step 1: [first, n_first) -> tmp view
+  using step1_func_type =
+      StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_left),
+      step1_func_type(first, begin(tmp_view)));
+
+  // step 2: [n_first, last) -> range starting at first.
+  // When dispatched from rotate_impl the right part is strictly shorter than
+  // the left part, so source and destination of this parallel move are
+  // disjoint.
+  using step2_func_type =
+      StdMoveFunctor<index_type, IteratorType, IteratorType>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_right),
+      step2_func_type(n_first, first));
+
+  // step 3: tmp view -> range starting at first + right count
+  using step3_func_type =
+      StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)),
+      step3_func_type(begin(tmp_view), first + num_elements_on_right));
+
+  ex.fence("Kokkos::rotate: fence after operation");
+  return first + (last - n_first);
+}
+
+// Entry point of the rotate implementation.
+//
+// Validates the three iterators and dispatches to the helper that stages the
+// larger of the two sub-ranges through a temporary view.
+//
+// label : name passed to the parallel_for launches
+// ex    : execution space instance used for the launches and the fence
+// first, n_first, last : [first, last) is rotated so that *n_first comes
+//                        first; n_first must lie within [first, last]
+//
+// Returns first + (last - n_first).
+template <class ExecutionSpace, class IteratorType>
+IteratorType rotate_impl(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, IteratorType n_first,
+                         IteratorType last) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+  Impl::expect_valid_range(first, n_first);
+  Impl::expect_valid_range(n_first, last);
+
+  // Trivial rotations (pivot at either end, which includes the empty range)
+  // leave every element in place: skip the scratch allocation and the three
+  // kernel launches. The fence is kept so that the synchronization behavior
+  // seen by callers is the same as on the non-trivial path.
+  if (n_first == first || n_first == last) {
+    ex.fence("Kokkos::rotate: fence after operation");
+    return first + (last - n_first);
+  }
+
+  namespace KE                     = ::Kokkos::Experimental;
+  const auto num_elements          = KE::distance(first, last);
+  const auto n_distance_from_first = KE::distance(first, n_first);
+  if (n_distance_from_first <= num_elements / 2) {
+    return rotate_with_pivot_in_left_half(label, ex, first, n_first, last);
+  } else {
+    return rotate_with_pivot_in_right_half(label, ex, first, n_first, last);
+  }
+}
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RotateCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RotateCopy.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a8111a511f10512f31bcf201a7e5eddb47f26ced
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RotateCopy.hpp
@@ -0,0 +1,149 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_ROTATE_COPY_IMPL_HPP
+#define KOKKOS_STD_ALGORITHMS_ROTATE_COPY_IMPL_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_HelperPredicates.hpp"
+#include <std_algorithms/Kokkos_Distance.hpp>
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// Functor writing one element of the rotated copy per index i:
+// the first (m_last - m_first_n) outputs come from the range starting at
+// m_first_n, the remaining ones from the range starting at m_first.
+template <class IndexType, class InputIterator, class OutputIterator>
+struct StdRotateCopyFunctor {
+  InputIterator m_first;
+  InputIterator m_last;
+  InputIterator m_first_n;
+  OutputIterator m_dest_first;
+
+  KOKKOS_FUNCTION
+  void operator()(IndexType i) const {
+    // number of elements in [m_first_n, m_last), which go to the front
+    const IndexType tail_length = m_last - m_first_n;
+
+    if (i >= tail_length) {
+      // past the tail: take from the head of the input range
+      m_dest_first[i] = m_first[i - tail_length];
+      return;
+    }
+
+    m_dest_first[i] = m_first_n[i];
+  }
+
+  StdRotateCopyFunctor(InputIterator first, InputIterator last,
+                       InputIterator first_n, OutputIterator dest_first)
+      : m_first(std::move(first)),
+        m_last(std::move(last)),
+        m_first_n(std::move(first_n)),
+        m_dest_first(std::move(dest_first)) {}
+};
+
+// Copies [first, last) into the range starting at d_first, rotated so that
+// the element at n_first is written first.
+//
+// Returns d_first unchanged for an empty input range, otherwise
+// d_first + distance(first, last). Fences ex after the copy.
+template <class ExecutionSpace, class InputIterator, class OutputIterator>
+OutputIterator rotate_copy_impl(const std::string& label,
+                                const ExecutionSpace& ex, InputIterator first,
+                                InputIterator n_first, InputIterator last,
+                                OutputIterator d_first) {
+  /*
+    How the output is filled (here n_first = first + 9 in a range of 12):
+
+      dest+0 <- n_first
+      dest+1 <- n_first+1
+      dest+2 <- n_first+2
+      dest+3 <- first
+      dest+4 <- first+1
+      dest+5 <- first+2
+      ...
+
+    With shift = last - n_first, for every index i:
+
+      if (i < shift) *(d_first + i) = *(n_first + i);
+      else           *(d_first + i) = *(first + i - shift);
+  */
+
+  namespace KE = ::Kokkos::Experimental;
+
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, d_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, d_first);
+  Impl::expect_valid_range(first, last);
+  Impl::expect_valid_range(first, n_first);
+  Impl::expect_valid_range(n_first, last);
+
+  // nothing to copy
+  if (first == last) {
+    return d_first;
+  }
+
+  using index_type = typename InputIterator::difference_type;
+  using functor_type =
+      StdRotateCopyFunctor<index_type, InputIterator, OutputIterator>;
+
+  // one work item per input element
+  const auto range_length = KE::distance(first, last);
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, range_length),
+                         functor_type(first, last, n_first, d_first));
+
+  ex.fence("Kokkos::rotate_copy: fence after operation");
+
+  // one past the last element written
+  return d_first + range_length;
+}
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2afb0a74f013574f852b178b0a7f505462a80feb
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp
@@ -0,0 +1,191 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1.
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_SEARCH_IMPL_HPP
+#define KOKKOS_STD_ALGORITHMS_SEARCH_IMPL_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_HelperPredicates.hpp"
+#include <std_algorithms/Kokkos_Equal.hpp>
+#include <std_algorithms/Kokkos_Distance.hpp>
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// Reduction functor for search: work item i checks whether the sequence
+// [m_s_first, m_s_last) matches the elements starting at m_first + i under
+// the predicate m_p, and joins either i (match) or the reduction identity
+// (no match) into the running reduction value.
+template <class IndexType, class IteratorType1, class IteratorType2,
+          class ReducerType, class PredicateType>
+struct StdSearchFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType1 m_first;
+  IteratorType1 m_last;
+  IteratorType2 m_s_first;
+  IteratorType2 m_s_last;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    namespace KE = ::Kokkos::Experimental;
+    auto window  = m_first + i;
+
+    const auto pattern_length = KE::distance(m_s_first, m_s_last);
+
+    bool matched = true;
+    for (IndexType k = 0; k < pattern_length; ++k) {
+      // Sanity check that the window stays inside [m_first, m_last).
+      // It could probably be removed because the launch site already sizes
+      // the range policy so that no window runs out of bounds.
+      KOKKOS_EXPECTS((window + k) < m_last);
+
+      if (m_p(window[k], m_s_first[k])) {
+        continue;
+      }
+
+      // first mismatch: this start position is not a match
+      matched = false;
+      break;
+    }
+
+    // contribute the start index on a match, the identity otherwise
+    red_value_type rv{::Kokkos::reduction_identity<IndexType>::min()};
+    if (matched) {
+      rv = red_value_type{i};
+    }
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdSearchFunctor(IteratorType1 first, IteratorType1 last,
+                   IteratorType2 s_first, IteratorType2 s_last,
+                   ReducerType reducer, PredicateType p)
+      : m_first(std::move(first)),
+        m_last(std::move(last)),
+        m_s_first(std::move(s_first)),
+        m_s_last(std::move(s_last)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+// Searches [first, last) for the first occurrence of the sequence
+// [s_first, s_last), comparing elements with pred.
+//
+// Precondition (checked with KOKKOS_EXPECTS): the sequence is not longer
+// than the searched range.
+//
+// Returns first if the sequence is empty, last if the searched range is
+// empty or no occurrence exists, otherwise an iterator to the start of the
+// first occurrence.
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 search_impl(const std::string& label, const ExecutionSpace& ex,
+                          IteratorType1 first, IteratorType1 last,
+                          IteratorType2 s_first, IteratorType2 s_last,
+                          const BinaryPredicateType& pred) {
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first, s_first);
+  Impl::static_assert_iterators_have_matching_difference_type(first, s_first);
+  Impl::expect_valid_range(first, last);
+  Impl::expect_valid_range(s_first, s_last);
+
+  // the target sequence should not be larger than the range [first, last)
+  namespace KE            = ::Kokkos::Experimental;
+  const auto num_elements = KE::distance(first, last);
+  const auto s_count      = KE::distance(s_first, s_last);
+  KOKKOS_EXPECTS(num_elements >= s_count);
+  (void)s_count;  // needed when macro above is a no-op
+
+  // an empty sequence is found at the very beginning
+  if (s_first == s_last) {
+    return first;
+  }
+
+  // nothing to search in
+  if (first == last) {
+    return last;
+  }
+
+  // ranges of equal size: the only candidate position is first,
+  // so an element-wise comparison of the two ranges decides
+  if (num_elements == s_count) {
+    const auto equal_result = equal_impl(label, ex, first, last, s_first, pred);
+    return (equal_result) ? first : last;
+  }
+
+  // general case: reduce over all feasible start positions
+  using index_type           = typename IteratorType1::difference_type;
+  using reducer_type         = FirstLoc<index_type>;
+  using reduction_value_type = typename reducer_type::value_type;
+  using func_t = StdSearchFunctor<index_type, IteratorType1, IteratorType2,
+                                  reducer_type, BinaryPredicateType>;
+
+  reduction_value_type red_result;
+  reducer_type reducer(red_result);
+
+  // The last feasible start index is the one whose distance from "last"
+  // equals the sequence length; the +1 makes the range include it.
+  const auto range_size = num_elements - s_count + 1;
+
+  ::Kokkos::parallel_reduce(
+      label, RangePolicy<ExecutionSpace>(ex, 0, range_size),
+      func_t(first, last, s_first, s_last, reducer, pred), reducer);
+
+  // fence not needed because reducing into scalar
+
+  // the identity value means that no start position matched
+  const auto not_found = ::Kokkos::reduction_identity<index_type>::min();
+  if (red_result.min_loc_true == not_found) {
+    return last;
+  }
+  return first + red_result.min_loc_true;
+}
+
+// Overload of search_impl comparing elements with
+// StdAlgoEqualBinaryPredicate instantiated on the two iterator value types.
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 search_impl(const std::string& label, const ExecutionSpace& ex,
+                          IteratorType1 first, IteratorType1 last,
+                          IteratorType2 s_first, IteratorType2 s_last) {
+  using predicate_type =
+      StdAlgoEqualBinaryPredicate<typename IteratorType1::value_type,
+                                  typename IteratorType2::value_type>;
+  return search_impl(label, ex, first, last, s_first, s_last, predicate_type());
+}
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp
new file mode 100644
index
0000000000000000000000000000000000000000..cd8b394386c1432ee532ab15dd8b599342ff02e0 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp @@ -0,0 +1,205 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_SEARCH_N_IMPL_HPP
+#define KOKKOS_STD_ALGORITHMS_SEARCH_N_IMPL_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_HelperPredicates.hpp"
+#include "Kokkos_AllOfAnyOfNoneOf.hpp"
+#include <std_algorithms/Kokkos_Distance.hpp>
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// Reduction functor for search_n: work item i checks whether the m_count
+// elements starting at m_first + i all satisfy m_p(element, m_value), and
+// joins either i (match) or the reduction identity (no match) into the
+// running reduction value.
+template <class IndexType, class IteratorType, class SizeType, class ValueType,
+          class ReducerType, class PredicateType>
+struct StdSearchNFunctor {
+  using red_value_type = typename ReducerType::value_type;
+
+  IteratorType m_first;
+  IteratorType m_last;
+  SizeType m_count;
+  ValueType m_value;
+  ReducerType m_reducer;
+  PredicateType m_p;
+
+  KOKKOS_FUNCTION
+  void operator()(const IndexType i, red_value_type& red_value) const {
+    auto myit  = m_first + i;
+    bool found = true;
+
+    for (SizeType k = 0; k < m_count; ++k) {
+      // note that we add this EXPECT to check if we are in a valid range
+      // but I think we can remove this because the guarantee we don't go
+      // out of bounds is taken care of at the calling site
+      // where we launch the par-reduce.
+      KOKKOS_EXPECTS((myit + k) < m_last);
+
+      if (!m_p(myit[k], m_value)) {
+        found = false;
+        break;
+      }
+    }
+
+    const auto rv =
+        found ? red_value_type{i}
+              : red_value_type{::Kokkos::reduction_identity<IndexType>::min()};
+
+    m_reducer.join(red_value, rv);
+  }
+
+  KOKKOS_FUNCTION
+  StdSearchNFunctor(IteratorType first, IteratorType last, SizeType count,
+                    ValueType value, ReducerType reducer, PredicateType p)
+      : m_first(std::move(first)),
+        m_last(std::move(last)),
+        m_count(std::move(count)),
+        m_value(std::move(value)),
+        m_reducer(std::move(reducer)),
+        m_p(std::move(p)) {}
+};
+
+// Unary predicate that binds the searched value to a binary predicate:
+// calling it with an element evaluates p(element, value), i.e. the same
+// test and argument order that StdSearchNFunctor applies.
+template <class ValueType, class BinaryPredicateType>
+struct StdSearchNBoundValuePredicate {
+  ValueType m_value;
+  BinaryPredicateType m_p;
+
+  template <class ElementType>
+  KOKKOS_FUNCTION bool operator()(const ElementType& element) const {
+    return m_p(element, m_value);
+  }
+
+  KOKKOS_FUNCTION
+  StdSearchNBoundValuePredicate(ValueType value, BinaryPredicateType p)
+      : m_value(std::move(value)), m_p(std::move(p)) {}
+};
+
+// Searches [first, last) for the first run of count consecutive elements
+// for which pred(element, value) holds.
+//
+// Preconditions (checked with KOKKOS_EXPECTS): count is non-negative and
+// not larger than the number of elements in [first, last).
+//
+// Returns first for an empty range, an iterator to the start of the first
+// matching run if there is one, and last otherwise.
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class ValueType, class BinaryPredicateType>
+IteratorType search_n_impl(const std::string& label, const ExecutionSpace& ex,
+                           IteratorType first, IteratorType last,
+                           SizeType count, const ValueType& value,
+                           const BinaryPredicateType& pred) {
+  // checks
+  static_assert_random_access_and_accessible(ex, first);
+  expect_valid_range(first, last);
+  KOKKOS_EXPECTS(static_cast<std::ptrdiff_t>(count) >= 0);
+
+  // count should not be larger than the range [first, last)
+  namespace KE            = ::Kokkos::Experimental;
+  const auto num_elements = KE::distance(first, last);
+  // cast things to avoid compiler warning
+  KOKKOS_EXPECTS(static_cast<std::size_t>(num_elements) >=
+                 static_cast<std::size_t>(count));
+
+  if (first == last) {
+    return first;
+  }
+
+  // special case where num elements in [first, last) == count:
+  // the only candidate run is the whole range, so it is enough to check that
+  // every element satisfies the predicate against value.
+  // The caller's predicate must be used here (not plain equality), otherwise
+  // a custom predicate would be ignored on this path.
+  if (static_cast<std::size_t>(num_elements) ==
+      static_cast<std::size_t>(count)) {
+    using bound_predicate_type =
+        StdSearchNBoundValuePredicate<ValueType, BinaryPredicateType>;
+    const auto satisfies =
+        all_of_impl(label, ex, first, last, bound_predicate_type(value, pred));
+    return (satisfies) ? first : last;
+  } else {
+    // aliases
+    using index_type           = typename IteratorType::difference_type;
+    using reducer_type         = FirstLoc<index_type>;
+    using reduction_value_type = typename reducer_type::value_type;
+    using func_t =
+        StdSearchNFunctor<index_type, IteratorType, SizeType, ValueType,
+                          reducer_type, BinaryPredicateType>;
+
+    // run
+    reduction_value_type red_result;
+    reducer_type reducer(red_result);
+
+    // decide the size of the range policy of the par_red:
+    // the last feasible index to start looking is the index
+    // whose distance from the "last" is equal to count.
+    // the +1 is because we need to include that location too.
+    const auto range_size = num_elements - count + 1;
+
+    // run par reduce
+    ::Kokkos::parallel_reduce(
+        label, RangePolicy<ExecutionSpace>(ex, 0, range_size),
+        func_t(first, last, count, value, reducer, pred), reducer);
+
+    // fence not needed because reducing into scalar
+
+    // decide and return
+    if (red_result.min_loc_true ==
+        ::Kokkos::reduction_identity<index_type>::min()) {
+      // location has not been found
+      return last;
+    } else {
+      // location has been found
+      return first + red_result.min_loc_true;
+    }
+  }
+}
+
+// Overload of search_n_impl comparing elements against value with
+// StdAlgoEqualBinaryPredicate.
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class ValueType>
+IteratorType search_n_impl(const std::string& label, const ExecutionSpace& ex,
+                           IteratorType first, IteratorType last,
+                           SizeType count, const ValueType& value) {
+  using iter_value_type = typename IteratorType::value_type;
+  using predicate_type =
+      StdAlgoEqualBinaryPredicate<iter_value_type, ValueType>;
+
+  /* above we use <iter_value_type, ValueType> for the predicate_type
+     to be consistent with the standard, which says:
+
+     "
+     The signature of the predicate function should be equivalent to:
+
+        bool pred(const Type1 &a, const Type2 &b);
+
+     The type Type1 must be such that an object of type ForwardIt can be
+     dereferenced and then implicitly converted to Type1. The type Type2 must be
+     such that an object of type T can be implicitly converted to Type2.
+     "
+
+     In our case, IteratorType = ForwardIt, and ValueType = T.
+   */
+
+  return search_n_impl(label, ex, first, last, count, value, predicate_type());
+}
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftLeft.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftLeft.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..796864461f0824e43efe16632432e00d6144ac6a
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftLeft.hpp
@@ -0,0 +1,139 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_IMPL_HPP
+#define KOKKOS_STD_ALGORITHMS_SHIFT_LEFT_IMPL_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_HelperPredicates.hpp"
+#include <std_algorithms/Kokkos_Move.hpp>
+#include <std_algorithms/Kokkos_Distance.hpp>
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// Shifts the elements of [first, last) by n positions towards first.
+//
+// Precondition (checked with KOKKOS_EXPECTS): n >= 0.
+//
+// Returns last when n == 0 and first when n >= distance(first, last); in
+// both cases nothing is moved. Otherwise returns last - n, the end of the
+// shifted elements, after fencing ex.
+template <class ExecutionSpace, class IteratorType>
+IteratorType shift_left_impl(const std::string& label, const ExecutionSpace& ex,
+                             IteratorType first, IteratorType last,
+                             typename IteratorType::difference_type n) {
+  namespace KE = ::Kokkos::Experimental;
+
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+  KOKKOS_EXPECTS(n >= 0);
+
+  // trivial cases: no shift requested, or everything is shifted out
+  if (n == 0) {
+    return last;
+  }
+  if (n >= KE::distance(first, last)) {
+    return first;
+  }
+
+  /*
+    Example with n = 5, where [first, last) holds:
+
+      | 0 | 1 | 2 | 1 | 2 | 1 | 2 | 2 | 10 | -3 | 1 | -6 |
+
+    After the shift the range holds:
+
+      | 1 | 2 | 2 | 10 | -3 | 1 | -6 | x | x | x | x | x |
+
+    and the returned iterator points at the first x, i.e. one past the new
+    end. Elements marked x are in an undefined state because they have been
+    moved from.
+
+    This is done in two passes through a scratch view:
+      pass 1: move [first + n, last) into a scratch view of that length,
+              which then holds | 1 | 2 | 2 | 10 | -3 | 1 | -6 |
+      pass 2: move the scratch view back to the range starting at first.
+  */
+
+  using index_type        = typename IteratorType::difference_type;
+  using value_type        = typename IteratorType::value_type;
+  using scratch_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+
+  // number of elements that survive the shift
+  const auto surviving_count = KE::distance(first + n, last);
+
+  scratch_view_type scratch("shift_left_impl", surviving_count);
+  using scratch_iterator_type = decltype(begin(scratch));
+
+  // pass 1: input tail -> scratch
+  using to_scratch_func_type =
+      StdMoveFunctor<index_type, IteratorType, scratch_iterator_type>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, surviving_count),
+      to_scratch_func_type(first + n, begin(scratch)));
+
+  // pass 2: scratch -> front of the input range
+  using from_scratch_func_type =
+      StdMoveFunctor<index_type, scratch_iterator_type, IteratorType>;
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, scratch.extent(0)),
+                         from_scratch_func_type(begin(scratch), first));
+
+  ex.fence("Kokkos::shift_left: fence after operation");
+
+  return last - n;
+}
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftRight.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftRight.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0ffde42ab29abf13ab8ff113c94b8de9ca8bc9fd
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ShiftRight.hpp
@@ -0,0 +1,139 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v.
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_IMPL_HPP
+#define KOKKOS_STD_ALGORITHMS_SHIFT_RIGHT_IMPL_HPP
+
+#include <Kokkos_Core.hpp>
+#include "Kokkos_Constraints.hpp"
+#include "Kokkos_HelperPredicates.hpp"
+#include <std_algorithms/Kokkos_Move.hpp>
+#include <std_algorithms/Kokkos_Distance.hpp>
+#include <string>
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+// Shifts the elements of [first, last) by n positions towards last.
+//
+// Precondition (checked with KOKKOS_EXPECTS): n >= 0.
+//
+// Returns first when n == 0 and last when n >= distance(first, last); in
+// both cases nothing is moved. Otherwise returns first + n, the beginning
+// of the shifted elements, after fencing ex.
+template <class ExecutionSpace, class IteratorType>
+IteratorType shift_right_impl(const std::string& label,
+                              const ExecutionSpace& ex, IteratorType first,
+                              IteratorType last,
+                              typename IteratorType::difference_type n) {
+  namespace KE = ::Kokkos::Experimental;
+
+  // checks
+  Impl::static_assert_random_access_and_accessible(ex, first);
+  Impl::expect_valid_range(first, last);
+  KOKKOS_EXPECTS(n >= 0);
+
+  // trivial cases: no shift requested, or everything is shifted out
+  if (n == 0) {
+    return first;
+  }
+  if (n >= KE::distance(first, last)) {
+    return last;
+  }
+
+  /*
+    Example with n = 3, where [first, last) holds:
+
+      | 0 | 1 | 2 | 1 | 2 | 1 | 2 | 2 | 10 | -3 | 1 | -6 |
+
+    After the shift the range holds:
+
+      | x | x | x | 0 | 1 | 2 | 1 | 2 | 1 | 2 | 2 | 10 |
+
+    and the returned iterator points at the element after the last x, i.e.
+    the new beginning. Elements marked x are in an undefined state because
+    they have been moved from.
+
+    This is done in two passes through a scratch view:
+      pass 1: move [first, last - n) into a scratch view of that length,
+              which then holds | 0 | 1 | 2 | 1 | 2 | 1 | 2 | 2 | 10 |
+      pass 2: move the scratch view back to the range starting at first + n.
+  */
+
+  using index_type        = typename IteratorType::difference_type;
+  using value_type        = typename IteratorType::value_type;
+  using scratch_view_type = Kokkos::View<value_type*, ExecutionSpace>;
+
+  // number of elements that survive the shift
+  const auto surviving_count = KE::distance(first, last - n);
+
+  scratch_view_type scratch("shift_right_impl", surviving_count);
+  using scratch_iterator_type = decltype(begin(scratch));
+
+  // pass 1: input head -> scratch
+  using to_scratch_func_type =
+      StdMoveFunctor<index_type, IteratorType, scratch_iterator_type>;
+  ::Kokkos::parallel_for(
+      label, RangePolicy<ExecutionSpace>(ex, 0, surviving_count),
+      to_scratch_func_type(first, begin(scratch)));
+
+  // pass 2: scratch -> input range starting n positions in
+  using from_scratch_func_type =
+      StdMoveFunctor<index_type, scratch_iterator_type, IteratorType>;
+  ::Kokkos::parallel_for(label,
+                         RangePolicy<ExecutionSpace>(ex, 0, scratch.extent(0)),
+                         from_scratch_func_type(begin(scratch), first + n));
+
+  ex.fence("Kokkos::shift_right: fence after operation");
+
+  return first + n;
+}
+
+}  // namespace Impl
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3e6ca14697647d6eec2c5b2300a3d4c1ab7dbac1
--- /dev/null
+++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp
@@ -0,0 +1,112 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1.
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_SWAP_RANGES_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_SWAP_RANGES_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <std_algorithms/Kokkos_Swap.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class IteratorType1, class IteratorType2> +struct StdSwapRangesFunctor { + IteratorType1 m_first1; + IteratorType2 m_first2; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { + // the swap below is doing the same thing, but + // for Intel 18.0.5 does not work. + // But putting the impl directly here, it works. +#ifdef KOKKOS_COMPILER_INTEL + typename IteratorType1::value_type tmp = std::move(m_first1[i]); + m_first1[i] = std::move(m_first2[i]); + m_first2[i] = std::move(tmp); +#else + ::Kokkos::Experimental::swap(m_first1[i], m_first2[i]); +#endif + } + + KOKKOS_FUNCTION + StdSwapRangesFunctor(IteratorType1 _first1, IteratorType2 _first2) + : m_first1(std::move(_first1)), m_first2(std::move(_first2)) {} +}; + +template <class ExecutionSpace, class IteratorType1, class IteratorType2> +IteratorType2 swap_ranges_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + + // aliases + using index_type = typename IteratorType1::difference_type; + using func_t = StdSwapRangesFunctor<index_type, IteratorType1, IteratorType2>; + + // run + const auto num_elements_to_swap = + Kokkos::Experimental::distance(first1, last1); + ::Kokkos::parallel_for( + label, 
RangePolicy<ExecutionSpace>(ex, 0, num_elements_to_swap), + func_t(first1, first2)); + ex.fence("Kokkos::swap_ranges: fence after operation"); + + // return + return first2 + num_elements_to_swap; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Transform.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Transform.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5d2c0cc98240569a03646382967b19963401d0fa --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Transform.hpp @@ -0,0 +1,158 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_TRANSFORM_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class InputIterator, class OutputIterator, + class UnaryFunctorType> +struct StdTransformFunctor { + InputIterator m_first; + OutputIterator m_d_first; + UnaryFunctorType m_unary_op; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { m_d_first[i] = m_unary_op(m_first[i]); } + + KOKKOS_FUNCTION + StdTransformFunctor(InputIterator _first, OutputIterator _m_d_first, + UnaryFunctorType _functor) + : m_first(std::move(_first)), + m_d_first(std::move(_m_d_first)), + m_unary_op(std::move(_functor)) {} +}; + +template <class IndexType, class InputIterator1, class InputIterator2, + class OutputIterator, class BinaryFunctorType> +struct StdTransformBinaryFunctor { + InputIterator1 m_first1; + InputIterator2 m_first2; + OutputIterator m_d_first; + BinaryFunctorType m_binary_op; + + KOKKOS_FUNCTION + void operator()(IndexType i) const { + m_d_first[i] = m_binary_op(m_first1[i], m_first2[i]); + } + + KOKKOS_FUNCTION + 
StdTransformBinaryFunctor(InputIterator1 _first1, InputIterator2 _first2, + OutputIterator _m_d_first, + BinaryFunctorType _functor) + : m_first1(std::move(_first1)), + m_first2(std::move(_first2)), + m_d_first(std::move(_m_d_first)), + m_binary_op(std::move(_functor)) {} +}; + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class UnaryOperation> +OutputIterator transform_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator first1, + InputIterator last1, OutputIterator d_first, + UnaryOperation unary_op) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first1, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first1, d_first); + Impl::expect_valid_range(first1, last1); + + // aliases + using index_type = typename InputIterator::difference_type; + using func_t = StdTransformFunctor<index_type, InputIterator, OutputIterator, + UnaryOperation>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first1, last1); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first1, d_first, unary_op)); + ex.fence("Kokkos::transform: fence after operation"); + + // return + return d_first + num_elements; +} + +template <class ExecutionSpace, class InputIterator1, class InputIterator2, + class OutputIterator, class BinaryOperation> +OutputIterator transform_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, + OutputIterator d_first, + BinaryOperation binary_op) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first1, first2, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2, + d_first); + Impl::expect_valid_range(first1, last1); + + // aliases + using index_type = typename InputIterator1::difference_type; + using func_t = + StdTransformBinaryFunctor<index_type, InputIterator1, InputIterator2, + 
OutputIterator, BinaryOperation>; + + // run + const auto num_elements = Kokkos::Experimental::distance(first1, last1); + ::Kokkos::parallel_for(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_t(first1, first2, d_first, binary_op)); + ex.fence("Kokkos::transform: fence after operation"); + return d_first + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9fb8cbcc3035c33b73197c7005e0a3d8c502dbe5 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp @@ -0,0 +1,153 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_EXCLUSIVE_SCAN_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_TRANSFORM_EXCLUSIVE_SCAN_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_ValueWrapperForNoNeutralElement.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, + class FirstDest, class BinaryOpType, class UnaryOpType> +struct TransformExclusiveScanFunctor { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement<ValueType>; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctor(ValueType init, FirstFrom first_from, + FirstDest first_dest, BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + 
m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update.val, m_init_value); + } + } + + const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (update.is_initial) { + update.val = input.val; + } else { + update.val = m_binary_op(update.val, input.val); + } + update.is_initial = false; + } +}; + +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class ValueType, class BinaryOpType, + class UnaryOpType> +OutputIteratorType transform_exclusive_scan_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop, + UnaryOpType uop) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using func_type = + TransformExclusiveScanFunctor<ExecutionSpace, index_type, ValueType, + InputIteratorType, OutputIteratorType, + BinaryOpType, UnaryOpType>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + 
func_type(init_value, first_from, first_dest, bop, uop)); + ex.fence("Kokkos::transform_exclusive_scan: fence after operation"); + + // return + return first_dest + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp new file mode 100644 index 0000000000000000000000000000000000000000..281eb6f8a14f6a878c929b06f409515562d800cc --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp @@ -0,0 +1,235 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_TRANSFORM_INCLUSIVE_SCAN_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_ValueWrapperForNoNeutralElement.hpp" +#include "Kokkos_IdentityReferenceUnaryFunctor.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, + class FirstDest, class BinaryOpType, class UnaryOpType> +struct TransformInclusiveScanNoInitValueFunctor { + using execution_space = ExeSpace; + using value_type = ValueWrapperForNoNeutralElement<ValueType>; + + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformInclusiveScanNoInitValueFunctor(FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, UnaryOpType uop) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + const auto tmp = 
value_type{m_unary_op(m_first_from[i]), false}; + this->join(update, tmp); + if (final_pass) { + m_first_dest[i] = update.val; + } + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (update.is_initial) { + update.val = input.val; + } else { + update.val = m_binary_op(update.val, input.val); + } + update.is_initial = false; + } +}; + +template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, + class FirstDest, class BinaryOpType, class UnaryOpType> +struct TransformInclusiveScanWithInitValueFunctor { + using execution_space = ExeSpace; + using value_type = ValueWrapperForNoNeutralElement<ValueType>; + + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + ValueType m_init; + + KOKKOS_FUNCTION + TransformInclusiveScanWithInitValueFunctor(FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, UnaryOpType uop, + ValueType init) + : m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)), + m_init(std::move(init)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; + this->join(update, tmp); + + if (final_pass) { + m_first_dest[i] = m_binary_op(update.val, m_init); + } + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (update.is_initial) { + update.val = input.val; + } else { + update.val = m_binary_op(update.val, input.val); + } + update.is_initial = false; + } +}; + +// ------------------------------------------------------------- +// transform_inclusive_scan_impl without 
init_value +// ------------------------------------------------------------- +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOpType, class UnaryOpType> +OutputIteratorType transform_inclusive_scan_impl(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOpType binary_op, + UnaryOpType unary_op) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using value_type = + std::remove_const_t<typename InputIteratorType::value_type>; + using func_type = TransformInclusiveScanNoInitValueFunctor< + ExecutionSpace, index_type, value_type, InputIteratorType, + OutputIteratorType, BinaryOpType, UnaryOpType>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(first_from, first_dest, binary_op, unary_op)); + ex.fence("Kokkos::transform_inclusive_scan: fence after operation"); + + // return + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// transform_inclusive_scan_impl with init_value +// ------------------------------------------------------------- +template <class ExecutionSpace, class InputIteratorType, + class OutputIteratorType, class BinaryOpType, class UnaryOpType, + class ValueType> +OutputIteratorType transform_inclusive_scan_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, + ValueType init_value) { + // checks 
+ Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using func_type = TransformInclusiveScanWithInitValueFunctor< + ExecutionSpace, index_type, ValueType, InputIteratorType, + OutputIteratorType, BinaryOpType, UnaryOpType>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + func_type(first_from, first_dest, binary_op, unary_op, init_value)); + ex.fence("Kokkos::transform_inclusive_scan: fence after operation"); + + // return + return first_dest + num_elements; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformReduce.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformReduce.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e3a780f4856c34cf15a6c5fa72f33f2d69c25607 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_TransformReduce.hpp @@ -0,0 +1,245 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_TRANSFORM_REDUCE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class ValueType> +struct StdTranformReduceDefaultBinaryTransformFunctor { + KOKKOS_FUNCTION + constexpr ValueType operator()(const ValueType& a, const ValueType& b) const { + return (a * b); + } +}; + +template <class ValueType> +struct StdTranformReduceDefaultJoinFunctor { + KOKKOS_FUNCTION + constexpr ValueType operator()(const ValueType& a, const ValueType& b) const { + return a + b; + } +}; + +template <class IteratorType, class ReducerType, class TransformType> +struct StdTransformReduceSingleIntervalFunctor { + using red_value_type = typename ReducerType::value_type; + using index_type = typename IteratorType::difference_type; + + const IteratorType m_first; + const ReducerType m_reducer; + const TransformType m_transform; + + KOKKOS_FUNCTION + void operator()(const index_type i, red_value_type& red_value) const { + auto tmp_wrapped_value = red_value_type{m_transform(m_first[i]), false}; + if (red_value.is_initial) { + red_value = tmp_wrapped_value; + } else { + m_reducer.join(red_value, tmp_wrapped_value); + } + } + + KOKKOS_FUNCTION + StdTransformReduceSingleIntervalFunctor(IteratorType first, + ReducerType reducer, + TransformType transform) + : m_first(std::move(first)), + m_reducer(std::move(reducer)), + m_transform(std::move(transform)) {} +}; + +template <class IndexType, class IteratorType1, class IteratorType2, + class ReducerType, class TransformType> +struct StdTransformReduceTwoIntervalsFunctor { + using red_value_type = typename ReducerType::value_type; + + const 
IteratorType1 m_first1; + const IteratorType2 m_first2; + const ReducerType m_reducer; + const TransformType m_transform; + + KOKKOS_FUNCTION + void operator()(const IndexType i, red_value_type& red_value) const { + auto tmp_wrapped_value = + red_value_type{m_transform(m_first1[i], m_first2[i]), false}; + + if (red_value.is_initial) { + red_value = tmp_wrapped_value; + } else { + m_reducer.join(red_value, tmp_wrapped_value); + } + } + + KOKKOS_FUNCTION + StdTransformReduceTwoIntervalsFunctor(IteratorType1 first1, + IteratorType2 first2, + ReducerType reducer, + TransformType transform) + : m_first1(std::move(first1)), + m_first2(std::move(first2)), + m_reducer(std::move(reducer)), + m_transform(std::move(transform)) {} +}; + +//------------------------------ +// +// impl functions +// +//------------------------------ + +template <class ExecutionSpace, class IteratorType, class ValueType, + class JoinerType, class UnaryTransformerType> +ValueType transform_reduce_custom_functors_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType first, + IteratorType last, ValueType init_reduction_value, JoinerType joiner, + UnaryTransformerType transformer) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::static_assert_is_not_openmptarget(ex); + Impl::expect_valid_range(first, last); + + if (first == last) { + // init is returned, unmodified + return init_reduction_value; + } + + // aliases + using reducer_type = + ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>; + using functor_type = + StdTransformReduceSingleIntervalFunctor<IteratorType, reducer_type, + UnaryTransformerType>; + using reduction_value_type = typename reducer_type::value_type; + + // run + reduction_value_type result; + reducer_type reducer(result, joiner); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(label, + RangePolicy<ExecutionSpace>(ex, 0, num_elements), + functor_type(first, 
reducer, transformer), reducer); + + // fence not needed since reducing into scalar + + // as per standard, transform is not applied to the init value + // https://en.cppreference.com/w/cpp/algorithm/transform_reduce + return joiner(result.val, init_reduction_value); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class ValueType, class JoinerType, class BinaryTransformerType> +ValueType transform_reduce_custom_functors_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, + JoinerType joiner, BinaryTransformerType transformer) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first1, first2); + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + + if (first1 == last1) { + // init is returned, unmodified + return init_reduction_value; + } + + // aliases + using index_type = typename IteratorType1::difference_type; + using reducer_type = + ReducerWithArbitraryJoinerNoNeutralElement<ValueType, JoinerType>; + using functor_type = + StdTransformReduceTwoIntervalsFunctor<index_type, IteratorType1, + IteratorType2, reducer_type, + BinaryTransformerType>; + using reduction_value_type = typename reducer_type::value_type; + + // run + reduction_value_type result; + reducer_type reducer(result, joiner); + + const auto num_elements = Kokkos::Experimental::distance(first1, last1); + ::Kokkos::parallel_reduce( + label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), + functor_type(first1, first2, reducer, transformer), reducer); + + // fence not needed since reducing into scalar + return joiner(result.val, init_reduction_value); +} + +template <class ExecutionSpace, class IteratorType1, class IteratorType2, + class ValueType> +ValueType transform_reduce_default_functors_impl( + const std::string& label, 
const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { + // checks (the custom-functor overload called below repeats them) + Impl::static_assert_random_access_and_accessible(ex, first1, first2); + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + + // aliases: default binary-transform and join functors for ValueType + using transformer_type = + Impl::StdTranformReduceDefaultBinaryTransformFunctor<ValueType>; + using joiner_type = Impl::StdTranformReduceDefaultJoinFunctor<ValueType>; + + return transform_reduce_custom_functors_impl( + label, ex, first1, last1, first2, std::move(init_reduction_value), + joiner_type(), transformer_type()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5e4ea7d792a03551914f7c6e2cde8fa13a8a4fbf --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp @@ -0,0 +1,193 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_UNIQUE_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include <std_algorithms/Kokkos_Move.hpp> +#include <std_algorithms/Kokkos_Distance.hpp> +#include <std_algorithms/Kokkos_AdjacentFind.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class InputIt, class OutputIt, + class BinaryPredicateType> +struct StdUniqueFunctor { + InputIt m_first_from; + InputIt m_last_from; + OutputIt m_first_dest; + BinaryPredicateType m_pred; + + KOKKOS_FUNCTION + StdUniqueFunctor(InputIt first_from, InputIt last_from, OutputIt first_dest, + BinaryPredicateType pred) + : m_first_from(std::move(first_from)), + m_last_from(std::move(last_from)), + m_first_dest(std::move(first_dest)), + m_pred(std::move(pred)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, IndexType& update, + const bool final_pass) const { + auto& val_i = m_first_from[i]; + const auto& val_ip1 = m_first_from[i + 1]; + + // evaluate the predicate exactly once, *before* val_i may be moved from + const bool keep_i = !m_pred(val_i, val_ip1); + + if (keep_i) { + if (final_pass) { + m_first_dest[update] = std::move(val_i); + } + update += 1; + } + } +}; + +template <class ExecutionSpace, class IteratorType, class PredicateType> +IteratorType unique_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + if (num_elements == 0) { + return first; + } else if (num_elements == 1) { + return last; + } else { + // ---------- + // step 1: + // find first location of adjacent equal elements + // 
---------- + auto it_found = + ::Kokkos::Experimental::adjacent_find(ex, first, last, pred); + + // if none, all elements are unique, so nothing to do + if (it_found == last) { + return last; + } else { + // if here, we found some equal adjacent elements, + // so count all preceding unique elements + const auto num_unique_found_in_step_one = it_found - first; + + // ---------- + // step 2: + // ---------- + // since we found some unique elements, we don't need to explore + // the full range [first, last), but only need to focus on the + // remaining range [it_found, last) + const auto num_elements_to_explore = last - it_found; + + // create a tmp view to use to *move* all unique elements + // using the same algorithm used for unique_copy but we now move things + using value_type = typename IteratorType::value_type; + using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>; + tmp_view_type tmp_view("std_unique_tmp_view", num_elements_to_explore); + + // scan extent is: num_elements_to_explore - 1 + // for same reason as the one explained in unique_copy + const auto scan_size = num_elements_to_explore - 1; + auto tmp_first = ::Kokkos::Experimental::begin(tmp_view); + using output_it = decltype(tmp_first); + + using index_type = typename IteratorType::difference_type; + using func_type = + StdUniqueFunctor<index_type, IteratorType, output_it, PredicateType>; + index_type count = 0; + ::Kokkos::parallel_scan( + label, RangePolicy<ExecutionSpace>(ex, 0, scan_size), + func_type(it_found, last, tmp_first, pred), count); + + // move last element too, for the same reason as the unique_copy + auto unused_r = + Impl::move_impl("Kokkos::move_from_unique", ex, it_found + scan_size, + last, tmp_first + count); + (void)unused_r; // return value intentionally unused + + // ---------- + // step 3 + // ---------- + // move back from tmp to original range, + // ensuring we start overwriting after the original unique found + using tmp_readwrite_iterator_type = decltype(begin(tmp_view)); + using 
step3_func_t = + StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>; + + ::Kokkos::parallel_for( + "unique_step3_parfor", + RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)), + step3_func_t(begin(tmp_view), + (first + num_unique_found_in_step_one))); + + ex.fence("Kokkos::unique: fence after operation"); + + // return iterator to one past the last written + // (the +1 is needed to account for the last element, see above) + return (first + num_unique_found_in_step_one + count + 1); + } + } +} + +template <class ExecutionSpace, class IteratorType> +IteratorType unique_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using binary_pred_t = StdAlgoEqualBinaryPredicate<value_type>; + return unique_impl(label, ex, first, last, binary_pred_t()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e4fd6f3ed8c5e36aa85c2305c89b540aa79c38b4 --- /dev/null +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp @@ -0,0 +1,156 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_UNIQUE_COPY_IMPL_HPP + +#include <Kokkos_Core.hpp> +#include "Kokkos_Constraints.hpp" +#include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_CopyCopyN.hpp" +#include <std_algorithms/Kokkos_Distance.hpp> +#include <string> + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template <class IndexType, class InputIt, class OutputIt, + class BinaryPredicateType> +struct StdUniqueCopyFunctor { + InputIt m_first_from; + InputIt m_last_from; + OutputIt m_first_dest; + BinaryPredicateType m_pred; + + KOKKOS_FUNCTION + StdUniqueCopyFunctor(InputIt first_from, InputIt last_from, + OutputIt first_dest, BinaryPredicateType pred) + : m_first_from(std::move(first_from)), + m_last_from(std::move(last_from)), + m_first_dest(std::move(first_dest)), + m_pred(std::move(pred)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, IndexType& update, + const bool final_pass) const { + const auto& val_i = m_first_from[i]; + const auto& val_ip1 = m_first_from[i + 1]; + + // evaluate the (possibly expensive) predicate only once per element + const bool keep_i = !m_pred(val_i, val_ip1); + + if (keep_i) { + if (final_pass) { + m_first_dest[update] = val_i; + } + update += 1; + } + } +}; + +template <class ExecutionSpace, class InputIterator, class OutputIterator, + class PredicateType> +OutputIterator unique_copy_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first, + PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(ex, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // branch for trivial vs non trivial case + const auto num_elements = Kokkos::Experimental::distance(first, last); + if (num_elements == 0) { + return d_first; + } 
else if (num_elements == 1) { + return Impl::copy_impl("Kokkos::copy_from_unique_copy", ex, first, last, + d_first); + } else { + // aliases + using index_type = typename InputIterator::difference_type; + using func_type = StdUniqueCopyFunctor<index_type, InputIterator, + OutputIterator, PredicateType>; + + // the scan covers only num_elements - 1 entries: the functor compares + // element i with element i+1, so the last element has no successor. + // Rather than bounds-checking inside the functor, we run this "safe" scan + // and then copy the last element, which is always kept. + const auto scan_size = num_elements - 1; + index_type count = 0; + ::Kokkos::parallel_scan(label, + RangePolicy<ExecutionSpace>(ex, 0, scan_size), + func_type(first, last, d_first, pred), count); + + return Impl::copy_impl("Kokkos::copy_from_unique_copy", ex, + first + scan_size, last, d_first + count); + } +} + +template <class ExecutionSpace, class InputIterator, class OutputIterator> +OutputIterator unique_copy_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + // checks (the predicate overload called below repeats them) + Impl::static_assert_random_access_and_accessible(ex, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // aliases + using value_type1 = typename InputIterator::value_type; + using value_type2 = typename OutputIterator::value_type; + + // default binary predicate uses == + using binary_pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; + + // run: forward to the predicate overload + return unique_copy_impl(label, ex, first, last, d_first, binary_pred_t()); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ValueWrapperForNoNeutralElement.hpp b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp similarity index 89% rename from 
packages/kokkos/algorithms/src/std_algorithms/Kokkos_ValueWrapperForNoNeutralElement.hpp rename to packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp index 019a0049db1f1f60ad3dfcb28ed7d2cff80a88f2..da9b6ef9a9ed0ec9c2c79f93eb55ac535be7c5ac 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ValueWrapperForNoNeutralElement.hpp +++ b/packages/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp @@ -42,8 +42,8 @@ //@HEADER */ -#ifndef KOKKOS_STD_VALUE_WRAPPER_FOR_NO_NEUTRAL_ELEMENT_HPP -#define KOKKOS_STD_VALUE_WRAPPER_FOR_NO_NEUTRAL_ELEMENT_HPP +#ifndef KOKKOS_STD_ALGORITHMS_VALUE_WRAPPER_FOR_NO_NEUTRAL_ELEMENT_HPP +#define KOKKOS_STD_ALGORITHMS_VALUE_WRAPPER_FOR_NO_NEUTRAL_ELEMENT_HPP namespace Kokkos { namespace Experimental { @@ -63,12 +63,6 @@ struct ValueWrapperForNoNeutralElement { val = rhs.val; is_initial = rhs.is_initial; } - - KOKKOS_FUNCTION - void operator=(const volatile ValueWrapperForNoNeutralElement& rhs) volatile { - val = rhs.val; - is_initial = rhs.is_initial; - } }; } // namespace Impl diff --git a/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp b/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp deleted file mode 100644 index b9b1b96aea466b9190d72f778d40ed429334904f..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp +++ /dev/null @@ -1,1285 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_MODIFYING_SEQUENCE_OPERATIONS_SET1_HPP -#define KOKKOS_MODIFYING_SEQUENCE_OPERATIONS_SET1_HPP - -#include <Kokkos_Core.hpp> -#include "../Kokkos_BeginEnd.hpp" -#include "../Kokkos_Constraints.hpp" -#include "../Kokkos_ModifyingOperations.hpp" -#include "../Kokkos_NonModifyingSequenceOperations.hpp" - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -//--------------------------- -// -// functors -// -//--------------------------- -template <class IndexType, class InputIterator, class OutputIterator> -struct StdCopyFunctor { - InputIterator m_first; - OutputIterator m_dest_first; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_first[i] = m_first[i]; } - - KOKKOS_FUNCTION - StdCopyFunctor(InputIterator _first, OutputIterator _dest_first) - : m_first(std::move(_first)), m_dest_first(std::move(_dest_first)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2> -struct StdCopyBackwardFunctor { - static_assert(std::is_signed<IndexType>::value, - "Kokkos: StdCopyBackwardFunctor requires signed index type"); - - IteratorType1 m_last; - IteratorType2 m_dest_last; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } - - KOKKOS_FUNCTION - StdCopyBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) - : m_last(std::move(_last)), m_dest_last(std::move(_dest_last)) {} -}; - -template <class IndexType, class FirstFrom, class FirstDest, class PredType> -struct StdCopyIfFunctor { - FirstFrom m_first_from; - FirstDest m_first_dest; - PredType m_pred; - - KOKKOS_FUNCTION - StdCopyIfFunctor(FirstFrom first_from, FirstDest first_dest, PredType pred) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_pred(std::move(pred)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, IndexType& update, - 
const bool final_pass) const { - const auto& myval = m_first_from[i]; - if (final_pass) { - if (m_pred(myval)) { - m_first_dest[update] = myval; - } - } - - if (m_pred(myval)) { - update += 1; - } - } -}; - -template <class InputIterator, class T> -struct StdFillFunctor { - using index_type = typename InputIterator::difference_type; - InputIterator m_first; - T m_value; - - KOKKOS_FUNCTION - void operator()(index_type i) const { m_first[i] = m_value; } - - KOKKOS_FUNCTION - StdFillFunctor(InputIterator _first, T _value) - : m_first(std::move(_first)), m_value(std::move(_value)) {} -}; - -template <class IndexType, class InputIterator, class OutputIterator, - class UnaryFunctorType> -struct StdTransformFunctor { - InputIterator m_first; - OutputIterator m_d_first; - UnaryFunctorType m_unary_op; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { m_d_first[i] = m_unary_op(m_first[i]); } - - KOKKOS_FUNCTION - StdTransformFunctor(InputIterator _first, OutputIterator _m_d_first, - UnaryFunctorType _functor) - : m_first(std::move(_first)), - m_d_first(std::move(_m_d_first)), - m_unary_op(std::move(_functor)) {} -}; - -template <class IndexType, class InputIterator1, class InputIterator2, - class OutputIterator, class BinaryFunctorType> -struct StdTransformBinaryFunctor { - InputIterator1 m_first1; - InputIterator2 m_first2; - OutputIterator m_d_first; - BinaryFunctorType m_binary_op; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { - m_d_first[i] = m_binary_op(m_first1[i], m_first2[i]); - } - - KOKKOS_FUNCTION - StdTransformBinaryFunctor(InputIterator1 _first1, InputIterator2 _first2, - OutputIterator _m_d_first, - BinaryFunctorType _functor) - : m_first1(std::move(_first1)), - m_first2(std::move(_first2)), - m_d_first(std::move(_m_d_first)), - m_binary_op(std::move(_functor)) {} -}; - -template <class IteratorType, class Generator> -struct StdGenerateFunctor { - using index_type = typename IteratorType::difference_type; - IteratorType m_first; - 
Generator m_generator; - - KOKKOS_FUNCTION - void operator()(index_type i) const { m_first[i] = m_generator(); } - - KOKKOS_FUNCTION - StdGenerateFunctor(IteratorType _first, Generator _g) - : m_first(std::move(_first)), m_generator(std::move(_g)) {} -}; - -template <class InputIterator, class PredicateType, class NewValueType> -struct StdReplaceIfFunctor { - using index_type = typename InputIterator::difference_type; - - InputIterator m_first; - PredicateType m_predicate; - NewValueType m_new_value; - - KOKKOS_FUNCTION - void operator()(index_type i) const { - if (m_predicate(m_first[i])) { - m_first[i] = m_new_value; - } - } - - KOKKOS_FUNCTION - StdReplaceIfFunctor(InputIterator first, PredicateType pred, - NewValueType new_value) - : m_first(std::move(first)), - m_predicate(std::move(pred)), - m_new_value(std::move(new_value)) {} -}; - -template <class InputIterator, class ValueType> -struct StdReplaceFunctor { - using index_type = typename InputIterator::difference_type; - InputIterator m_first; - ValueType m_old_value; - ValueType m_new_value; - - KOKKOS_FUNCTION - void operator()(index_type i) const { - if (m_first[i] == m_old_value) { - m_first[i] = m_new_value; - } - } - - KOKKOS_FUNCTION - StdReplaceFunctor(InputIterator first, ValueType old_value, - ValueType new_value) - : m_first(std::move(first)), - m_old_value(std::move(old_value)), - m_new_value(std::move(new_value)) {} -}; - -template <class InputIterator, class OutputIterator, class ValueType> -struct StdReplaceCopyFunctor { - using index_type = typename InputIterator::difference_type; - - InputIterator m_first_from; - OutputIterator m_first_dest; - ValueType m_old_value; - ValueType m_new_value; - - KOKKOS_FUNCTION - void operator()(index_type i) const { - const auto& myvalue_from = m_first_from[i]; - - if (myvalue_from == m_old_value) { - m_first_dest[i] = m_new_value; - } else { - m_first_dest[i] = myvalue_from; - } - } - - KOKKOS_FUNCTION - StdReplaceCopyFunctor(InputIterator first_from, 
OutputIterator first_dest, - ValueType old_value, ValueType new_value) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_old_value(std::move(old_value)), - m_new_value(std::move(new_value)) {} -}; - -template <class IndexType, class InputIterator, class OutputIterator, - class PredicateType, class ValueType> -struct StdReplaceIfCopyFunctor { - InputIterator m_first_from; - OutputIterator m_first_dest; - PredicateType m_pred; - ValueType m_new_value; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { - const auto& myvalue_from = m_first_from[i]; - - if (m_pred(myvalue_from)) { - m_first_dest[i] = m_new_value; - } else { - m_first_dest[i] = myvalue_from; - } - } - - KOKKOS_FUNCTION - StdReplaceIfCopyFunctor(InputIterator first_from, OutputIterator first_dest, - PredicateType pred, ValueType new_value) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_pred(std::move(pred)), - m_new_value(std::move(new_value)) {} -}; - -// ------------------------------------------ -// copy_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first, d_first); - Impl::expect_valid_range(first, last); - - // aliases - using index_type = typename InputIterator::difference_type; - using func_t = StdCopyFunctor<index_type, InputIterator, OutputIterator>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, d_first)); - ex.fence("Kokkos::copy: fence after operation"); - - // return - return d_first + num_elements; -} - -// 
------------------------------------------ -// copy_n_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator, class Size, - class OutputIterator> -OutputIterator copy_n_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first_from, Size count, - OutputIterator first_dest) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - - if (count > 0) { - return copy_impl(label, ex, first_from, first_from + count, first_dest); - } else { - return first_dest; - } -} - -// ------------------------------------------ -// copy_backward_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 copy_backward_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 d_last) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_last); - Impl::static_assert_iterators_have_matching_difference_type(first, d_last); - Impl::expect_valid_range(first, last); - - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = - StdCopyBackwardFunctor<index_type, IteratorType1, IteratorType2>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(last, d_last)); - ex.fence("Kokkos::copy_backward: fence after operation"); - - // return - return d_last - num_elements; -} - -// ------------------------------------------ -// copy_if_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class PredicateType> -OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, 
InputIterator last, - OutputIterator d_first, PredicateType pred) { - /* - To explain the impl, suppose that our data is: - - | 1 | 1 | 2 | 2 | 3 | -2 | 4 | 4 | 4 | 5 | 7 | -10 | - - and we want to copy only the even entries, - We can use an exclusive scan where the "update" - is incremented only for the elements that satisfy the predicate. - This way, the update allows us to track where in the destination - we need to copy the elements: - - In this case, counting only the even entries, the exlusive scan - during the final pass would yield: - - | 0 | 0 | 0 | 1 | 2 | 2 | 3 | 4 | 5 | 6 | 6 | 6 | - * * * * * * * - - which provides the indexing in the destination where - each starred (*) element needs to be copied to since - the starred elements are those that satisfy the predicate. - */ - - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first, d_first); - Impl::expect_valid_range(first, last); - - if (first == last) { - return d_first; - } else { - // aliases - using index_type = typename InputIterator::difference_type; - using func_type = StdCopyIfFunctor<index_type, InputIterator, - OutputIterator, PredicateType>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - index_type count = 0; - ::Kokkos::parallel_scan(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(first, d_first, pred), count); - - // fence not needed because of the scan accumulating into count - return d_first + count; - } -} - -// ------------------------------------------ -// fill_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class T> -void fill_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - // run - const 
auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - StdFillFunctor<IteratorType, T>(first, value)); - ex.fence("Kokkos::fill: fence after operation"); -} - -template <class ExecutionSpace, class IteratorType, class SizeType, class T> -IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, const T& value) { - auto last = first + n; - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - if (n <= 0) { - return first; - } - - fill_impl(label, ex, first, last, value); - return last; -} - -// ------------------------------------------ -// transform_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class UnaryOperation> -OutputIterator transform_impl(const std::string& label, - const ExecutionSpace& ex, InputIterator first1, - InputIterator last1, OutputIterator d_first, - UnaryOperation unary_op) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first1, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first1, d_first); - Impl::expect_valid_range(first1, last1); - - // aliases - using index_type = typename InputIterator::difference_type; - using func_t = StdTransformFunctor<index_type, InputIterator, OutputIterator, - UnaryOperation>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first1, last1); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first1, d_first, unary_op)); - ex.fence("Kokkos::transform: fence after operation"); - - // return - return d_first + num_elements; -} - -template <class ExecutionSpace, class InputIterator1, class InputIterator2, - class OutputIterator, class BinaryOperation> -OutputIterator transform_impl(const std::string& label, - const ExecutionSpace& 
ex, InputIterator1 first1, - InputIterator1 last1, InputIterator2 first2, - OutputIterator d_first, - BinaryOperation binary_op) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first1, first2, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first1, first2, - d_first); - Impl::expect_valid_range(first1, last1); - - // aliases - using index_type = typename InputIterator1::difference_type; - using func_t = - StdTransformBinaryFunctor<index_type, InputIterator1, InputIterator2, - OutputIterator, BinaryOperation>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first1, last1); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first1, first2, d_first, binary_op)); - ex.fence("Kokkos::transform: fence after operation"); - return d_first + num_elements; -} - -// ------------------------------------------ -// generate_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class Generator> -void generate_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Generator g) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - // aliases - using func_t = StdGenerateFunctor<IteratorType, Generator>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, g)); - ex.fence("Kokkos::generate: fence after operation"); -} - -template <class ExecutionSpace, class IteratorType, class Size, class Generator> -IteratorType generate_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, Size count, Generator g) { - if (count <= 0) { - return first; - } - - generate_impl(label, ex, first, first + count, g); - return first + count; -} - -// 
------------------------------------------ -// replace_if_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class PredicateType, - class ValueType> -void replace_if_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, PredicateType pred, - const ValueType& new_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - // aliases - using func_t = StdReplaceIfFunctor<IteratorType, PredicateType, ValueType>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, std::move(pred), new_value)); - ex.fence("Kokkos::replace_if: fence after operation"); -} - -// ------------------------------------------ -// replace_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class ValueType> -void replace_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - const ValueType& old_value, const ValueType& new_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - // aliases - using func_t = StdReplaceFunctor<IteratorType, ValueType>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, old_value, new_value)); - ex.fence("Kokkos::replace: fence after operation"); -} - -// ------------------------------------------ -// replace_copy_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType> -OutputIteratorType replace_copy_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType 
first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - const ValueType& old_value, - const ValueType& new_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using func_t = - StdReplaceCopyFunctor<InputIteratorType, OutputIteratorType, ValueType>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first_from, first_dest, old_value, new_value)); - ex.fence("Kokkos::replace_copy: fence after operation"); - - // return - return first_dest + num_elements; -} - -// ------------------------------------------ -// replace_copy_if_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class PredicateType, class ValueType> -OutputIteratorType replace_copy_if_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - PredicateType pred, - const ValueType& new_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using func_t = - StdReplaceIfCopyFunctor<index_type, InputIteratorType, OutputIteratorType, - PredicateType, ValueType>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first_from, first_dest, std::move(pred), new_value)); - 
ex.fence("Kokkos::replace_copy_if: fence after operation"); - - // return - return first_dest + num_elements; -} - -} // namespace Impl - -// ------------------- -// replace copy -// ------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class ValueType> -OutputIterator replace_copy(const ExecutionSpace& ex, InputIterator first_from, - InputIterator last_from, OutputIterator first_dest, - const ValueType& old_value, - const ValueType& new_value) { - return Impl::replace_copy_impl("Kokkos::replace_copy_iterator_api", ex, - first_from, last_from, first_dest, old_value, - new_value); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class ValueType> -OutputIterator replace_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first_from, InputIterator last_from, - OutputIterator first_dest, - const ValueType& old_value, - const ValueType& new_value) { - return Impl::replace_copy_impl(label, ex, first_from, last_from, first_dest, - old_value, new_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class ValueType> -auto replace_copy(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - const ValueType& old_value, const ValueType& new_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl("Kokkos::replace_copy_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), old_value, new_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class ValueType> -auto replace_copy(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - const ValueType& old_value, const ValueType& new_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - old_value, new_value); -} - -// ------------------- -// replace_copy_if -// ------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class PredicateType, class ValueType> -OutputIterator replace_copy_if(const ExecutionSpace& ex, - InputIterator first_from, - InputIterator last_from, - OutputIterator first_dest, PredicateType pred, - const ValueType& new_value) { - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_iterator_api", ex, - first_from, last_from, first_dest, pred, - new_value); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class PredicateType, class ValueType> -OutputIterator replace_copy_if(const std::string& label, - const ExecutionSpace& ex, - InputIterator first_from, - InputIterator last_from, - OutputIterator first_dest, PredicateType pred, - const ValueType& new_value) { - return Impl::replace_copy_if_impl(label, ex, first_from, last_from, - first_dest, pred, new_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class PredicateType, - class ValueType> -auto replace_copy_if(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - PredicateType pred, const ValueType& new_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), pred, new_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class PredicateType, - class ValueType> -auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - PredicateType pred, const ValueType& new_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - pred, new_value); -} - -// ------------------- -// replace -// ------------------- -template <class ExecutionSpace, class Iterator, class ValueType> -void replace(const ExecutionSpace& ex, Iterator first, Iterator last, - const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl("Kokkos::replace_iterator_api", ex, first, last, - old_value, new_value); -} - -template <class ExecutionSpace, class Iterator, class ValueType> -void replace(const std::string& label, const ExecutionSpace& ex, Iterator first, - Iterator last, const ValueType& old_value, - const ValueType& new_value) { - return Impl::replace_impl(label, ex, first, 
last, old_value, new_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class ValueType> -void replace(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ValueType& old_value, const ValueType& new_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl("Kokkos::replace_view_api", ex, KE::begin(view), - KE::end(view), old_value, new_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class ValueType> -void replace(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - const ValueType& old_value, const ValueType& new_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl(label, ex, KE::begin(view), KE::end(view), - old_value, new_value); -} - -// ------------------- -// replace_if -// ------------------- -template <class ExecutionSpace, class InputIterator, class Predicate, - class ValueType> -void replace_if(const ExecutionSpace& ex, InputIterator first, - InputIterator last, Predicate pred, - const ValueType& new_value) { - return Impl::replace_if_impl("Kokkos::replace_if_iterator_api", ex, first, - last, pred, new_value); -} - -template <class ExecutionSpace, class InputIterator, class Predicate, - class ValueType> -void replace_if(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate pred, - const ValueType& new_value) { - return Impl::replace_if_impl(label, ex, first, last, pred, new_value); -} - -template <class ExecutionSpace, class DataType1, class... 
Properties1, - class Predicate, class ValueType> -void replace_if(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - Predicate pred, const ValueType& new_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl("Kokkos::replace_if_view_api", ex, - KE::begin(view), KE::end(view), pred, new_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class Predicate, class ValueType> -void replace_if(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view, - Predicate pred, const ValueType& new_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl(label, ex, KE::begin(view), KE::end(view), pred, - new_value); -} - -// ------------------- -// copy -// ------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator copy(const ExecutionSpace& ex, InputIterator first, - InputIterator last, OutputIterator d_first) { - return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last, - d_first); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { - return Impl::copy_impl(label, ex, first, last, d_first); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto copy(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl("Kokkos::copy_view_api_default", ex, - KE::cbegin(source), KE::cend(source), KE::begin(dest)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto copy(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source), - KE::begin(dest)); -} - -// ------------------- -// copy_n -// ------------------- -template <class ExecutionSpace, class InputIterator, class Size, - class OutputIterator> -OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count, - OutputIterator result) { - return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first, - count, result); -} - -template <class ExecutionSpace, class InputIterator, class Size, - class OutputIterator> -OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex, - InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl(label, ex, first, count, result); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class Size, class DataType2, class... 
Properties2> -auto copy_n(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, Size count, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex, - KE::cbegin(source), count, KE::begin(dest)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class Size, class DataType2, class... Properties2> -auto copy_n(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, Size count, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl(label, ex, KE::cbegin(source), count, - KE::begin(dest)); -} - -// ------------------- -// copy_backward -// ------------------- -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default", - ex, first, last, d_last); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 d_last) { - return Impl::copy_backward_impl(label, ex, first, last, d_last); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto copy_backward(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex, - cbegin(source), cend(source), end(dest)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto copy_backward(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::copy_backward_impl(label, ex, cbegin(source), cend(source), - end(dest)); -} - -// ------------------- -// copy_if -// ------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class Predicate> -OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first, - InputIterator last, OutputIterator d_first, - Predicate pred) { - return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first, - last, d_first, std::move(pred)); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class Predicate> -OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class Predicate> -auto copy_if(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class Predicate> -auto copy_if(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::copy_if_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); -} - -// ------------------- -// fill -// ------------------- -template <class ExecutionSpace, class IteratorType, class T> -void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last, - const T& value) { - Impl::fill_impl("Kokkos::fill_iterator_api_default", ex, first, last, value); -} - -template <class ExecutionSpace, class IteratorType, class T> -void fill(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl(label, ex, first, last, value); -} - -template <class ExecutionSpace, class DataType, class... Properties, class T> -void fill(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, const T& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - Impl::fill_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view), - value); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, class T> -void fill(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, const T& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - Impl::fill_impl(label, ex, begin(view), end(view), value); -} - -// ------------------- -// fill_n -// ------------------- -template <class ExecutionSpace, class IteratorType, class SizeType, class T> -IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n, - const T& value) { - return Impl::fill_n_impl("Kokkos::fill_n_iterator_api_default", ex, first, n, - value); -} - -template <class ExecutionSpace, class IteratorType, class SizeType, class T> -IteratorType fill_n(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl(label, ex, first, n, value); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class SizeType, class T> -auto fill_n(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, SizeType n, - const T& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - return Impl::fill_n_impl("Kokkos::fill_n_view_api_default", ex, begin(view), - n, value); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class SizeType, class T> -auto fill_n(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, SizeType n, - const T& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - return Impl::fill_n_impl(label, ex, begin(view), n, value); -} - -// ------------------- -// transform -// ------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class UnaryOperation> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator first1, InputIterator last1, - OutputIterator d_first, UnaryOperation unary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, d_first, std::move(unary_op)); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class UnaryOperation> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator first1, InputIterator last1, OutputIterator d_first, - UnaryOperation unary_op) { - return Impl::transform_impl(label, ex, first1, last1, d_first, - std::move(unary_op)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class UnaryOperation> -auto transform(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest, - UnaryOperation unary_op) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source), end(source), begin(dest), - std::move(unary_op)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class UnaryOperation> -auto transform(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest, - UnaryOperation unary_op) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::transform_impl(label, ex, begin(source), end(source), - begin(dest), std::move(unary_op)); -} - -template <class ExecutionSpace, class InputIterator1, class InputIterator2, - class OutputIterator, class BinaryOperation> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator d_first, - BinaryOperation binary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, first2, d_first, - std::move(binary_op)); -} - -template <class ExecutionSpace, class InputIterator1, class InputIterator2, - class OutputIterator, class BinaryOperation> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - 
InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator d_first, BinaryOperation binary_op) { - return Impl::transform_impl(label, ex, first1, last1, first2, d_first, - std::move(binary_op)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class DataType3, - class... Properties3, class BinaryOperation> -auto transform(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source1, - const ::Kokkos::View<DataType2, Properties2...>& source2, - ::Kokkos::View<DataType3, Properties3...>& dest, - BinaryOperation binary_op) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source1), end(source1), begin(source2), - begin(dest), std::move(binary_op)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class DataType3, - class... 
Properties3, class BinaryOperation> -auto transform(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source1, - const ::Kokkos::View<DataType2, Properties2...>& source2, - ::Kokkos::View<DataType3, Properties3...>& dest, - BinaryOperation binary_op) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::transform_impl(label, ex, begin(source1), end(source1), - begin(source2), begin(dest), - std::move(binary_op)); -} - -// ------------------- -// generate -// ------------------- -template <class ExecutionSpace, class IteratorType, class Generator> -void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last, - Generator g) { - Impl::generate_impl("Kokkos::generate_iterator_api_default", ex, first, last, - std::move(g)); -} - -template <class ExecutionSpace, class IteratorType, class Generator> -void generate(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl(label, ex, first, last, std::move(g)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class Generator> -void generate(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - Generator g) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - Impl::generate_impl("Kokkos::generate_view_api_default", ex, begin(view), - end(view), std::move(g)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class Generator> -void generate(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - Generator g) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - Impl::generate_impl(label, ex, begin(view), end(view), std::move(g)); -} - -// ------------------- -// generate_n -// ------------------- -template <class ExecutionSpace, class IteratorType, class Size, class Generator> -IteratorType generate_n(const ExecutionSpace& ex, IteratorType first, - Size count, Generator g) { - Impl::generate_n_impl("Kokkos::generate_n_iterator_api_default", ex, first, - count, std::move(g)); - return first + count; -} - -template <class ExecutionSpace, class IteratorType, class Size, class Generator> -IteratorType generate_n(const std::string& label, const ExecutionSpace& ex, - IteratorType first, Size count, Generator g) { - Impl::generate_n_impl(label, ex, first, count, std::move(g)); - return first + count; -} - -template <class ExecutionSpace, class DataType, class... Properties, class Size, - class Generator> -auto generate_n(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, Size count, - Generator g) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - return Impl::generate_n_impl("Kokkos::generate_n_view_api_default", ex, - begin(view), count, std::move(g)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, class Size, - class Generator> -auto generate_n(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, Size count, - Generator g) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - return Impl::generate_n_impl(label, ex, begin(view), count, std::move(g)); -} - -} // namespace Experimental -} // namespace Kokkos - -#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp b/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp deleted file mode 100644 index 9d2c85f00d38c97595da35d6833a799ebff36170..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp +++ /dev/null @@ -1,1783 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_MODIFYING_SEQUENCE_OPERATIONS_SET2_HPP -#define KOKKOS_MODIFYING_SEQUENCE_OPERATIONS_SET2_HPP - -#include <Kokkos_Core.hpp> -#include "../Kokkos_BeginEnd.hpp" -#include "../Kokkos_Constraints.hpp" -#include "../Kokkos_ModifyingOperations.hpp" -#include "../Kokkos_NonModifyingSequenceOperations.hpp" - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -//------------------------- -// -// functors -// -//------------------------- - -template <class IndexType, class InputIt, class OutputIt, - class BinaryPredicateType> -struct StdUniqueCopyFunctor { - InputIt m_first_from; - InputIt m_last_from; - OutputIt m_first_dest; - BinaryPredicateType m_pred; - - KOKKOS_FUNCTION - StdUniqueCopyFunctor(InputIt first_from, InputIt last_from, - OutputIt first_dest, BinaryPredicateType pred) - : m_first_from(std::move(first_from)), - m_last_from(std::move(last_from)), - m_first_dest(std::move(first_dest)), - m_pred(std::move(pred)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, IndexType& update, - const bool final_pass) const { - 
const auto& val_i = m_first_from[i]; - const auto& val_ip1 = m_first_from[i + 1]; - - if (final_pass) { - if (!m_pred(val_i, val_ip1)) { - m_first_dest[update] = val_i; - } - } - - if (!m_pred(val_i, val_ip1)) { - update += 1; - } - } -}; - -template <class InputIterator> -struct StdReverseFunctor { - using index_type = typename InputIterator::difference_type; - static_assert(std::is_signed<index_type>::value, - "Kokkos: StdReverseFunctor requires signed index type"); - - InputIterator m_first; - InputIterator m_last; - - KOKKOS_FUNCTION - void operator()(index_type i) const { - // the swap below is doing the same thing, but - // for Intel 18.0.5 does not work. - // But putting the impl directly here, it works. -#ifdef KOKKOS_COMPILER_INTEL - typename InputIterator::value_type tmp = std::move(m_first[i]); - m_first[i] = std::move(m_last[-i - 1]); - m_last[-i - 1] = std::move(tmp); -#else - ::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]); -#endif - } - - StdReverseFunctor(InputIterator first, InputIterator last) - : m_first(std::move(first)), m_last(std::move(last)) {} -}; - -template <class IndexType, class InputIterator, class OutputIterator> -struct StdReverseCopyFunctor { - static_assert(std::is_signed<IndexType>::value, - "Kokkos: StdReverseCopyFunctor requires signed index type"); - - InputIterator m_last; - OutputIterator m_dest_first; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_first[i] = m_last[-1 - i]; } - - StdReverseCopyFunctor(InputIterator _last, OutputIterator _dest_first) - : m_last(std::move(_last)), m_dest_first(std::move(_dest_first)) {} -}; - -template <class IndexType, class InputIterator, class OutputIterator> -struct StdMoveFunctor { - InputIterator m_first; - OutputIterator m_dest_first; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { - m_dest_first[i] = std::move(m_first[i]); - } - - StdMoveFunctor(InputIterator _first, OutputIterator _dest_first) - : m_first(std::move(_first)), 
m_dest_first(std::move(_dest_first)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2> -struct StdMoveBackwardFunctor { - static_assert(std::is_signed<IndexType>::value, - "Kokkos: StdMoveBackwardFunctor requires signed index type"); - - IteratorType1 m_last; - IteratorType2 m_dest_last; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { - m_dest_last[-i] = std::move(m_last[-i]); - } - - StdMoveBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) - : m_last(std::move(_last)), m_dest_last(std::move(_dest_last)) {} -}; - -template <class IndexType, class IteratorType1, class IteratorType2> -struct StdSwapRangesFunctor { - IteratorType1 m_first1; - IteratorType2 m_first2; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { - // the swap below is doing the same thing, but - // for Intel 18.0.5 does not work. - // But putting the impl directly here, it works. -#ifdef KOKKOS_COMPILER_INTEL - typename IteratorType1::value_type tmp = std::move(m_first1[i]); - m_first1[i] = std::move(m_first2[i]); - m_first2[i] = std::move(tmp); -#else - ::Kokkos::Experimental::swap(m_first1[i], m_first2[i]); -#endif - } - - KOKKOS_FUNCTION - StdSwapRangesFunctor(IteratorType1 _first1, IteratorType2 _first2) - : m_first1(std::move(_first1)), m_first2(std::move(_first2)) {} -}; - -template <class IndexType, class InputIt, class OutputIt, - class BinaryPredicateType> -struct StdUniqueFunctor { - InputIt m_first_from; - InputIt m_last_from; - OutputIt m_first_dest; - BinaryPredicateType m_pred; - - KOKKOS_FUNCTION - StdUniqueFunctor(InputIt first_from, InputIt last_from, OutputIt first_dest, - BinaryPredicateType pred) - : m_first_from(std::move(first_from)), - m_last_from(std::move(last_from)), - m_first_dest(std::move(first_dest)), - m_pred(std::move(pred)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, IndexType& update, - const bool final_pass) const { - auto& val_i = m_first_from[i]; - const auto& val_ip1 = m_first_from[i 
+ 1]; - - if (final_pass) { - if (!m_pred(val_i, val_ip1)) { - m_first_dest[update] = std::move(val_i); - } - } - - if (!m_pred(val_i, val_ip1)) { - update += 1; - } - } -}; - -template <class IndexType, class InputIterator, class OutputIterator> -struct StdRotateCopyFunctor { - InputIterator m_first; - InputIterator m_last; - InputIterator m_first_n; - OutputIterator m_dest_first; - - KOKKOS_FUNCTION - void operator()(IndexType i) const { - const IndexType shift = m_last - m_first_n; - - if (i < shift) { - m_dest_first[i] = m_first_n[i]; - } else { - m_dest_first[i] = m_first[i - shift]; - } - } - - StdRotateCopyFunctor(InputIterator first, InputIterator last, - InputIterator first_n, OutputIterator dest_first) - : m_first(std::move(first)), - m_last(std::move(last)), - m_first_n(std::move(first_n)), - m_dest_first(std::move(dest_first)) {} -}; - -template <class IndexType, class FirstFrom, class FirstDest, class PredType> -struct StdRemoveIfStage1Functor { - FirstFrom m_first_from; - FirstDest m_first_dest; - PredType m_must_remove; - - KOKKOS_FUNCTION - StdRemoveIfStage1Functor(FirstFrom first_from, FirstDest first_dest, - PredType pred) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_must_remove(std::move(pred)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, IndexType& update, - const bool final_pass) const { - auto& myval = m_first_from[i]; - if (final_pass) { - if (!m_must_remove(myval)) { - // calling move here is ok because we are inside final pass - // we are calling move assign as specified by the std - m_first_dest[update] = std::move(myval); - } - } - - if (!m_must_remove(myval)) { - update += 1; - } - } -}; - -template <class IndexType, class InputIteratorType, class OutputIteratorType> -struct StdRemoveIfStage2Functor { - InputIteratorType m_first_from; - OutputIteratorType m_first_to; - - KOKKOS_FUNCTION - StdRemoveIfStage2Functor(InputIteratorType first_from, - OutputIteratorType first_to) - : 
m_first_from(std::move(first_from)), m_first_to(std::move(first_to)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i) const { - m_first_to[i] = std::move(m_first_from[i]); - } -}; - -// ------------------------------------------ -// unique_copy_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class PredicateType> -OutputIterator unique_copy_impl(const std::string& label, - const ExecutionSpace& ex, InputIterator first, - InputIterator last, OutputIterator d_first, - PredicateType pred) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first, d_first); - Impl::expect_valid_range(first, last); - - // branch for trivial vs non trivial case - const auto num_elements = Kokkos::Experimental::distance(first, last); - if (num_elements == 0) { - return d_first; - } else if (num_elements == 1) { - return Impl::copy_impl("Kokkos::copy_from_unique_copy", ex, first, last, - d_first); - } else { - // aliases - using index_type = typename InputIterator::difference_type; - using func_type = StdUniqueCopyFunctor<index_type, InputIterator, - OutputIterator, PredicateType>; - - // note here that we run scan for num_elements - 1 - // because of the way we implement this, the last element is always needed. - // We avoid performing checks inside functor that we are within limits - // and run a "safe" scan and then copy the last element. 
- const auto scan_size = num_elements - 1; - index_type count = 0; - ::Kokkos::parallel_scan(label, - RangePolicy<ExecutionSpace>(ex, 0, scan_size), - func_type(first, last, d_first, pred), count); - - return Impl::copy_impl("Kokkos::copy_from_unique_copy", ex, - first + scan_size, last, d_first + count); - } -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator unique_copy_impl(const std::string& label, - const ExecutionSpace& ex, InputIterator first, - InputIterator last, OutputIterator d_first) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first, d_first); - Impl::expect_valid_range(first, last); - - // aliases - using value_type1 = typename InputIterator::value_type; - using value_type2 = typename OutputIterator::value_type; - - // default binary predicate uses == - using binary_pred_t = StdAlgoEqualBinaryPredicate<value_type1, value_type2>; - - // run - return unique_copy_impl(label, ex, first, last, d_first, binary_pred_t()); -} - -// ------------------------------------------ -// reverse_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator> -void reverse_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - // aliases - using func_t = StdReverseFunctor<InputIterator>; - - // run - if (last >= first + 2) { - // only need half - const auto num_elements = Kokkos::Experimental::distance(first, last) / 2; - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, last)); - ex.fence("Kokkos::reverse: fence after operation"); - } -} - -// ------------------------------------------ -// reverse_copy_impl -// ------------------------------------------ -template <class 
ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator reverse_copy_impl(const std::string& label, - const ExecutionSpace& ex, InputIterator first, - InputIterator last, OutputIterator d_first) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first, d_first); - Impl::expect_valid_range(first, last); - - // aliases - using index_type = typename InputIterator::difference_type; - using func_t = - StdReverseCopyFunctor<index_type, InputIterator, OutputIterator>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(last, d_first)); - ex.fence("Kokkos::reverse_copy: fence after operation"); - - // return - return d_first + num_elements; -} - -// ------------------------------------------ -// move_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator move_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first, d_first); - Impl::expect_valid_range(first, last); - - // aliases - using index_type = typename InputIterator::difference_type; - using func_t = StdMoveFunctor<index_type, InputIterator, OutputIterator>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, d_first)); - ex.fence("Kokkos::move: fence after operation"); - - // return - return d_first + num_elements; -} - -// ------------------------------------------ -// move_backward_impl -// ------------------------------------------ 
-template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 move_backward_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 d_last) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_last); - Impl::static_assert_iterators_have_matching_difference_type(first, d_last); - Impl::expect_valid_range(first, last); - - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = - StdMoveBackwardFunctor<index_type, IteratorType1, IteratorType2>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(last, d_last)); - ex.fence("Kokkos::move_backward: fence after operation"); - - // return - return d_last - num_elements; -} - -// ------------------------------------------ -// swap_ranges_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 swap_ranges_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first1, first2); - Impl::static_assert_iterators_have_matching_difference_type(first1, first2); - Impl::expect_valid_range(first1, last1); - - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = StdSwapRangesFunctor<index_type, IteratorType1, IteratorType2>; - - // run - const auto num_elements_to_swap = - Kokkos::Experimental::distance(first1, last1); - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_to_swap), - func_t(first1, first2)); - ex.fence("Kokkos::swap_ranges: fence after operation"); - - // return - return first2 + num_elements_to_swap; -} - -// ------------------------------------------ -// 
unique_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class PredicateType> -IteratorType unique_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - PredicateType pred) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - const auto num_elements = Kokkos::Experimental::distance(first, last); - if (num_elements == 0) { - return first; - } else if (num_elements == 1) { - return last; - } else { - // ---------- - // step 1: - // find first location of adjacent equal elements - // ---------- - auto it_found = - ::Kokkos::Experimental::adjacent_find(ex, first, last, pred); - - // if none, all elements are unique, so nothing to do - if (it_found == last) { - return last; - } else { - // if here, we found some equal adjacent elements, - // so count all preceeding unique elements - const auto num_unique_found_in_step_one = it_found - first; - - // ---------- - // step 2: - // ---------- - // since we found some unique elements, we don't need to explore - // the full range [first, last), but only need to focus on the - // remaining range [it_found, last) - const auto num_elements_to_explore = last - it_found; - - // create a tmp view to use to *move* all unique elements - // using the same algorithm used for unique_copy but we now move things - using value_type = typename IteratorType::value_type; - using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>; - tmp_view_type tmp_view("std_unique_tmp_view", num_elements_to_explore); - - // scan extent is: num_elements_to_explore - 1 - // for same reason as the one explained in unique_copy - const auto scan_size = num_elements_to_explore - 1; - auto tmp_first = ::Kokkos::Experimental::begin(tmp_view); - using output_it = decltype(tmp_first); - - using index_type = typename IteratorType::difference_type; - using func_type = - 
StdUniqueFunctor<index_type, IteratorType, output_it, PredicateType>; - index_type count = 0; - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, scan_size), - func_type(it_found, last, tmp_first, pred), count); - - // move last element too, for the same reason as the unique_copy - auto unused_r = - Impl::move_impl("Kokkos::move_from_unique", ex, it_found + scan_size, - last, tmp_first + count); - (void)unused_r; // r1 not used - - // ---------- - // step 3 - // ---------- - // move back from tmp to original range, - // ensuring we start overwriting after the original unique found - using tmp_readwrite_iterator_type = decltype(begin(tmp_view)); - using step3_func_t = - StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>; - - ::Kokkos::parallel_for( - "unique_step3_parfor", - RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)), - step3_func_t(begin(tmp_view), - (first + num_unique_found_in_step_one))); - - ex.fence("Kokkos::unique: fence after operation"); - - // return iterator to one passed the last written - // (the +1 is needed to account for the last element, see above) - return (first + num_unique_found_in_step_one + count + 1); - } - } -} - -template <class ExecutionSpace, class IteratorType> -IteratorType unique_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { - using value_type = typename IteratorType::value_type; - using binary_pred_t = StdAlgoEqualBinaryPredicate<value_type>; - return unique_impl(label, ex, first, last, binary_pred_t()); -} - -// ------------------------------------------ -// rotate_copy_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator rotate_copy_impl(const std::string& label, - const ExecutionSpace& ex, InputIterator first, - InputIterator n_first, InputIterator last, - OutputIterator d_first) { - /* - algorithm is implemented as follows: - - first 
n_first last - | | | - o o o o o o o o o o o o - - dest+0 -> first_n - dest+1 -> first_n+1 - dest+2 -> first_n+2 - dest+3 -> first - dest+4 -> first+1 - dest+5 -> first+2 - dest+6 -> first+3 - dest+7 -> first+4 - dest+8 -> first+5 - ... - let shift = last - first_n; - - then we have: - if (i < shift){ - *(dest_first + i) = *(first_n + i); - } - else{ - *(dest_first + i) = *(from + i - shift); - } - */ - - // checks - Impl::static_assert_random_access_and_accessible(ex, first, d_first); - Impl::static_assert_iterators_have_matching_difference_type(first, d_first); - Impl::expect_valid_range(first, last); - Impl::expect_valid_range(first, n_first); - Impl::expect_valid_range(n_first, last); - - if (first == last) { - return d_first; - } - - // aliases - using index_type = typename InputIterator::difference_type; - using func_type = - StdRotateCopyFunctor<index_type, InputIterator, OutputIterator>; - - // run - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(first, last, n_first, d_first)); - - ex.fence("Kokkos::rotate_copy: fence after operation"); - - // return - return d_first + num_elements; -} - -// ------------------------------------------ -// rotate_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType> -IteratorType rotate_with_pivot_in_left_half(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType n_first, - IteratorType last) { - /* - This impl is specific for when the n_first iterator points to - an element that is before or equal to the middle of the range. 
- - If we have: - - | 0 | 1 | 2 | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | -6 | -5 | 8 | 9 | 11 | * - ^ ^ mid ^ - first n_first last - - In step 1, we create a temporary view with extent = distance(n_first, last) - and *move* the elements from [n_first, last) to tmp view, such that - tmp view becomes: - - | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | -6 | -5 | 8 | 9 | 11 | - - In step 2, we move the elements in [first, n_first) - to the new position where they are supposed to end up. - - In step 3, we move the elements from the tmp view to - the range starting at first. - */ - - namespace KE = ::Kokkos::Experimental; - const auto num_elements_on_left = KE::distance(first, n_first); - const auto num_elements_on_right = KE::distance(n_first, last); - - // create helper tmp view - using value_type = typename IteratorType::value_type; - using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>; - tmp_view_type tmp_view("rotate_impl_for_pivot_in_left_half_impl", - num_elements_on_right); - using tmp_readwrite_iterator_type = decltype(begin(tmp_view)); - - // index_type is the same and needed in all steps - using index_type = typename IteratorType::difference_type; - - // stage 1 - using step1_func_type = - StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>; - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_right), - step1_func_type(n_first, begin(tmp_view))); - - // stage 2 - using step2_func_type = - StdMoveFunctor<index_type, IteratorType, IteratorType>; - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_left), - step2_func_type(first, first + num_elements_on_right)); - - // step 3 - using step3_func_type = - StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>; - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)), - step3_func_type(begin(tmp_view), first)); - - ex.fence("Kokkos::rotate: fence after operation"); - return first + (last - 
n_first); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType rotate_with_pivot_in_right_half(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType n_first, - IteratorType last) { - /* - This impl is specific for when the n_first iterator points to - an element that is after the middle of the range. - - If we have: - - | 0 | 1 | 2 | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | -6 | -5 | 8 | 9 | 11 | * - ^ mid ^ ^ - first n_first last - - In step 1, we create a temporary view with extent = distance(first, n_first) - and *move* the elements from [first, n_first) to tmp view, - such that tmp view becomes: - - | 0 | 1 | 2 | 1 | 4 | 5 | 2 | 2 | 10 | -3 | 1 | - - In step 2, we move the elements in [n_first, last) - to the beginning where they are supposed to end up. - - In step 3, we move the elements from the tmp view to - the range starting at first. - */ - - namespace KE = ::Kokkos::Experimental; - const auto num_elements_on_left = KE::distance(first, n_first); - const auto num_elements_on_right = KE::distance(n_first, last); - - // create helper tmp view - using value_type = typename IteratorType::value_type; - using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>; - tmp_view_type tmp_view("rotate_impl_for_pivot_in_left_half_impl", - num_elements_on_left); - using tmp_readwrite_iterator_type = decltype(begin(tmp_view)); - - // index_type is the same and needed in all steps - using index_type = typename IteratorType::difference_type; - - // stage 1 - using step1_func_type = - StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>; - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_left), - step1_func_type(first, begin(tmp_view))); - - // stage 2 - using step2_func_type = - StdMoveFunctor<index_type, IteratorType, IteratorType>; - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_on_right), - step2_func_type(n_first, first)); - - // 
step 3: - using step3_func_type = - StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>; - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)), - step3_func_type(begin(tmp_view), first + num_elements_on_right)); - - ex.fence("Kokkos::rotate: fence after operation"); - return first + (last - n_first); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType rotate_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType n_first, - IteratorType last) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - Impl::expect_valid_range(first, n_first); - Impl::expect_valid_range(n_first, last); - - namespace KE = ::Kokkos::Experimental; - const auto num_elements = KE::distance(first, last); - const auto n_distance_from_first = KE::distance(first, n_first); - if (n_distance_from_first <= num_elements / 2) { - return rotate_with_pivot_in_left_half(label, ex, first, n_first, last); - } else { - return rotate_with_pivot_in_right_half(label, ex, first, n_first, last); - } -} - -// ------------------------------------------ -// remove_if_impl -// ------------------------------------------ -template <class ExecutionSpace, class IteratorType, class UnaryPredicateType> -IteratorType remove_if_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - UnaryPredicateType pred) { - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - - if (first == last) { - return last; - } else { - // create tmp buffer to use to *move* all elements that we need to keep. - // note that the tmp buffer is just large enought to store - // all elements to keep, because ideally we do not need/want one - // as large as the original range. - // To allocate the right tmp view, we need a call to count_if. 
- // We could just do a "safe" allocation of a buffer as - // large as (last-first), but I think a call to count_if is more afforable. - - // count how many elements we need to keep - // note that the elements to remove are those that meet the predicate - const auto remove_count = - ::Kokkos::Experimental::count_if(ex, first, last, pred); - const auto keep_count = - Kokkos::Experimental::distance(first, last) - remove_count; - - // create helper tmp view - using value_type = typename IteratorType::value_type; - using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>; - tmp_view_type tmp_view("std_remove_if_tmp_view", keep_count); - using tmp_readwrite_iterator_type = decltype(begin(tmp_view)); - - // in stage 1, *move* all elements to keep from original range to tmp - // we use similar impl as copy_if except that we *move* rather than copy - using index_type = typename IteratorType::difference_type; - using func1_type = StdRemoveIfStage1Functor<index_type, IteratorType, - tmp_readwrite_iterator_type, - UnaryPredicateType>; - - const auto scan_num_elements = Kokkos::Experimental::distance(first, last); - index_type scan_count = 0; - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, scan_num_elements), - func1_type(first, begin(tmp_view), pred), scan_count); - - // scan_count should be equal to keep_count - assert(scan_count == keep_count); - (void)scan_count; // to avoid unused complaints - - // stage 2, we do parfor to move from tmp to original range - using func2_type = - StdRemoveIfStage2Functor<index_type, tmp_readwrite_iterator_type, - IteratorType>; - ::Kokkos::parallel_for( - "remove_if_stage2_parfor", - RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)), - func2_type(begin(tmp_view), first)); - ex.fence("Kokkos::remove_if: fence after stage2"); - - // return - return first + keep_count; - } -} - -// ------------------------------------------ -// remove_impl -// ------------------------------------------ -template <class 
ExecutionSpace, class IteratorType, class ValueType> -auto remove_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - const ValueType& value) { - using predicate_type = StdAlgoEqualsValUnaryPredicate<ValueType>; - return remove_if_impl(label, ex, first, last, predicate_type(value)); -} - -// ------------------------------------------ -// remove_copy_impl -// ------------------------------------------ -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType> -auto remove_copy_impl(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, const ValueType& value) { - // this is like copy_if except that we need to *ignore* the elements - // that match the value, so we can solve this as follows: - - using predicate_type = StdAlgoNotEqualsValUnaryPredicate<ValueType>; - return ::Kokkos::Experimental::copy_if(label, ex, first_from, last_from, - first_dest, predicate_type(value)); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class UnaryPredicate> -auto remove_copy_if_impl(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - const UnaryPredicate& pred) { - // this is like copy_if except that we need to *ignore* the elements - // satisfying the pred, so we can solve this as follows: - - using value_type = typename InputIteratorType::value_type; - using pred_wrapper_type = - StdAlgoNegateUnaryPredicateWrapper<value_type, UnaryPredicate>; - return ::Kokkos::Experimental::copy_if(label, ex, first_from, last_from, - first_dest, pred_wrapper_type(pred)); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType shift_left_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - typename 
IteratorType::difference_type n) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - KOKKOS_EXPECTS(n >= 0); - - // handle trivial cases - if (n == 0) { - return last; - } - - if (n >= Kokkos::Experimental::distance(first, last)) { - return first; - } - - /* - Suppose that n = 5, and our [first,last) spans: - - | 0 | 1 | 2 | 1 | 2 | 1 | 2 | 2 | 10 | -3 | 1 | -6 | * - ^ ^ - first last - - shift_left modifies the range such that we have this data: - | 1 | 2 | 2 | 10 | -3 | 1 | -6 | x | x | x | x | x | * - ^ - return it pointing here - - - and returns an iterator pointing to one past the new end. - Note: elements marked x are in undefined state because have been moved. - - We implement this in two steps: - step 1: - we create a temporary view with extent = distance(first+n, last) - and *move* assign the elements from [first+n, last) to tmp view, such that - tmp view becomes: - - | 1 | 2 | 2 | 10 | -3 | 1 | -6 | - - step 2: - move elements of tmp view back to range starting at first. 
- */ - - const auto num_elements_to_move = - ::Kokkos::Experimental::distance(first + n, last); - - // create tmp view - using value_type = typename IteratorType::value_type; - using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>; - tmp_view_type tmp_view("shift_left_impl", num_elements_to_move); - using tmp_readwrite_iterator_type = decltype(begin(tmp_view)); - - using index_type = typename IteratorType::difference_type; - - // step 1 - using step1_func_type = - StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>; - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_to_move), - step1_func_type(first + n, begin(tmp_view))); - - // step 2 - using step2_func_type = - StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>; - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)), - step2_func_type(begin(tmp_view), first)); - - ex.fence("Kokkos::shift_left: fence after operation"); - - return last - n; -} - -template <class ExecutionSpace, class IteratorType> -IteratorType shift_right_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, - typename IteratorType::difference_type n) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first); - Impl::expect_valid_range(first, last); - KOKKOS_EXPECTS(n >= 0); - - // handle trivial cases - if (n == 0) { - return first; - } - - if (n >= Kokkos::Experimental::distance(first, last)) { - return last; - } - - /* - Suppose that n = 3, and [first,last) spans: - - | 0 | 1 | 2 | 1 | 2 | 1 | 2 | 2 | 10 | -3 | 1 | -6 | * - ^ ^ - first last - - shift_right modifies the range such that we have this data: - | x | x | x | 0 | 1 | 2 | 1 | 2 | 1 | 2 | 2 | 10 | * - ^ - return it points here - - and returns an iterator pointing to the new beginning. - Note: elements marked x are in undefined state because have been moved. 
- - We implement this in two steps: - step 1: - we create a temporary view with extent = distance(first, last-n) - and *move* assign the elements from [first, last-n) to tmp view, such that - tmp view becomes: - - | 0 | 1 | 2 | 1 | 2 | 1 | 2 | 2 | 10 | - - step 2: - move elements of tmp view back to range starting at first+n. - */ - - const auto num_elements_to_move = - ::Kokkos::Experimental::distance(first, last - n); - - // create tmp view - using value_type = typename IteratorType::value_type; - using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>; - tmp_view_type tmp_view("shift_right_impl", num_elements_to_move); - using tmp_readwrite_iterator_type = decltype(begin(tmp_view)); - - using index_type = typename IteratorType::difference_type; - - // step 1 - using step1_func_type = - StdMoveFunctor<index_type, IteratorType, tmp_readwrite_iterator_type>; - ::Kokkos::parallel_for( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements_to_move), - step1_func_type(first, begin(tmp_view))); - - // step 2 - using step2_func_type = - StdMoveFunctor<index_type, tmp_readwrite_iterator_type, IteratorType>; - ::Kokkos::parallel_for(label, - RangePolicy<ExecutionSpace>(ex, 0, tmp_view.extent(0)), - step2_func_type(begin(tmp_view), first + n)); - - ex.fence("Kokkos::shift_right: fence after operation"); - - return first + n; -} - -} // namespace Impl - -// ------------------- -// reverse_copy -// ------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator reverse_copy(const ExecutionSpace& ex, InputIterator first, - InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl("Kokkos::reverse_copy_iterator_api_default", - ex, first, last, d_first); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator reverse_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { - return 
Impl::reverse_copy_impl(label, ex, first, last, d_first); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto reverse_copy(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::reverse_copy_impl("Kokkos::reverse_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto reverse_copy(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::reverse_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest)); -} - -// ------------------- -// reverse -// ------------------- -template <class ExecutionSpace, class InputIterator> -void reverse(const ExecutionSpace& ex, InputIterator first, - InputIterator last) { - return Impl::reverse_impl("Kokkos::reverse_iterator_api_default", ex, first, - last); -} - -template <class ExecutionSpace, class InputIterator> -void reverse(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last) { - return Impl::reverse_impl(label, ex, first, last); -} - -template <class ExecutionSpace, class DataType, class... 
Properties> -void reverse(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl("Kokkos::reverse_view_api_default", ex, - KE::begin(view), KE::end(view)); -} - -template <class ExecutionSpace, class DataType, class... Properties> -void reverse(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl(label, ex, KE::begin(view), KE::end(view)); -} - -// ---------------------- -// move -// ---------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator move(const ExecutionSpace& ex, InputIterator first, - InputIterator last, OutputIterator d_first) { - return Impl::move_impl("Kokkos::move_iterator_api_default", ex, first, last, - d_first); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator move(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { - return Impl::move_impl(label, ex, first, last, d_first); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto move(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::move_impl("Kokkos::move_view_api_default", ex, begin(source), - end(source), begin(dest)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto move(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::move_impl(label, ex, begin(source), end(source), begin(dest)); -} - -// ------------------- -// move_backward -// ------------------- -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 move_backward(const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 d_last) { - return Impl::move_backward_impl("Kokkos::move_backward_iterator_api_default", - ex, first, last, d_last); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto move_backward(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::move_backward_impl("Kokkos::move_backward_view_api_default", ex, - begin(source), end(source), end(dest)); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 move_backward(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 d_last) { - return Impl::move_backward_impl(label, ex, first, last, d_last); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto move_backward(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::move_backward_impl(label, ex, begin(source), end(source), - end(dest)); -} - -// ---------------------- -// swap_ranges -// ---------------------- -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 swap_ranges(const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl("Kokkos::swap_ranges_iterator_api_default", ex, - first1, last1, first2); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto swap_ranges(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl("Kokkos::swap_ranges_view_api_default", ex, - begin(source), end(source), begin(dest)); -} - -template <class ExecutionSpace, class IteratorType1, class IteratorType2> -IteratorType2 swap_ranges(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { - return Impl::swap_ranges_impl(label, ex, first1, last1, first2); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto swap_ranges(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl(label, ex, begin(source), end(source), - begin(dest)); -} - -// ------------------- -// unique -// ------------------- -// note: the enable_if below is to avoid "call to ... is ambiguous" -// for example in the unit test when using a variadic function - -// overload set1 -template <class ExecutionSpace, class IteratorType> -std::enable_if_t<!::Kokkos::is_view<IteratorType>::value, IteratorType> unique( - const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last); -} - -template <class ExecutionSpace, class IteratorType> -std::enable_if_t<!::Kokkos::is_view<IteratorType>::value, IteratorType> unique( - const std::string& label, const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::unique_impl(label, ex, first, last); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto unique(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique("Kokkos::unique_view_api_default", ex, - begin(view), end(view)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties> -auto unique(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique(label, ex, begin(view), end(view)); -} - -// overload set2 -template <class ExecutionSpace, class IteratorType, class BinaryPredicate> -IteratorType unique(const ExecutionSpace& ex, IteratorType first, - IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last, pred); -} - -template <class ExecutionSpace, class IteratorType, class BinaryPredicate> -IteratorType unique(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - BinaryPredicate pred) { - return Impl::unique_impl(label, ex, first, last, pred); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class BinaryPredicate> -auto unique(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - BinaryPredicate pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl("Kokkos::unique_view_api_default", ex, begin(view), - end(view), std::move(pred)); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class BinaryPredicate> -auto unique(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - BinaryPredicate pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl(label, ex, begin(view), end(view), std::move(pred)); -} - -// ------------------- -// unique_copy -// ------------------- -// note: the enable_if below is to avoid "call to ... 
is ambiguous" -// for example in the unit test when using a variadic function - -// overload set1 -template <class ExecutionSpace, class InputIterator, class OutputIterator> -std::enable_if_t<!::Kokkos::is_view<InputIterator>::value, OutputIterator> -unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, - OutputIterator d_first) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator> -std::enable_if_t<!::Kokkos::is_view<InputIterator>::value, OutputIterator> -unique_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::unique_copy_impl(label, ex, first, last, d_first); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto unique_copy(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - const ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return ::Kokkos::Experimental::unique_copy( - "Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source), - begin(dest)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto unique_copy(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - const ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return ::Kokkos::Experimental::unique_copy(label, ex, cbegin(source), - cend(source), begin(dest)); -} - -// overload set2 -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class BinaryPredicate> -OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, - InputIterator last, OutputIterator d_first, - BinaryPredicate pred) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first, pred); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class BinaryPredicate> -OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl(label, ex, first, last, d_first, pred); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryPredicate> -auto unique_copy(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - const ::Kokkos::View<DataType2, Properties2...>& dest, - BinaryPredicate pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::unique_copy_impl("Kokkos::unique_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class BinaryPredicate> -auto unique_copy(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - const ::Kokkos::View<DataType2, Properties2...>& dest, - BinaryPredicate pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::unique_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); -} - -// ------------------- -// rotate -// ------------------- - -template <class ExecutionSpace, class IteratorType> -IteratorType rotate(const ExecutionSpace& ex, IteratorType first, - IteratorType n_first, IteratorType last) { - return Impl::rotate_impl("Kokkos::rotate_iterator_api_default", ex, first, - n_first, last); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType rotate(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType n_first, - IteratorType last) { - return Impl::rotate_impl(label, ex, first, n_first, last); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto rotate(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - std::size_t n_location) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl("Kokkos::rotate_view_api_default", ex, begin(view), - begin(view) + n_location, end(view)); -} - -template <class ExecutionSpace, class DataType, class... 
Properties> -auto rotate(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - std::size_t n_location) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl(label, ex, begin(view), begin(view) + n_location, - end(view)); -} - -// ------------------- -// rotate_copy -// ------------------- - -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator rotate_copy(const ExecutionSpace& ex, InputIterator first, - InputIterator n_first, InputIterator last, - OutputIterator d_first) { - return Impl::rotate_copy_impl("Kokkos::rotate_copy_iterator_api_default", ex, - first, n_first, last, d_first); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator> -OutputIterator rotate_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator n_first, - InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl(label, ex, first, n_first, last, d_first); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto rotate_copy(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - std::size_t n_location, - const ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::rotate_copy_impl("Kokkos::rotate_copy_view_api_default", ex, - cbegin(source), cbegin(source) + n_location, - cend(source), begin(dest)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto rotate_copy(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& source, - std::size_t n_location, - const ::Kokkos::View<DataType2, Properties2...>& dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - - return Impl::rotate_copy_impl(label, ex, cbegin(source), - cbegin(source) + n_location, cend(source), - begin(dest)); -} - -// ------------------- -// remove_if -// ------------------- -template <class ExecutionSpace, class Iterator, class UnaryPredicate> -Iterator remove_if(const ExecutionSpace& ex, Iterator first, Iterator last, - UnaryPredicate pred) { - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - first, last, pred); -} - -template <class ExecutionSpace, class Iterator, class UnaryPredicate> -Iterator remove_if(const std::string& label, const ExecutionSpace& ex, - Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl(label, ex, first, last, pred); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class UnaryPredicate> -auto remove_if(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - UnaryPredicate pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class UnaryPredicate> -auto remove_if(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - UnaryPredicate pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); -} - -// ------------------- -// remove -// ------------------- -template <class ExecutionSpace, class Iterator, class ValueType> -Iterator remove(const ExecutionSpace& ex, Iterator first, Iterator last, - const ValueType& value) { - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, first, - last, value); -} - -template <class ExecutionSpace, class Iterator, class ValueType> -Iterator remove(const std::string& label, const ExecutionSpace& ex, - Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl(label, ex, first, last, value); -} - -template <class ExecutionSpace, class DataType, class... Properties, - class ValueType> -auto remove(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - const ValueType& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); -} - -template <class ExecutionSpace, class DataType, class... 
Properties, - class ValueType> -auto remove(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - const ValueType& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); -} - -// ------------------- -// remove_copy -// ------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class ValueType> -OutputIterator remove_copy(const ExecutionSpace& ex, InputIterator first_from, - InputIterator last_from, OutputIterator first_dest, - const ValueType& value) { - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - first_from, last_from, first_dest, value); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class ValueType> -OutputIterator remove_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first_from, InputIterator last_from, - OutputIterator first_dest, const ValueType& value) { - return Impl::remove_copy_impl(label, ex, first_from, last_from, first_dest, - value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class ValueType> -auto remove_copy(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - const ValueType& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - ::Kokkos::Experimental::cbegin(view_from), - ::Kokkos::Experimental::cend(view_from), - ::Kokkos::Experimental::begin(view_dest), - value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class ValueType> -auto remove_copy(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - const ValueType& value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - - return Impl::remove_copy_impl( - label, ex, ::Kokkos::Experimental::cbegin(view_from), - ::Kokkos::Experimental::cend(view_from), - ::Kokkos::Experimental::begin(view_dest), value); -} - -// ------------------- -// remove_copy_if -// ------------------- -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class UnaryPredicate> -OutputIterator remove_copy_if(const ExecutionSpace& ex, - InputIterator first_from, InputIterator last_from, - OutputIterator first_dest, - const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl( - "Kokkos::remove_copy_if_iterator_api_default", ex, first_from, last_from, - first_dest, pred); -} - -template <class ExecutionSpace, class InputIterator, class OutputIterator, - class UnaryPredicate> -OutputIterator remove_copy_if(const std::string& label, - const ExecutionSpace& ex, - InputIterator first_from, InputIterator last_from, - OutputIterator first_dest, - const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl(label, ex, first_from, last_from, first_dest, - pred); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class UnaryPredicate> -auto remove_copy_if(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - const UnaryPredicate& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - - return Impl::remove_copy_if_impl( - "Kokkos::remove_copy_if_iterator_api_default", ex, - ::Kokkos::Experimental::cbegin(view_from), - ::Kokkos::Experimental::cend(view_from), - ::Kokkos::Experimental::begin(view_dest), pred); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class UnaryPredicate> -auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - const UnaryPredicate& pred) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - - return Impl::remove_copy_if_impl( - label, ex, ::Kokkos::Experimental::cbegin(view_from), - ::Kokkos::Experimental::cend(view_from), - ::Kokkos::Experimental::begin(view_dest), pred); -} - -// ------------------- -// shift_left -// ------------------- -template <class ExecutionSpace, class IteratorType> -IteratorType shift_left(const ExecutionSpace& ex, IteratorType first, - IteratorType last, - typename IteratorType::difference_type n) { - return Impl::shift_left_impl("Kokkos::shift_left_iterator_api_default", ex, - first, last, n); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType shift_left(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - typename IteratorType::difference_type n) { - return Impl::shift_left_impl(label, ex, first, last, n); -} - -template <class ExecutionSpace, class 
DataType, class... Properties> -auto shift_left(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - typename decltype(begin(view))::difference_type n) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl("Kokkos::shift_left_view_api_default", ex, - begin(view), end(view), n); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto shift_left(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - typename decltype(begin(view))::difference_type n) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl(label, ex, begin(view), end(view), n); -} - -// ------------------- -// shift_right -// ------------------- -template <class ExecutionSpace, class IteratorType> -IteratorType shift_right(const ExecutionSpace& ex, IteratorType first, - IteratorType last, - typename IteratorType::difference_type n) { - return Impl::shift_right_impl("Kokkos::shift_right_iterator_api_default", ex, - first, last, n); -} - -template <class ExecutionSpace, class IteratorType> -IteratorType shift_right(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - typename IteratorType::difference_type n) { - return Impl::shift_right_impl(label, ex, first, last, n); -} - -template <class ExecutionSpace, class DataType, class... Properties> -auto shift_right(const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - typename decltype(begin(view))::difference_type n) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl("Kokkos::shift_right_view_api_default", ex, - begin(view), end(view), n); -} - -template <class ExecutionSpace, class DataType, class... 
Properties> -auto shift_right(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType, Properties...>& view, - typename decltype(begin(view))::difference_type n) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl(label, ex, begin(view), end(view), n); -} - -} // namespace Experimental -} // namespace Kokkos - -#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_ExclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_ExclusiveScan.hpp deleted file mode 100644 index 62ebbec427e2b0d5e8159d703f117ec0a7ca4b06..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_ExclusiveScan.hpp +++ /dev/null @@ -1,517 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_STD_NUMERICS_EXCLUSIVE_SCAN_HPP -#define KOKKOS_STD_NUMERICS_EXCLUSIVE_SCAN_HPP - -#include <Kokkos_Core.hpp> -#include "../Kokkos_BeginEnd.hpp" -#include "../Kokkos_Constraints.hpp" -#include "../Kokkos_Distance.hpp" -#include "../Kokkos_ModifyingOperations.hpp" -#include "../Kokkos_ValueWrapperForNoNeutralElement.hpp" -#include "Kokkos_IdentityReferenceUnaryFunctor.hpp" - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, - class FirstDest> -struct ExclusiveScanDefaultFunctor { - using execution_space = ExeSpace; - using value_type = - ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement<ValueType>; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctor(ValueType init, FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void 
operator()(const IndexType i, value_type& update, - const bool final_pass) const { - if (final_pass) { - if (i == 0) { - m_first_dest[i] = m_init_value; - } else { - m_first_dest[i] = update.val + m_init_value; - } - } - - const auto tmp = value_type{m_first_from[i], false}; - this->join(update, tmp); - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - KOKKOS_FUNCTION - void join(volatile value_type& update, - volatile const value_type& input) const { - if (update.is_initial) { - update.val = input.val; - update.is_initial = false; - } else { - update.val = update.val + input.val; - } - } -}; - -template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, - class FirstDest, class BinaryOpType, class UnaryOpType> -struct TransformExclusiveScanFunctor { - using execution_space = ExeSpace; - using value_type = - ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement<ValueType>; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - BinaryOpType m_binary_op; - UnaryOpType m_unary_op; - - KOKKOS_FUNCTION - TransformExclusiveScanFunctor(ValueType init, FirstFrom first_from, - FirstDest first_dest, BinaryOpType bop, - UnaryOpType uop) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_binary_op(std::move(bop)), - m_unary_op(std::move(uop)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - if (final_pass) { - if (i == 0) { - // for both ExclusiveScan and TransformExclusiveScan, - // init is unmodified - m_first_dest[i] = m_init_value; - } else { - m_first_dest[i] = m_binary_op(update.val, m_init_value); - } - } - - const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; - this->join(update, tmp); - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - 
KOKKOS_FUNCTION - void join(volatile value_type& update, - volatile const value_type& input) const { - if (update.is_initial) { - update.val = input.val; - } else { - update.val = m_binary_op(update.val, input.val); - } - update.is_initial = false; - } -}; - -// -------------------------------------------------- -// exclusive_scan_custom_op_impl -// -------------------------------------------------- -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType, class BinaryOpType> -OutputIteratorType exclusive_scan_custom_op_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>; - using func_type = - TransformExclusiveScanFunctor<ExecutionSpace, index_type, ValueType, - InputIteratorType, OutputIteratorType, - BinaryOpType, unary_op_type>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(init_value, first_from, first_dest, bop, unary_op_type())); - ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - - // return - return first_dest + num_elements; -} - -// -------------------------------------------------- -// transform_exclusive_scan_impl -// -------------------------------------------------- -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType, class BinaryOpType, - class UnaryOpType> 
-OutputIteratorType transform_exclusive_scan_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop, - UnaryOpType uop) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using func_type = - TransformExclusiveScanFunctor<ExecutionSpace, index_type, ValueType, - InputIteratorType, OutputIteratorType, - BinaryOpType, UnaryOpType>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(init_value, first_from, first_dest, bop, uop)); - ex.fence("Kokkos::transform_exclusive_scan: fence after operation"); - - // return - return first_dest + num_elements; -} - -// -------------------------------------------------- -// exclusive_scan_default_op_impl -// -------------------------------------------------- -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType> -OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - ValueType init_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // does it make sense to do this static_assert too? 
- // using input_iterator_value_type = typename InputIteratorType::value_type; - // static_assert - // (std::is_convertible<std::remove_cv_t<input_iterator_value_type>, - // ValueType>::value, - // "exclusive_scan: InputIteratorType::value_type not convertible to - // ValueType"); - - // we are unnecessarily duplicating code, but this is on purpose - // so that we can use the default_op for OpenMPTarget. - // Originally, I had this implemented as: - // ''' - // using bop_type = StdExclusiveScanDefaultJoinFunctor<ValueType>; - // call exclusive_scan_custom_op_impl(..., bop_type()); - // ''' - // which avoids duplicating the functors, but for OpenMPTarget - // I cannot use a custom binary op. - // This is the same problem that occurs for reductions. - - // aliases - using index_type = typename InputIteratorType::difference_type; - using func_type = - ExclusiveScanDefaultFunctor<ExecutionSpace, index_type, ValueType, - InputIteratorType, OutputIteratorType>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(init_value, first_from, first_dest)); - ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); - - return first_dest + num_elements; -} - -} // end namespace Impl - -/////////////////////////////// -// -// exclusive scan API -// -/////////////////////////////// - -// overload set 1 -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value) { - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - return 
Impl::exclusive_scan_default_op_impl( - "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last, - first_dest, init_value); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value) { - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl(label, ex, first, last, - first_dest, init_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class ValueType> -auto exclusive_scan(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - ValueType init_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl( - "Kokkos::exclusive_scan_default_functors_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class ValueType> -auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - ValueType init_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), init_value); -} - -// overload set 2 -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType, class BinaryOpType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType bop) { - Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl( - "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, init_value, bop); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType, class BinaryOpType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType bop) { - Impl::static_assert_is_not_openmptarget(ex); - 
static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest, - init_value, bop); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class ValueType, - class BinaryOpType> -auto exclusive_scan(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - ValueType init_value, BinaryOpType bop) { - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( - "Kokkos::exclusive_scan_custom_functors_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, bop); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class ValueType, - class BinaryOpType> -auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - ValueType init_value, BinaryOpType bop) { - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( - label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, bop); -} - -////////////////////////////////////// -// -// transform_exclusive_scan public API -// -////////////////////////////////////// - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType, class BinaryOpType, - class UnaryOpType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType binary_op, - UnaryOpType unary_op) { - Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl( - "Kokkos::transform_exclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, init_value, binary_op, unary_op); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class ValueType, class BinaryOpType, - class UnaryOpType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - 
OutputIteratorType> -transform_exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType binary_op, UnaryOpType unary_op) { - Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl(label, ex, first, last, first_dest, - init_value, binary_op, unary_op); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class ValueType, - class BinaryOpType, class UnaryOpType> -auto transform_exclusive_scan( - const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( - "Kokkos::transform_exclusive_scan_custom_functors_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, binary_op, unary_op); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class ValueType, - class BinaryOpType, class UnaryOpType> -auto transform_exclusive_scan( - const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible<ValueType>::value, - "ValueType must be move constructible."); - namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( - label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, binary_op, unary_op); -} - -} // namespace Experimental -} // namespace Kokkos - -#endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_InclusiveScan.hpp b/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_InclusiveScan.hpp deleted file mode 100644 index cdafc818697a3c9578b581b308e5d22a733a3df4..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_InclusiveScan.hpp +++ /dev/null @@ -1,699 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_STD_NUMERICS_INCLUSIVE_SCAN_HPP -#define KOKKOS_STD_NUMERICS_INCLUSIVE_SCAN_HPP - -#include <Kokkos_Core.hpp> -#include "../Kokkos_BeginEnd.hpp" -#include "../Kokkos_Constraints.hpp" -#include "../Kokkos_Distance.hpp" -#include "../Kokkos_ModifyingOperations.hpp" -#include "../Kokkos_ValueWrapperForNoNeutralElement.hpp" -#include "Kokkos_IdentityReferenceUnaryFunctor.hpp" - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, - class FirstDest> -struct InclusiveScanDefaultFunctor { - using execution_space = ExeSpace; - using value_type = ValueWrapperForNoNeutralElement<ValueType>; - - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - InclusiveScanDefaultFunctor(FirstFrom first_from, FirstDest first_dest) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - const auto tmp = value_type{m_first_from[i], false}; - this->join(update, tmp); - - if (final_pass) { - m_first_dest[i] = update.val; - } - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - KOKKOS_FUNCTION - void join(volatile value_type& update, - volatile const value_type& input) const { - if (update.is_initial) { - update.val = input.val; - } else { - update.val = update.val + input.val; - } - update.is_initial = false; - } -}; - -template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, - class FirstDest, class BinaryOpType, class UnaryOpType> -struct TransformInclusiveScanNoInitValueFunctor { - using execution_space = ExeSpace; - using value_type = ValueWrapperForNoNeutralElement<ValueType>; - - FirstFrom m_first_from; - FirstDest 
m_first_dest; - BinaryOpType m_binary_op; - UnaryOpType m_unary_op; - - KOKKOS_FUNCTION - TransformInclusiveScanNoInitValueFunctor(FirstFrom first_from, - FirstDest first_dest, - BinaryOpType bop, UnaryOpType uop) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_binary_op(std::move(bop)), - m_unary_op(std::move(uop)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; - this->join(update, tmp); - if (final_pass) { - m_first_dest[i] = update.val; - } - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - KOKKOS_FUNCTION - void join(volatile value_type& update, - volatile const value_type& input) const { - if (update.is_initial) { - update.val = input.val; - } else { - update.val = m_binary_op(update.val, input.val); - } - update.is_initial = false; - } -}; - -template <class ExeSpace, class IndexType, class ValueType, class FirstFrom, - class FirstDest, class BinaryOpType, class UnaryOpType> -struct TransformInclusiveScanWithInitValueFunctor { - using execution_space = ExeSpace; - using value_type = ValueWrapperForNoNeutralElement<ValueType>; - - FirstFrom m_first_from; - FirstDest m_first_dest; - BinaryOpType m_binary_op; - UnaryOpType m_unary_op; - ValueType m_init; - - KOKKOS_FUNCTION - TransformInclusiveScanWithInitValueFunctor(FirstFrom first_from, - FirstDest first_dest, - BinaryOpType bop, UnaryOpType uop, - ValueType init) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_binary_op(std::move(bop)), - m_unary_op(std::move(uop)), - m_init(std::move(init)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; - this->join(update, tmp); - - if (final_pass) { - m_first_dest[i] = 
m_binary_op(update.val, m_init); - } - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - KOKKOS_FUNCTION - void join(volatile value_type& update, - volatile const value_type& input) const { - if (update.is_initial) { - update.val = input.val; - } else { - update.val = m_binary_op(update.val, input.val); - } - update.is_initial = false; - } -}; - -// ------------------------------------------------------------- -// inclusive_scan_default_op_impl -// ------------------------------------------------------------- -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType> -OutputIteratorType inclusive_scan_default_op_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using value_type = - std::remove_const_t<typename InputIteratorType::value_type>; - using func_type = - InclusiveScanDefaultFunctor<ExecutionSpace, index_type, value_type, - InputIteratorType, OutputIteratorType>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(first_from, first_dest)); - ex.fence("Kokkos::inclusive_scan_default_op: fence after operation"); - - // return - return first_dest + num_elements; -} - -// ------------------------------------------------------------- -// inclusive_scan_custom_binary_op_impl -// ------------------------------------------------------------- -template <class ExecutionSpace, class InputIteratorType, - class 
OutputIteratorType, class BinaryOpType> -OutputIteratorType inclusive_scan_custom_binary_op_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, BinaryOpType binary_op) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using value_type = - std::remove_const_t<typename InputIteratorType::value_type>; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>; - using func_type = TransformInclusiveScanNoInitValueFunctor< - ExecutionSpace, index_type, value_type, InputIteratorType, - OutputIteratorType, BinaryOpType, unary_op_type>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(first_from, first_dest, binary_op, unary_op_type())); - ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); - - // return - return first_dest + num_elements; -} - -// ------------------------------------------------------------- -// inclusive_scan_custom_binary_op_impl with init_value -// ------------------------------------------------------------- -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOpType, class ValueType> -OutputIteratorType inclusive_scan_custom_binary_op_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, BinaryOpType binary_op, - ValueType init_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - 
Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>; - using func_type = TransformInclusiveScanWithInitValueFunctor< - ExecutionSpace, index_type, ValueType, InputIteratorType, - OutputIteratorType, BinaryOpType, unary_op_type>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan(label, - RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(first_from, first_dest, binary_op, - unary_op_type(), init_value)); - ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); - - // return - return first_dest + num_elements; -} - -// ------------------------------------------------------------- -// transform_inclusive_scan_impl without init_value -// ------------------------------------------------------------- -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOpType, class UnaryOpType> -OutputIteratorType transform_inclusive_scan_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - BinaryOpType binary_op, - UnaryOpType unary_op) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using value_type = - std::remove_const_t<typename InputIteratorType::value_type>; - using func_type = TransformInclusiveScanNoInitValueFunctor< - ExecutionSpace, index_type, value_type, InputIteratorType, - OutputIteratorType, BinaryOpType, 
UnaryOpType>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(first_from, first_dest, binary_op, unary_op)); - ex.fence("Kokkos::transform_inclusive_scan: fence after operation"); - - // return - return first_dest + num_elements; -} - -// ------------------------------------------------------------- -// transform_inclusive_scan_impl with init_value -// ------------------------------------------------------------- -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOpType, class UnaryOpType, - class ValueType> -OutputIteratorType transform_inclusive_scan_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, - ValueType init_value) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using func_type = TransformInclusiveScanWithInitValueFunctor< - ExecutionSpace, index_type, ValueType, InputIteratorType, - OutputIteratorType, BinaryOpType, UnaryOpType>; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - ::Kokkos::parallel_scan( - label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_type(first_from, first_dest, binary_op, unary_op, init_value)); - ex.fence("Kokkos::transform_inclusive_scan: fence after operation"); - - // return - return first_dest + num_elements; -} - -} // end namespace Impl - -/////////////////////////////// -// -// inclusive scan API -// -/////////////////////////////// - -// overload set 1 -template 
<class ExecutionSpace, class InputIteratorType, - class OutputIteratorType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest) { - return Impl::inclusive_scan_default_op_impl( - "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last, - first_dest); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest) { - return Impl::inclusive_scan_default_op_impl(label, ex, first, last, - first_dest); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2> -auto inclusive_scan( - const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl( - "Kokkos::inclusive_scan_default_functors_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2> -auto inclusive_scan( - const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest)); -} - -// overload set 2 (accepting custom binary op) -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOp> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl( - "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, binary_op); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOp> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl(label, ex, first, last, - first_dest, binary_op); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class BinaryOp> -auto inclusive_scan(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - BinaryOp binary_op) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( - "Kokkos::inclusive_scan_custom_functors_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryOp> -auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - BinaryOp binary_op) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( - label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op); -} - -// overload set 3 -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOp, class ValueType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op, ValueType init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( - "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, binary_op, init_value); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, 
class BinaryOp, class ValueType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op, - ValueType init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( - label, ex, first, last, first_dest, binary_op, init_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryOp, - class ValueType> -auto inclusive_scan(const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - BinaryOp binary_op, ValueType init_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( - "Kokkos::inclusive_scan_custom_functors_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, init_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class BinaryOp, - class ValueType> -auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - BinaryOp binary_op, ValueType init_value) { - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( - label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, init_value); -} - -////////////////////////////////////// -// -// transform_inclusive_scan public API -// -////////////////////////////////////// - -// overload set 1 (no init value) -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOpType, class UnaryOpType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op) { - Impl::static_assert_is_not_openmptarget(ex); - - return Impl::transform_inclusive_scan_impl( - "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, binary_op, unary_op); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOpType, class UnaryOpType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op) { - Impl::static_assert_is_not_openmptarget(ex); - - return 
Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryOpType, - class UnaryOpType> -auto transform_inclusive_scan( - const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - BinaryOpType binary_op, UnaryOpType unary_op) { - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( - "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, unary_op); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class BinaryOpType, - class UnaryOpType> -auto transform_inclusive_scan( - const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - BinaryOpType binary_op, UnaryOpType unary_op) { - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( - label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, unary_op); -} - -// overload set 2 (init value) -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOpType, class UnaryOpType, - class ValueType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op, - ValueType init_value) { - Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( - "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, binary_op, unary_op, init_value); -} - -template <class ExecutionSpace, class InputIteratorType, - class OutputIteratorType, class BinaryOpType, class UnaryOpType, - class ValueType> -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op, ValueType init_value) { - 
Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op, init_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... Properties2, class BinaryOpType, - class UnaryOpType, class ValueType> -auto transform_inclusive_scan( - const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( - "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, unary_op, init_value); -} - -template <class ExecutionSpace, class DataType1, class... Properties1, - class DataType2, class... 
Properties2, class BinaryOpType, - class UnaryOpType, class ValueType> -auto transform_inclusive_scan( - const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View<DataType1, Properties1...>& view_from, - const ::Kokkos::View<DataType2, Properties2...>& view_dest, - BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { - Impl::static_assert_is_not_openmptarget(ex); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); - Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( - label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, unary_op, init_value); -} - -} // namespace Experimental -} // namespace Kokkos - -#endif diff --git a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt index 94e6b2784f1812b8ff2c4f20232e965d7adcb097..0c50ff7a0805d63319f5d9857f313ff87ebafdb8 100644 --- a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt +++ b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -80,6 +80,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget) StdAlgorithmsSearch StdAlgorithmsSearch_n StdAlgorithmsMismatch + StdAlgorithmsMoveBackward ) list(APPEND STDALGO_SOURCES_C Test${Name}.cpp) endforeach() diff --git a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp index 464c86a7b62f34fa9e1c4671e3d73d4e0cf24a0d..19c82003cf3429714a17a262fe55d59907ec5ecf 100644 --- a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -98,16 +98,6 @@ struct RandomProperties { max = add.max > max ? 
add.max : max; return *this; } - - KOKKOS_INLINE_FUNCTION - void operator+=(const volatile RandomProperties& add) volatile { - count += add.count; - mean += add.mean; - variance += add.variance; - covariance += add.covariance; - min = add.min < min ? add.min : min; - max = add.max > max ? add.max : max; - } }; // FIXME_OPENMPTARGET: Need this for OpenMPTarget because contra to the standard @@ -532,13 +522,15 @@ struct TestDynRankView { Pool random(13); double min = 10.; double max = 100.; - Kokkos::fill_random(A, random, min, max); + ExecutionSpace exec; + Kokkos::fill_random(exec, A, random, min, max); ReducerValueType val; - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(0, A.size()), - *this, ReducerType(val)); + Kokkos::parallel_reduce( + Kokkos::RangePolicy<ExecutionSpace>(exec, 0, A.size()), *this, + ReducerType(val)); - Kokkos::fence(); + exec.fence(); ASSERT_GE(val.min_val, min); ASSERT_LE(val.max_val, max); } diff --git a/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp b/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp index 23e8fec7d5e463ba737d21e8a19a1c2c331bdfaa..5136ad434ba42b17292deeeb792a7d416fcc2ea6 100644 --- a/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp @@ -43,9 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_RandomAccessIterator.hpp> -#include <std_algorithms/Kokkos_Distance.hpp> namespace KE = Kokkos::Experimental; @@ -206,28 +203,28 @@ TEST_F(random_access_iterator_test, operatorsSet4) { auto it4 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 4); auto it5 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 4); auto it6 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 4); - EXPECT_TRUE(it1 != it4); - EXPECT_TRUE(it2 != it5); - EXPECT_TRUE(it3 != it6); - EXPECT_TRUE(it1 < it4); - 
EXPECT_TRUE(it2 < it5); - EXPECT_TRUE(it3 < it6); - EXPECT_TRUE(it1 <= it4); - EXPECT_TRUE(it2 <= it5); - EXPECT_TRUE(it3 <= it6); + EXPECT_NE(it1, it4); + EXPECT_NE(it2, it5); + EXPECT_NE(it3, it6); + EXPECT_LT(it1, it4); + EXPECT_LT(it2, it5); + EXPECT_LT(it3, it6); + EXPECT_LE(it1, it4); + EXPECT_LE(it2, it5); + EXPECT_LE(it3, it6); auto it7 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3); auto it8 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3); auto it9 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3); - EXPECT_TRUE(it1 == it7); - EXPECT_TRUE(it2 == it8); - EXPECT_TRUE(it3 == it9); - EXPECT_TRUE(it1 >= it7); - EXPECT_TRUE(it2 >= it8); - EXPECT_TRUE(it3 >= it9); - EXPECT_TRUE(it4 > it7); - EXPECT_TRUE(it5 > it8); - EXPECT_TRUE(it6 > it9); + EXPECT_EQ(it1, it7); + EXPECT_EQ(it2, it8); + EXPECT_EQ(it3, it9); + EXPECT_GE(it1, it7); + EXPECT_GE(it2, it8); + EXPECT_GE(it3, it9); + EXPECT_GT(it4, it7); + EXPECT_GT(it5, it8); + EXPECT_GT(it6, it9); } TEST_F(random_access_iterator_test, assignment_operator) { diff --git a/packages/kokkos/algorithms/unit_tests/TestSort.hpp b/packages/kokkos/algorithms/unit_tests/TestSort.hpp index 9108731c15800fdd5c95a7cc8b7ae751dfa1272d..120a04bdb535a55b31516b047a0da4c347c581b9 100644 --- a/packages/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestSort.hpp @@ -137,7 +137,12 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) { // Test sorting array with all numbers equal ExecutionSpace exec; Kokkos::deep_copy(exec, keys, KeyType(1)); +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 Kokkos::sort(exec, keys, force_kokkos); +#else + (void)force_kokkos; // suppress warnings about unused variable + Kokkos::sort(exec, keys); +#endif Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931); Kokkos::fill_random(keys, g, @@ -151,7 +156,11 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) { 
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n), sum<ExecutionSpace, KeyType>(keys), sum_before); +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 Kokkos::sort(exec, keys, force_kokkos); +#else + Kokkos::sort(exec, keys); +#endif Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n), sum<ExecutionSpace, KeyType>(keys), sum_after); @@ -396,7 +405,7 @@ void test_sort_integer_overflow() { Kokkos::Experimental::finite_min<T>::value}; auto vd = Kokkos::create_mirror_view_and_copy( ExecutionSpace(), Kokkos::View<T[2], Kokkos::HostSpace>(a)); - Kokkos::sort(vd, /*force using Kokkos bin sort*/ true); + Kokkos::sort(vd); auto vh = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), vd); EXPECT_TRUE(std::is_sorted(vh.data(), vh.data() + 2)) << "view (" << vh[0] << ", " << vh[1] << ") is not sorted"; @@ -407,7 +416,9 @@ void test_sort_integer_overflow() { template <class ExecutionSpace, typename KeyType> void test_1D_sort(unsigned int N) { test_1D_sort_impl<ExecutionSpace, KeyType>(N * N * N, true); +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 test_1D_sort_impl<ExecutionSpace, KeyType>(N * N * N, false); +#endif } template <class ExecutionSpace, typename KeyType> diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp index 4036112b4976c089bf6affae9a72106bdd7ab92c..d37f657f57599e1029cf32e59f6bb9775b57a224 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp @@ -44,7 +44,7 @@ #include <TestStdAlgorithmsCommon.hpp> #include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_Numeric.hpp> +#include <std_algorithms/Kokkos_AdjacentDifference.hpp> #include <utility> #include <numeric> @@ -185,7 +185,7 @@ void verify_data(TestViewType test_view, GoldViewType gold) { const auto gold_h = 
create_mirror_view_and_copy(Kokkos::HostSpace(), gold); for (std::size_t i = 0; i < test_view.extent(0); ++i) { - EXPECT_TRUE(gold_h(i) == test_view_dc_h(i)); + EXPECT_EQ(gold_h(i), test_view_dc_h(i)); } } @@ -225,7 +225,7 @@ void run_single_scenario(const InfoType& scenario_info, auto res1 = KE::adjacent_difference(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_TRUE(res1 == KE::end(view_dest)); + EXPECT_EQ(res1, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -235,7 +235,7 @@ void run_single_scenario(const InfoType& scenario_info, auto res2 = KE::adjacent_difference( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_TRUE(res2 == KE::end(view_dest)); + EXPECT_EQ(res2, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -244,7 +244,7 @@ void run_single_scenario(const InfoType& scenario_info, create_view<ValueType>(Tag{}, view_ext, "adj_diff_dest_view"); auto res3 = KE::adjacent_difference(exespace(), view_from, view_dest, args...); - EXPECT_TRUE(res3 == KE::end(view_dest)); + EXPECT_EQ(res3, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -253,7 +253,7 @@ void run_single_scenario(const InfoType& scenario_info, create_view<ValueType>(Tag{}, view_ext, "adj_diff_dest_view"); auto res4 = KE::adjacent_difference("label", exespace(), view_from, view_dest, args...); - EXPECT_TRUE(res4 == KE::end(view_dest)); + EXPECT_EQ(res4, KE::end(view_dest)); verify_data(view_dest, gold); } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp index 6433a9cf635c8e0b10ff3da43d045d901df88a4b..874748193e366b1921a870100c4baadb16fe2a82 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp @@ -44,7 +44,7 @@ #include <TestStdAlgorithmsCommon.hpp> #include 
<std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> +#include "std_algorithms/Kokkos_AdjacentFind.hpp" #include <utility> namespace Test { @@ -257,7 +257,7 @@ void verify(DiffType my_diff, ViewType view, Args... args) { my_std_adjacent_find(KE::cbegin(view_h), KE::cend(view_h), args...); const auto std_diff = std_r - KE::cbegin(view_h); - EXPECT_TRUE(my_diff == std_diff); + EXPECT_EQ(my_diff, std_diff); } template <class Tag, class ValueType, class InfoType, class... Args> diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp index 65b6000962d273ec710f5b5452f6d81cea5aa02e..a1307d4c23cb0dabd093155841a4589a64d0f3f3 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp @@ -44,7 +44,9 @@ #include <TestStdAlgorithmsCommon.hpp> #include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> +#include <std_algorithms/Kokkos_AllOf.hpp> +#include <std_algorithms/Kokkos_AnyOf.hpp> +#include <std_algorithms/Kokkos_NoneOf.hpp> #include <algorithm> namespace Test { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp index 6d2b65a567d8551cbaf67c946dc3122a6c44c04d..a06f9c61c03adef2950c3074a38b07757cfaa120 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp @@ -46,8 +46,9 @@ #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_STD_ALGOS_COMMON_HPP #include <gtest/gtest.h> +#include <Kokkos_StdAlgorithms.hpp> #include <TestStdAlgorithmsHelperFunctors.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> +#include <utility> #include <numeric> #include <random> @@ -249,6 +250,71 @@ struct 
std_algorithms_test : public ::testing::Test { } }; +struct CustomValueType { + KOKKOS_INLINE_FUNCTION + CustomValueType(){}; + + KOKKOS_INLINE_FUNCTION + CustomValueType(value_type val) : value(val){}; + + KOKKOS_INLINE_FUNCTION + CustomValueType(const CustomValueType& other) { this->value = other.value; } + + KOKKOS_INLINE_FUNCTION + explicit operator value_type() const { return value; } + + KOKKOS_INLINE_FUNCTION + value_type& operator()() { return value; } + + KOKKOS_INLINE_FUNCTION + const value_type& operator()() const { return value; } + + KOKKOS_INLINE_FUNCTION + CustomValueType& operator+=(const CustomValueType& other) { + this->value += other.value; + return *this; + } + + KOKKOS_INLINE_FUNCTION + CustomValueType& operator=(const CustomValueType& other) { + this->value = other.value; + return *this; + } + + KOKKOS_INLINE_FUNCTION + CustomValueType operator+(const CustomValueType& other) const { + CustomValueType result; + result.value = this->value + other.value; + return result; + } + + KOKKOS_INLINE_FUNCTION + CustomValueType operator-(const CustomValueType& other) const { + CustomValueType result; + result.value = this->value - other.value; + return result; + } + + KOKKOS_INLINE_FUNCTION + CustomValueType operator*(const CustomValueType& other) const { + CustomValueType result; + result.value = this->value * other.value; + return result; + } + + KOKKOS_INLINE_FUNCTION + bool operator==(const CustomValueType& other) const { + return this->value == other.value; + } + + private: + friend std::ostream& operator<<(std::ostream& os, + const CustomValueType& custom_value_type) { + return os << custom_value_type.value; + } + value_type value = {}; +}; + } // namespace stdalgos } // namespace Test diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp index 2f2172eccc1e4d318a7b77dc8ddd29ebd41b637e..037dac36ed9de12a37b36d060a42c37c91cc40f6 100644 --- 
a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp @@ -42,7 +42,6 @@ //@HEADER */ -#include <std_algorithms/Kokkos_BeginEnd.hpp> #include <Kokkos_StdAlgorithms.hpp> namespace Test { @@ -61,12 +60,6 @@ struct TrivialBinaryFunctor { ValueType operator()(const ValueType &a, const ValueType &b) const { return (a + b); } - - KOKKOS_INLINE_FUNCTION - ValueType operator()(const volatile ValueType &a, - const volatile ValueType &b) const { - return (a + b); - } }; template <class ValueType> @@ -100,12 +93,6 @@ struct TrivialComparator { bool operator()(const ValueType &a, const ValueType &b) const { return a > b; } - - KOKKOS_INLINE_FUNCTION - bool operator()(const volatile ValueType &a, - const volatile ValueType &b) const { - return a > b; - } }; template <class ValueType> @@ -120,12 +107,6 @@ struct TrivialReduceJoinFunctor { ValueType operator()(const ValueType &a, const ValueType &b) const { return a + b; } - - KOKKOS_FUNCTION - ValueType operator()(const volatile ValueType &a, - const volatile ValueType &b) const { - return a + b; - } }; template <class ValueType> diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp index b1981df28115a5938d24abcdeef59ca812aef548..3eb13c98c4577c315ac2d7232b9b69969f541c1a 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp @@ -44,7 +44,7 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <std_algorithms/Kokkos_Constraints.hpp> +#include <Kokkos_StdAlgorithms.hpp> namespace Test { namespace stdalgos { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp index 
f1d078bd725cd64968fbc678e4b1a1ea168e54d5..d5758e2438bac69d146068b36b110be72b5d263a 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -165,49 +163,49 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(0)); } else if (name == "one-element-b") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(2)); } else if (name == "two-elements-a") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2)); - EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(1), static_cast<value_type>(0)); } else if (name == "two-elements-b") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2)); - EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(1), static_cast<value_type>(0)); } else if (name == "small-a") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(-4)); - EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(-2)); - EXPECT_TRUE(view_test_h(2) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(3) == static_cast<value_type>(2)); - EXPECT_TRUE(view_test_h(4) == static_cast<value_type>(4)); - EXPECT_TRUE(view_test_h(5) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(6) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(7) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(8) == static_cast<value_type>(0)); + 
EXPECT_EQ(view_test_h(0), static_cast<value_type>(-4)); + EXPECT_EQ(view_test_h(1), static_cast<value_type>(-2)); + EXPECT_EQ(view_test_h(2), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(3), static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(4), static_cast<value_type>(4)); + EXPECT_EQ(view_test_h(5), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(6), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(7), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(8), static_cast<value_type>(0)); } else if (name == "small-b") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(22)); - EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(-12)); - EXPECT_TRUE(view_test_h(2) == static_cast<value_type>(22)); - EXPECT_TRUE(view_test_h(3) == static_cast<value_type>(-12)); - EXPECT_TRUE(view_test_h(4) == static_cast<value_type>(22)); - EXPECT_TRUE(view_test_h(5) == static_cast<value_type>(-12)); - EXPECT_TRUE(view_test_h(6) == static_cast<value_type>(22)); - EXPECT_TRUE(view_test_h(7) == static_cast<value_type>(-12)); - EXPECT_TRUE(view_test_h(8) == static_cast<value_type>(22)); - EXPECT_TRUE(view_test_h(9) == static_cast<value_type>(-12)); - EXPECT_TRUE(view_test_h(10) == static_cast<value_type>(22)); - EXPECT_TRUE(view_test_h(11) == static_cast<value_type>(-12)); - EXPECT_TRUE(view_test_h(12) == static_cast<value_type>(22)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(22)); + EXPECT_EQ(view_test_h(1), static_cast<value_type>(-12)); + EXPECT_EQ(view_test_h(2), static_cast<value_type>(22)); + EXPECT_EQ(view_test_h(3), static_cast<value_type>(-12)); + EXPECT_EQ(view_test_h(4), static_cast<value_type>(22)); + EXPECT_EQ(view_test_h(5), static_cast<value_type>(-12)); + EXPECT_EQ(view_test_h(6), static_cast<value_type>(22)); + EXPECT_EQ(view_test_h(7), static_cast<value_type>(-12)); + EXPECT_EQ(view_test_h(8), static_cast<value_type>(22)); + EXPECT_EQ(view_test_h(9), static_cast<value_type>(-12)); + EXPECT_EQ(view_test_h(10), 
static_cast<value_type>(22)); + EXPECT_EQ(view_test_h(11), static_cast<value_type>(-12)); + EXPECT_EQ(view_test_h(12), static_cast<value_type>(22)); } else if (name == "medium" || name == "large") { @@ -220,13 +218,14 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, std::size_t count = 0; for (std::size_t i = 0; i < view_from_h.extent(0); ++i) { if (pred(view_from_h(i))) { - EXPECT_TRUE(view_test_h(count++) == view_from_h(i)); + EXPECT_EQ(view_test_h(count), view_from_h(i)); + count++; } } // all other entries of test view should be zero for (; count < view_test_h.extent(0); ++count) { // std::cout << count << '\n'; - EXPECT_TRUE(view_test_h(count) == value_type(0)); + EXPECT_EQ(view_test_h(count), value_type(0)); } } @@ -255,7 +254,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::copy_if(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred); verify_data(name, view_from, view_dest, pred); - EXPECT_TRUE(rit == (KE::begin(view_dest) + n)); + EXPECT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -264,7 +263,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::copy_if("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred); verify_data(name, view_from, view_dest, pred); - EXPECT_TRUE(rit == (KE::begin(view_dest) + n)); + EXPECT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -272,7 +271,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto view_dest = create_view<ValueType>(Tag{}, view_ext, "copy_if_dest"); auto rit = KE::copy_if(exespace(), view_from, view_dest, pred); verify_data(name, view_from, view_dest, pred); - EXPECT_TRUE(rit == (KE::begin(view_dest) + n)); + EXPECT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -280,7 +279,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto view_dest = create_view<ValueType>(Tag{}, view_ext, "copy_if_dest"); auto rit = KE::copy_if("label", exespace(), view_from, 
view_dest, pred); verify_data(name, view_from, view_dest, pred); - EXPECT_TRUE(rit == (KE::begin(view_dest) + n)); + EXPECT_EQ(rit, (KE::begin(view_dest) + n)); } Kokkos::fence(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp index dfc7d794ed68e7a1cc217f7d02c147d6903addce..4c92a990595bb6a4ef5dced69531fa4f22da49d2 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> #include <algorithm> namespace Test { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp index 78edff4230db7c9bb5d88eba34448782443c1278..e5b1e8514db8b175fa7b1c4ff078c5516099680c 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp @@ -43,9 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <algorithm> namespace Test { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp index 99c921323beaa4b623451c739aa96b757b6836cc..e470ee86204bde1098d1064dd0713090dbc7a907 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include 
<std_algorithms/Kokkos_Numeric.hpp> #include <utility> namespace Test { @@ -78,10 +76,16 @@ struct UnifDist<int> { int operator()() { return m_dist(m_gen); } }; -template <class ViewType> -void fill_zero(ViewType view) { - Kokkos::parallel_for(view.extent(0), FillZeroFunctor<ViewType>(view)); -} +template <> +struct UnifDist<CustomValueType> { + using dist_type = std::uniform_real_distribution<double>; + std::mt19937 m_gen; + dist_type m_dist; + + UnifDist() : m_dist(0.05, 1.2) { m_gen.seed(1034343); } + + CustomValueType operator()() { return m_dist(m_gen); } +}; template <class ViewType> void fill_view(ViewType dest_view, const std::string& name) { @@ -181,15 +185,17 @@ void verify_data(ViewType1 data_view, // contains data // << gold_h(i) << " " << test_view_h(i) << " " // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same<gold_view_value_type, int>::value) { - EXPECT_TRUE(gold_h(i) == test_view_h(i)); + EXPECT_EQ(gold_h(i), test_view_h(i)); } else { - const auto error = std::abs(gold_h(i) - test_view_h(i)); + const auto error = + std::abs(static_cast<double>(gold_h(i) - test_view_h(i))); if (error > 1e-10) { std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " " << gold_h(i) << " " << test_view_h(i) << " " - << std::abs(gold_h(i) - test_view_h(i)) << std::endl; + << std::abs(static_cast<double>(gold_h(i) - test_view_h(i))) + << std::endl; } - EXPECT_TRUE(error < 1e-10); + EXPECT_LT(error, 1e-10); } } } @@ -201,12 +207,6 @@ struct MultiplyFunctor { ValueType operator()(const ValueType& a, const ValueType& b) const { return (a * b); } - - KOKKOS_INLINE_FUNCTION - ValueType operator()(const volatile ValueType& a, - const volatile ValueType& b) const { - return (a * b); - } }; template <class ValueType> @@ -215,12 +215,6 @@ struct SumFunctor { ValueType operator()(const ValueType& a, const ValueType& b) const { return (a + b); } - - KOKKOS_INLINE_FUNCTION - ValueType operator()(const volatile ValueType& a, - const volatile 
ValueType& b) const { - return (a + b); - } }; std::string value_type_to_string(int) { return "int"; } @@ -247,7 +241,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info, auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -256,14 +250,14 @@ void run_single_scenario_default_op(const InfoType& scenario_info, auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } { fill_zero(view_dest); auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -271,7 +265,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest, init_value); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -297,7 +291,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -306,7 +300,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, 
KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -314,7 +308,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value, bop); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -322,7 +316,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest, init_value, bop); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -342,7 +336,7 @@ void run_exclusive_scan_all_scenarios() { run_single_scenario_default_op<Tag, ValueType>(it, ValueType{-2}); run_single_scenario_default_op<Tag, ValueType>(it, ValueType{3}); -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET // custom multiply op is only run for small views otherwise it overflows if (it.first == "small-a" || it.first == "small-b") { using custom_bop_t = MultiplyFunctor<ValueType>; @@ -374,6 +368,8 @@ TEST(std_algorithms_numeric_ops_test, exclusive_scan) { run_exclusive_scan_all_scenarios<StridedThreeTag, double>(); run_exclusive_scan_all_scenarios<DynamicTag, int>(); run_exclusive_scan_all_scenarios<StridedThreeTag, int>(); + run_exclusive_scan_all_scenarios<DynamicTag, CustomValueType>(); + run_exclusive_scan_all_scenarios<StridedThreeTag, CustomValueType>(); } } // namespace EScan diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp index 357e733dc2b7c9e8d6132319cfb9e44422e48e8b..35b232e94fb9baf6ce832c26e5815cf0b8fd40e9 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp @@ -44,8 +44,6 @@ #include 
<TestStdAlgorithmsCommon.hpp> #include <iterator> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> #include <algorithm> namespace Test { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp index b4685ced723199d19303660ce7f0a0d57b4ec3b2..2a6d271856a978366aa8c052583219fe3b22fac3 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -312,7 +310,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); // std::cout << "result : " << mydiff << " " << stddiff << std::endl; - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { @@ -321,21 +319,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::find_end(exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::find_end("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } Kokkos::fence(); diff --git 
a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp index bd6ea0300a6ff80f6d9a0c3af7139668280f6311..84892bc37645407a4e0c9895782f3a51c65354f0 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -231,7 +229,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { @@ -240,21 +238,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::find_first_of(exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::find_first_of("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } Kokkos::fence(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp index c8cec00edc60a50986502f4c8730a60d01c7cf82..79badc7c4f75b1dcd3f52f3abf87e6615ee198e3 100644 --- 
a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> #include <algorithm> namespace Test { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp index ef366c56e78786ff13592e0ea6663e55be845100..882a6012ebb27afd9bf2e81991e33ae3776168cb 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp @@ -156,12 +156,6 @@ struct CustomLessThanComparator { return a < b; } - KOKKOS_INLINE_FUNCTION - bool operator()(const volatile ValueType1& a, - const volatile ValueType1& b) const { - return a < b; - } - KOKKOS_INLINE_FUNCTION CustomLessThanComparator() {} }; diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp index 0f90623a34d2c60eb5c0236f7ab729923f515d4e..173fbed660f4b50d8ef8f7968fb3e20ef9f1f560 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_Numeric.hpp> #include <utility> namespace Test { @@ -78,10 +76,16 @@ struct UnifDist<int> { int operator()() { return m_dist(m_gen); } }; -template <class ViewType> -void fill_zero(ViewType view) { - Kokkos::parallel_for(view.extent(0), FillZeroFunctor<ViewType>(view)); -} +template <> +struct UnifDist<CustomValueType> { + using dist_type = std::uniform_real_distribution<double>; + std::mt19937 m_gen; + 
dist_type m_dist; + + UnifDist() : m_dist(0.05, 1.2) { m_gen.seed(1034343); } + + CustomValueType operator()() { return m_dist(m_gen); } +}; template <class ViewType> void fill_view(ViewType dest_view, const std::string& name) { @@ -195,15 +199,17 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same<gold_view_value_type, int>::value) { - EXPECT_TRUE(gold_h(i) == test_view_h(i)); + EXPECT_EQ(gold_h(i), test_view_h(i)); } else { - const auto error = std::abs(gold_h(i) - test_view_h(i)); + const auto error = + std::abs(static_cast<double>(gold_h(i) - test_view_h(i))); if (error > 1e-10) { std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " " << gold_h(i) << " " << test_view_h(i) << " " - << std::abs(gold_h(i) - test_view_h(i)) << std::endl; + << std::abs(static_cast<double>(gold_h(i) - test_view_h(i))) + << std::endl; } - EXPECT_TRUE(error < 1e-10); + EXPECT_LT(error, 1e-10); } } // std::cout << " last el: " << test_view_h(ext-1) << std::endl; @@ -216,12 +222,6 @@ struct MultiplyFunctor { ValueType operator()(const ValueType& a, const ValueType& b) const { return (a * b); } - - KOKKOS_INLINE_FUNCTION - ValueType operator()(const volatile ValueType& a, - const volatile ValueType& b) const { - return (a * b); - } }; template <class ValueType> @@ -230,12 +230,6 @@ struct SumFunctor { ValueType operator()(const ValueType& a, const ValueType& b) const { return (a + b); } - - KOKKOS_INLINE_FUNCTION - ValueType operator()(const volatile ValueType& a, - const volatile ValueType& b) const { - return (a + b); - } }; std::string value_type_to_string(int) { return "int"; } @@ -258,7 +252,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info) { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, 
view_dest, default_op()); } @@ -266,21 +260,21 @@ void run_single_scenario_default_op(const InfoType& scenario_info) { fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), view_from, view_dest); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } { fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } @@ -313,7 +307,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bop, args...); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -322,14 +316,14 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bop, args...); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), view_from, view_dest, bop, args...); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -337,7 +331,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest, bop, args...); - EXPECT_TRUE(r == 
KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -354,7 +348,7 @@ void run_inclusive_scan_all_scenarios() { for (const auto& it : scenarios) { run_single_scenario_default_op<Tag, ValueType>(it); -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET // the sum custom op is always run using sum_binary_op = SumFunctor<ValueType>; sum_binary_op sbop; @@ -383,6 +377,8 @@ TEST(std_algorithms_numeric_ops_test, inclusive_scan) { run_inclusive_scan_all_scenarios<StridedThreeTag, double>(); run_inclusive_scan_all_scenarios<DynamicTag, int>(); run_inclusive_scan_all_scenarios<StridedThreeTag, int>(); + run_inclusive_scan_all_scenarios<DynamicTag, CustomValueType>(); + run_inclusive_scan_all_scenarios<StridedThreeTag, CustomValueType>(); } } // namespace IncScan diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp index acfb4c3f420cb043ce9453897763511947db1efc..b0df935392f8bd39ea49fb7d2614fc3f75cae10d 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_SortingOperations.hpp> #include <utility> namespace Test { @@ -178,7 +176,7 @@ void run_single_scenario(const InfoType& scenario_info) { [=](bool v) { return v == gold; }); EXPECT_TRUE(allA); -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET CustomLessThanComparator<ValueType, ValueType> comp; std::vector<bool> resultsB(4); resultsB[0] = diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp index 
3860fecfc637c39282386b05981adb1a8791697c..7f0071e248b952db0e3f8630a45cb8bf7d16a640 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_SortingOperations.hpp> #include <utility> namespace Test { @@ -175,12 +173,12 @@ void run_single_scenario(const InfoType& scenario_info) { KE::is_sorted_until("label", exespace(), KE::begin(view), KE::end(view)); auto r3 = KE::is_sorted_until(exespace(), view); auto r4 = KE::is_sorted_until("label", exespace(), view); - EXPECT_TRUE(r1 == gold); - EXPECT_TRUE(r2 == gold); - EXPECT_TRUE(r3 == gold); - EXPECT_TRUE(r4 == gold); + EXPECT_EQ(r1, gold); + EXPECT_EQ(r2, gold); + EXPECT_EQ(r3, gold); + EXPECT_EQ(r4, gold); -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET CustomLessThanComparator<ValueType, ValueType> comp; auto r5 = KE::is_sorted_until(exespace(), KE::cbegin(view), KE::cend(view), comp); @@ -190,10 +188,10 @@ void run_single_scenario(const InfoType& scenario_info) { auto r8 = KE::is_sorted_until("label", exespace(), view, comp); #endif - EXPECT_TRUE(r1 == gold); - EXPECT_TRUE(r2 == gold); - EXPECT_TRUE(r3 == gold); - EXPECT_TRUE(r4 == gold); + EXPECT_EQ(r1, gold); + EXPECT_EQ(r2, gold); + EXPECT_EQ(r3, gold); + EXPECT_EQ(r4, gold); Kokkos::fence(); } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp index 8c0c3e4cc8639b36dd80977440a7078a9669cc4f..8bfa51b4f54b6413855e3bac527ec0d90e9e4032 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp @@ -43,9 +43,6 @@ */ #include 
<TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <algorithm> namespace Test { @@ -172,7 +169,7 @@ void run_all_scenarios() { TEST(std_algorithms_lexicographical_compare_test, test) { // FIXME: should this disable only custom comparator tests? -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios<DynamicTag, double>(); run_all_scenarios<StridedTwoTag, int>(); run_all_scenarios<StridedThreeTag, unsigned>(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp index c13cdac0b1d5891e5dacf66cd17db7b99cd44e6c..56819de8c100f88958d9aafaa43b24017801fffb 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp @@ -43,7 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_MinMaxElementOperations.hpp> namespace KE = Kokkos::Experimental; @@ -228,39 +227,39 @@ template <class ViewType> void test_max_element_trivial_data(ViewType view) { /* if we pass empty range, should return last */ auto result = KE::max_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_TRUE(result == KE::cbegin(view)); + EXPECT_EQ(result, KE::cbegin(view)); /* if we pass empty range, should return last */ auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::max_element(exespace(), it0, it1); - EXPECT_TRUE(result2 == it1); + EXPECT_EQ(result2, it1); } template <class ViewType> void test_min_element_trivial_data(ViewType view) { /* if we pass empty range, should return last */ auto result = KE::min_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_TRUE(result == KE::cbegin(view)); + 
EXPECT_EQ(result, KE::cbegin(view)); /* if we pass empty range, should return last */ auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::min_element(exespace(), it0, it1); - EXPECT_TRUE(result2 == it1); + EXPECT_EQ(result2, it1); } template <class ViewType> void test_minmax_element_empty_range(ViewType view) { auto result = KE::minmax_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_TRUE(result.first == KE::cbegin(view)); - EXPECT_TRUE(result.second == KE::cbegin(view)); + EXPECT_EQ(result.first, KE::cbegin(view)); + EXPECT_EQ(result.second, KE::cbegin(view)); auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::minmax_element(exespace(), it0, it1); - EXPECT_TRUE(result2.first == it1); - EXPECT_TRUE(result2.second == it1); + EXPECT_EQ(result2.first, it1); + EXPECT_EQ(result2.second, it1); } template <class ViewType> @@ -337,7 +336,7 @@ void std_algorithms_min_max_element_test::test_minmax_element_non_trivial_data( } } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET template <class ViewType> void std_algorithms_min_max_element_test:: test_max_element_non_trivial_data_custom_comp(ViewType view) { @@ -446,7 +445,7 @@ TEST_F(std_algorithms_min_max_element_test, max_element_non_trivial_data) { test_max_element_non_trivial_data(m_strided_view); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET // non-trivial data, custom comp TEST_F(std_algorithms_min_max_element_test, min_element_non_trivial_data_custom_comp) { @@ -478,7 +477,7 @@ TEST_F(std_algorithms_min_max_element_test, minmax_element_non_trivial_data) { } #endif -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET // OpenMPTarget does not yet support custom comparator TEST_F(std_algorithms_min_max_element_test, minmax_element_non_trivial_data_custom_comp) { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp 
b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp index f13fe071d554599be6735820075da0e848463b81..4bc4e018b498eaffa9d9db88db8a84e82e9f254f 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp @@ -44,8 +44,6 @@ #include <TestStdAlgorithmsCommon.hpp> #include <iterator> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp> #include <algorithm> #include <numeric> @@ -150,10 +148,10 @@ void run_single_scenario(ViewType view1, ViewType view2, const auto my_diff12 = my_res1.second - f2; const auto my_diff21 = my_res2.first - f1; const auto my_diff22 = my_res2.second - f2; - EXPECT_TRUE(my_diff11 == std_diff1); - EXPECT_TRUE(my_diff12 == std_diff2); - EXPECT_TRUE(my_diff21 == std_diff1); - EXPECT_TRUE(my_diff22 == std_diff2); + EXPECT_EQ(my_diff11, std_diff1); + EXPECT_EQ(my_diff12, std_diff2); + EXPECT_EQ(my_diff21, std_diff1); + EXPECT_EQ(my_diff22, std_diff2); } { @@ -164,10 +162,10 @@ void run_single_scenario(ViewType view1, ViewType view2, const auto my_diff12 = my_res1.second - KE::begin(view2); const auto my_diff21 = my_res2.first - KE::begin(view1); const auto my_diff22 = my_res2.second - KE::begin(view2); - EXPECT_TRUE(my_diff11 == std_diff1); - EXPECT_TRUE(my_diff12 == std_diff2); - EXPECT_TRUE(my_diff21 == std_diff1); - EXPECT_TRUE(my_diff22 == std_diff2); + EXPECT_EQ(my_diff11, std_diff1); + EXPECT_EQ(my_diff12, std_diff2); + EXPECT_EQ(my_diff21, std_diff1); + EXPECT_EQ(my_diff22, std_diff2); } } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp index 44acb477395ba25ee70fba3d4a33ed856c952158..8d4f604037d302327961c82b2020ef99bacef24e 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp @@ 
-43,7 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_ModifyingOperations.hpp> namespace Test { namespace stdalgos { @@ -76,19 +75,19 @@ struct MyMovableType { TEST(std_algorithms_mod_ops_test, move) { MyMovableType a; - using move_t = decltype(KE::move(a)); + using move_t = decltype(std::move(a)); static_assert(std::is_rvalue_reference<move_t>::value, ""); // move constr - MyMovableType b(KE::move(a)); - EXPECT_TRUE(b.m_value == 11); - EXPECT_TRUE(a.m_value == -2); + MyMovableType b(std::move(a)); + EXPECT_EQ(b.m_value, 11); + EXPECT_EQ(a.m_value, -2); // move assign MyMovableType c; - c = KE::move(b); - EXPECT_TRUE(c.m_value == 11); - EXPECT_TRUE(b.m_value == -4); + c = std::move(b); + EXPECT_EQ(c.m_value, 11); + EXPECT_EQ(b.m_value, -4); } template <class ViewType> @@ -98,9 +97,9 @@ struct StdAlgoModSeqOpsTestMove { KOKKOS_INLINE_FUNCTION void operator()(const int index) const { typename ViewType::value_type a{11}; - using move_t = decltype(KE::move(a)); + using move_t = decltype(std::move(a)); static_assert(std::is_rvalue_reference<move_t>::value, ""); - m_view(index) = KE::move(a); + m_view(index) = std::move(a); } StdAlgoModSeqOpsTestMove(ViewType view) : m_view(view) {} @@ -126,8 +125,8 @@ TEST(std_algorithms_mod_ops_test, swap) { int a = 1; int b = 2; KE::swap(a, b); - EXPECT_TRUE(a == 2); - EXPECT_TRUE(b == 1); + EXPECT_EQ(a, 2); + EXPECT_EQ(b, 1); } { @@ -180,17 +179,17 @@ void test_iter_swap(ViewType view) { using value_type = typename ViewType::value_type; auto a_dc = create_deep_copyable_compatible_clone(view); auto a_h = create_mirror_view_and_copy(Kokkos::HostSpace(), a_dc); - EXPECT_TRUE(view.extent(0) == 10); - EXPECT_TRUE(a_h(0) == value_type(3)); - EXPECT_TRUE(a_h(1) == value_type(1)); - EXPECT_TRUE(a_h(2) == value_type(2)); - EXPECT_TRUE(a_h(3) == value_type(0)); - EXPECT_TRUE(a_h(4) == value_type(6)); - EXPECT_TRUE(a_h(5) == value_type(5)); - EXPECT_TRUE(a_h(6) == value_type(4)); - EXPECT_TRUE(a_h(7) == 
value_type(7)); - EXPECT_TRUE(a_h(8) == value_type(8)); - EXPECT_TRUE(a_h(9) == value_type(9)); + EXPECT_EQ(view.extent_int(0), 10); + EXPECT_EQ(a_h(0), value_type(3)); + EXPECT_EQ(a_h(1), value_type(1)); + EXPECT_EQ(a_h(2), value_type(2)); + EXPECT_EQ(a_h(3), value_type(0)); + EXPECT_EQ(a_h(4), value_type(6)); + EXPECT_EQ(a_h(5), value_type(5)); + EXPECT_EQ(a_h(6), value_type(4)); + EXPECT_EQ(a_h(7), value_type(7)); + EXPECT_EQ(a_h(8), value_type(8)); + EXPECT_EQ(a_h(9), value_type(9)); } TEST(std_algorithms_mod_ops_test, iter_swap_static_view) { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp index 3a8883d48ef1064fca68560fcd4155f4d815eac5..1e3960c5e690dd7b7b13cb1b7946d847d0912438 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> -#include "std_algorithms/Kokkos_BeginEnd.hpp" namespace KE = Kokkos::Experimental; @@ -390,16 +388,16 @@ void test_swap_ranges(ViewType view) { parallel_for(ext, cp_func_a_t(view, checkViewA)); auto cvA_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkViewA); - EXPECT_TRUE(cvA_h(0) == 0); - EXPECT_TRUE(cvA_h(1) == 1); - EXPECT_TRUE(cvA_h(2) == 99); - EXPECT_TRUE(cvA_h(3) == 98); - EXPECT_TRUE(cvA_h(4) == 97); - EXPECT_TRUE(cvA_h(5) == 96); - EXPECT_TRUE(cvA_h(6) == 6); - EXPECT_TRUE(cvA_h(7) == 7); - EXPECT_TRUE(cvA_h(8) == 8); - EXPECT_TRUE(cvA_h(9) == 9); + EXPECT_EQ(cvA_h(0), 0); + EXPECT_EQ(cvA_h(1), 1); + EXPECT_EQ(cvA_h(2), 99); + EXPECT_EQ(cvA_h(3), 98); + EXPECT_EQ(cvA_h(4), 97); + EXPECT_EQ(cvA_h(5), 96); + EXPECT_EQ(cvA_h(6), 6); + EXPECT_EQ(cvA_h(7), 7); + EXPECT_EQ(cvA_h(8), 8); + EXPECT_EQ(cvA_h(9), 9); /* check viewB */ static_view_type checkViewB("tmpB"); @@ -407,16 +405,16 
@@ void test_swap_ranges(ViewType view) { Kokkos::parallel_for(ext, cp_func_b_t(viewB, checkViewB)); auto cvB_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkViewB); - EXPECT_TRUE(cvB_h(0) == 100); - EXPECT_TRUE(cvB_h(1) == 2); - EXPECT_TRUE(cvB_h(2) == 3); - EXPECT_TRUE(cvB_h(3) == 4); - EXPECT_TRUE(cvB_h(4) == 5); - EXPECT_TRUE(cvB_h(5) == 95); - EXPECT_TRUE(cvB_h(6) == 94); - EXPECT_TRUE(cvB_h(7) == 93); - EXPECT_TRUE(cvB_h(8) == 92); - EXPECT_TRUE(cvB_h(9) == 91); + EXPECT_EQ(cvB_h(0), 100); + EXPECT_EQ(cvB_h(1), 2); + EXPECT_EQ(cvB_h(2), 3); + EXPECT_EQ(cvB_h(3), 4); + EXPECT_EQ(cvB_h(4), 5); + EXPECT_EQ(cvB_h(5), 95); + EXPECT_EQ(cvB_h(6), 94); + EXPECT_EQ(cvB_h(7), 93); + EXPECT_EQ(cvB_h(8), 92); + EXPECT_EQ(cvB_h(9), 91); } TEST_F(std_algorithms_mod_seq_ops_test, swap_ranges) { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp new file mode 100644 index 0000000000000000000000000000000000000000..002d35466127470ff78616de34497da2e1ac3c98 --- /dev/null +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp @@ -0,0 +1,135 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <TestStdAlgorithmsCommon.hpp> +#include <utility> +#include <Kokkos_Random.hpp> + +namespace Test { +namespace stdalgos { +namespace MoveBackward { + +namespace KE = Kokkos::Experimental; + +template <class Tag, class ValueType, class InfoType> +void run_single_scenario(const InfoType& scenario_info, int apiId) { + const std::size_t view_ext = std::get<1>(scenario_info); + + auto v = create_view<ValueType>(Tag{}, view_ext, "v"); + + // v might not be deep copyable so to modify it on the host + // need to do all this + auto v_dc = create_deep_copyable_compatible_view_with_same_extent(v); + auto v_dc_h = create_mirror_view(Kokkos::HostSpace(), v_dc); + Kokkos::Random_XorShift64_Pool<Kokkos::DefaultHostExecutionSpace> pool(12371); + Kokkos::fill_random(v_dc_h, pool, 0, 523); + // copy to v_dc and then to v + Kokkos::deep_copy(v_dc, v_dc_h); + CopyFunctor<decltype(v_dc), decltype(v)> F1(v_dc, v); + Kokkos::parallel_for("copy", v.extent(0), F1); + + // make a gold copy of v before calling the algorithm + // since the algorithm will modify v + auto gold = create_host_space_copy(v); + + // create another view that is bigger than v + // because we need it to test the move_backward + auto v2 = create_view<ValueType>(Tag{}, view_ext + 5, "v2"); + + if (apiId == 0) { + auto rit = + KE::move_backward(exespace(), KE::begin(v), KE::end(v), KE::end(v2)); + const int dist = KE::distance(KE::begin(v2), rit); + EXPECT_EQ(dist, 5); + } else if (apiId == 1) { + auto rit = KE::move_backward("mylabel", exespace(), KE::begin(v), + KE::end(v), KE::end(v2)); + const int dist = KE::distance(KE::begin(v2), rit); + EXPECT_EQ(dist, 5); + } else if (apiId == 2) { + auto rit = KE::move_backward(exespace(), v, v2); + const int dist = KE::distance(KE::begin(v2), rit); + EXPECT_EQ(dist, 5); + } else if (apiId == 3) { + auto rit = KE::move_backward("mylabel", 
exespace(), v, v2); + const int dist = KE::distance(KE::begin(v2), rit); + EXPECT_EQ(dist, 5); + } + + // check all of v2: first 5 stay zero, the rest must equal gold + auto v2_h = create_host_space_copy(v2); + for (std::size_t j = 0; j < v2_h.extent(0); ++j) { + if (j < 5) { + EXPECT_EQ(v2_h(j), static_cast<ValueType>(0)); + } else { + EXPECT_EQ(gold(j - 5), v2_h(j)); + } + } +} + +template <class Tag, class ValueType> +void run_all_scenarios() { + const std::map<std::string, std::size_t> scenarios = { + {"empty", 0}, {"one-element-a", 1}, {"one-element-b", 1}, + {"two-elements-a", 2}, {"two-elements-b", 2}, {"small-a", 9}, + {"small-b", 13}, {"medium", 1103}, {"large", 101513}}; + + for (const auto& it : scenarios) { + run_single_scenario<Tag, ValueType>(it, 0); + run_single_scenario<Tag, ValueType>(it, 1); + run_single_scenario<Tag, ValueType>(it, 2); + run_single_scenario<Tag, ValueType>(it, 3); + } +} + +TEST(std_algorithms_mod_seq_ops, move_backward) { + run_all_scenarios<DynamicTag, int>(); + run_all_scenarios<DynamicTag, double>(); + run_all_scenarios<StridedThreeTag, int>(); + run_all_scenarios<StridedThreeTag, double>(); +} + +} // namespace MoveBackward +} // namespace stdalgos +} // namespace Test diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp index 51f74220ce3f0c34f87c0c5d1266664bdc4e93c9..0ea5fcc99ad319eb337cc992c75a829660801c37 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp @@ -43,87 +43,12 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_Numeric.hpp> namespace KE = Kokkos::Experimental; namespace Test { namespace stdalgos { -struct CustomValueType { - KOKKOS_INLINE_FUNCTION - CustomValueType(){}; - - KOKKOS_INLINE_FUNCTION - CustomValueType(value_type val) : value(val){}; - - KOKKOS_INLINE_FUNCTION - CustomValueType(const CustomValueType& other) { this->value 
= other.value; } - - KOKKOS_INLINE_FUNCTION - value_type& operator()() { return value; } - - KOKKOS_INLINE_FUNCTION - const value_type& operator()() const { return value; } - - KOKKOS_INLINE_FUNCTION - CustomValueType& operator+=(const CustomValueType& other) { - this->value += other.value; - return *this; - } - - KOKKOS_INLINE_FUNCTION - CustomValueType& operator=(const CustomValueType& other) { - this->value = other.value; - return *this; - } - - KOKKOS_INLINE_FUNCTION - CustomValueType operator+(const CustomValueType& other) const { - CustomValueType result; - result.value = this->value + other.value; - return result; - } - - KOKKOS_INLINE_FUNCTION - CustomValueType operator*(const CustomValueType& other) const { - CustomValueType result; - result.value = this->value * other.value; - return result; - } - - KOKKOS_INLINE_FUNCTION - bool operator==(const CustomValueType& other) const { - return this->value == other.value; - } - - // - // volatile overloads needed for the kokkos reductions - // - // note the void return - KOKKOS_INLINE_FUNCTION - void operator+=(const volatile CustomValueType& other) volatile { - this->value += other.value; - } - - // note the void return - KOKKOS_INLINE_FUNCTION - void operator=(const CustomValueType& other) volatile { - this->value = other.value; - } - - KOKKOS_INLINE_FUNCTION - CustomValueType operator+(const volatile CustomValueType& other) const - volatile { - CustomValueType result; - result.value = this->value + other.value; - return result; - } - - private: - value_type value = {}; -}; - template <class ValueType> struct TimesTwoUnaryTransformFunctor { KOKKOS_INLINE_FUNCTION @@ -144,12 +69,6 @@ struct SumJoinFunctor { ValueType operator()(const ValueType& a, const ValueType& b) const { return a + b; } - - KOKKOS_INLINE_FUNCTION - ValueType operator()(const volatile ValueType& a, - const volatile ValueType& b) const { - return a + b; - } }; struct std_algorithms_numerics_test : public ::testing::Test { @@ -239,7 +158,7 @@ 
struct std_algorithms_numerics_test : public ::testing::Test { } }; -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET // ------------------------------------------------------------------- // test default case of transform_reduce @@ -260,8 +179,8 @@ void run_and_check_transform_reduce_default(ViewType1 first_view, const auto r2 = KE::transform_reduce( "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view), KE::cbegin(second_view), init_value); - EXPECT_TRUE(r1 == init_value); - EXPECT_TRUE(r2 == init_value); + EXPECT_EQ(r1, init_value); + EXPECT_EQ(r2, init_value); // non-trivial cases const auto r3 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(first_view), @@ -277,10 +196,10 @@ void run_and_check_transform_reduce_default(ViewType1 first_view, const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view, init_value); - EXPECT_TRUE(r3 == result_value); - EXPECT_TRUE(r4 == result_value); - EXPECT_TRUE(r5 == result_value); - EXPECT_TRUE(r6 == result_value); + EXPECT_EQ(r3, result_value); + EXPECT_EQ(r4, result_value); + EXPECT_EQ(r5, result_value); + EXPECT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -363,8 +282,8 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, KE::cbegin(first_view), KE::cbegin(second_view), init_value, std::forward<Args>(args)...); - EXPECT_TRUE(r1 == init_value); - EXPECT_TRUE(r2 == init_value); + EXPECT_EQ(r1, init_value); + EXPECT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::transform_reduce( @@ -382,10 +301,10 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view, init_value, std::forward<Args>(args)...); - EXPECT_TRUE(r3 == result_value); - EXPECT_TRUE(r4 == result_value); - EXPECT_TRUE(r5 == result_value); - EXPECT_TRUE(r6 == result_value); + EXPECT_EQ(r3, result_value); + EXPECT_EQ(r4, result_value); + 
EXPECT_EQ(r5, result_value); + EXPECT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -482,8 +401,8 @@ void run_and_check_transform_reduce_overloadB(ViewType view, KE::cbegin(view), KE::cbegin(view), init_value, std::forward<Args>(args)...); - EXPECT_TRUE(r1 == init_value); - EXPECT_TRUE(r2 == init_value); + EXPECT_EQ(r1, init_value); + EXPECT_EQ(r2, init_value); // non trivial const auto r3 = @@ -499,10 +418,10 @@ void run_and_check_transform_reduce_overloadB(ViewType view, const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view, init_value, std::forward<Args>(args)...); - EXPECT_TRUE(r3 == result_value); - EXPECT_TRUE(r4 == result_value); - EXPECT_TRUE(r5 == result_value); - EXPECT_TRUE(r6 == result_value); + EXPECT_EQ(r3, result_value); + EXPECT_EQ(r4, result_value); + EXPECT_EQ(r5, result_value); + EXPECT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -556,8 +475,8 @@ void run_and_check_reduce_overloadA(ViewType view, ValueType non_trivial_result, KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view)); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_TRUE(r1 == trivial_result); - EXPECT_TRUE(r2 == trivial_result); + EXPECT_EQ(r1, trivial_result); + EXPECT_EQ(r2, trivial_result); // non trivial cases const auto r3 = @@ -567,10 +486,10 @@ void run_and_check_reduce_overloadA(ViewType view, ValueType non_trivial_result, const auto r5 = KE::reduce(ExecutionSpace(), view); const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view); - EXPECT_TRUE(r3 == non_trivial_result); - EXPECT_TRUE(r4 == non_trivial_result); - EXPECT_TRUE(r5 == non_trivial_result); - EXPECT_TRUE(r6 == non_trivial_result); + EXPECT_EQ(r3, non_trivial_result); + EXPECT_EQ(r4, non_trivial_result); + EXPECT_EQ(r5, non_trivial_result); + EXPECT_EQ(r6, non_trivial_result); } TEST_F(std_algorithms_numerics_test, @@ -612,8 +531,8 @@ void run_and_check_reduce_overloadB(ViewType view, ValueType 
result_value, KE::cbegin(view), init_value); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), init_value); - EXPECT_TRUE(r1 == init_value); - EXPECT_TRUE(r2 == init_value); + EXPECT_EQ(r1, init_value); + EXPECT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), @@ -623,10 +542,10 @@ void run_and_check_reduce_overloadB(ViewType view, ValueType result_value, const auto r5 = KE::reduce(ExecutionSpace(), view, init_value); const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view, init_value); - EXPECT_TRUE(r3 == result_value); - EXPECT_TRUE(r4 == result_value); - EXPECT_TRUE(r5 == result_value); - EXPECT_TRUE(r6 == result_value); + EXPECT_EQ(r3, result_value); + EXPECT_EQ(r4, result_value); + EXPECT_EQ(r5, result_value); + EXPECT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -662,8 +581,8 @@ void run_and_check_reduce_overloadC(ViewType view, ValueType result_value, KE::cbegin(view), init_value, joiner); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), init_value, joiner); - EXPECT_TRUE(r1 == init_value); - EXPECT_TRUE(r2 == init_value); + EXPECT_EQ(r1, init_value); + EXPECT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), @@ -674,10 +593,10 @@ void run_and_check_reduce_overloadC(ViewType view, ValueType result_value, const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view, init_value, joiner); - EXPECT_TRUE(r3 == result_value); - EXPECT_TRUE(r4 == result_value); - EXPECT_TRUE(r5 == result_value); - EXPECT_TRUE(r6 == result_value); + EXPECT_EQ(r3, result_value); + EXPECT_EQ(r4, result_value); + EXPECT_EQ(r5, result_value); + EXPECT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp 
b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp index 3fb4f9d15f405c310c2ea4393f1a42e713b8c10d..a461f275154e06b4f39c414787309f464a3a4529 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_PartitioningOperations.hpp> #include <utility> #include <algorithm> @@ -160,12 +158,12 @@ void verify_data(const std::string& name, ResultType my_result, const std::size_t my_diff_true = my_result.first - KE::begin(view_dest_true); const std::size_t my_diff_false = my_result.second - KE::begin(view_dest_false); - EXPECT_TRUE(std_diff_true == my_diff_true); - EXPECT_TRUE(std_diff_false == my_diff_false); + EXPECT_EQ(std_diff_true, my_diff_true); + EXPECT_EQ(std_diff_false, my_diff_false); auto view_dest_true_h = create_host_space_copy(view_dest_true); for (std::size_t i = 0; i < std_diff_true; ++i) { - EXPECT_TRUE(std_vec_true[i] == view_dest_true_h(i)); + EXPECT_EQ(std_vec_true[i], view_dest_true_h(i)); // std::cout << "i= " << i << " " // << " std_true = " << std_vec_true[i] << " " // << " mine = " << view_dest_true_h(i) << '\n'; @@ -173,45 +171,45 @@ void verify_data(const std::string& name, ResultType my_result, auto view_dest_false_h = create_host_space_copy(view_dest_false); for (std::size_t i = 0; i < std_diff_false; ++i) { - EXPECT_TRUE(std_vec_false[i] == view_dest_false_h(i)); + EXPECT_EQ(std_vec_false[i], view_dest_false_h(i)); // std::cout << "i= " << i << " " // << " std_false = " << std_vec_false[i] << " " // << " mine = " << view_dest_false_h(i) << '\n'; } if (name == "empty") { - EXPECT_TRUE(my_diff_true == 0); - EXPECT_TRUE(my_diff_false == 0); + EXPECT_EQ(my_diff_true, 0u); + EXPECT_EQ(my_diff_false, 0u); } else if (name == "one-element-a") { - EXPECT_TRUE(my_diff_true == 0); - 
EXPECT_TRUE(my_diff_false == 1); + EXPECT_EQ(my_diff_true, 0u); + EXPECT_EQ(my_diff_false, 1u); } else if (name == "one-element-b") { - EXPECT_TRUE(my_diff_true == 1); - EXPECT_TRUE(my_diff_false == 0); + EXPECT_EQ(my_diff_true, 1u); + EXPECT_EQ(my_diff_false, 0u); } else if (name == "two-elements-a") { - EXPECT_TRUE(my_diff_true == 1); - EXPECT_TRUE(my_diff_false == 1); + EXPECT_EQ(my_diff_true, 1u); + EXPECT_EQ(my_diff_false, 1u); } else if (name == "two-elements-b") { - EXPECT_TRUE(my_diff_true == 1); - EXPECT_TRUE(my_diff_false == 1); + EXPECT_EQ(my_diff_true, 1u); + EXPECT_EQ(my_diff_false, 1u); } else if (name == "small-b") { - EXPECT_TRUE(my_diff_true == 13); - EXPECT_TRUE(my_diff_false == 0); + EXPECT_EQ(my_diff_true, 13u); + EXPECT_EQ(my_diff_false, 0u); } else if (name == "small-c") { - EXPECT_TRUE(my_diff_true == 0); - EXPECT_TRUE(my_diff_false == 15); + EXPECT_EQ(my_diff_true, 0u); + EXPECT_EQ(my_diff_false, 15u); } } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp index 990d41ead6b5689aa90f29d95813c726f0799809..0d46151559031937fac59bf9e18effac9a4c6604 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp @@ -43,7 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_PartitioningOperations.hpp> namespace KE = Kokkos::Experimental; @@ -180,15 +179,15 @@ TEST_F(std_algorithms_partitioning_test, is_partitioned_trivial) { IsNegativeFunctor<value_type> p; const auto result1 = KE::is_partitioned(exespace(), KE::cbegin(m_static_view), KE::cbegin(m_static_view), p); - EXPECT_EQ(true, result1); + EXPECT_TRUE(result1); const auto result2 = KE::is_partitioned( exespace(), KE::cbegin(m_dynamic_view), KE::cbegin(m_dynamic_view), p); - EXPECT_EQ(true, result2); + EXPECT_TRUE(result2); const auto result3 = 
KE::is_partitioned( exespace(), KE::cbegin(m_strided_view), KE::cbegin(m_strided_view), p); - EXPECT_EQ(true, result3); + EXPECT_TRUE(result3); } TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_iterators) { diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp index 0cd931d87288848ef429805df1da23758c4e1085..8f345f044e437ca65ab9938f7da58423fe9e0ab4 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -147,12 +145,12 @@ void verify_data(ViewTypeData view_data_h, ViewTypeTest view_test, // check that returned iterators are correct const std::size_t std_diff = std_result - KE::begin(view_data_h); const std::size_t my_diff = my_result - KE::begin(view_test); - EXPECT_TRUE(std_diff == my_diff); + EXPECT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_test_h = create_host_space_copy(view_test); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_TRUE(view_test_h(i) == view_data_h[i]); + EXPECT_EQ(view_test_h(i), view_data_h[i]); // std::cout << "i= " << i << " " // << "mine: " << view_test_h(i) << " " // << "std: " << view_data_h(i) diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp index 0b2de8147c3751e34d1ba19734b2d53c28fd4225..bb7d0b52bd387d4d3e3237a63536f77ecde1f153 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include 
<std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -165,12 +163,12 @@ void verify_data(ViewFromType view_from, ViewDestType view_dest, // check that returned iterators are correct const std::size_t std_diff = std_result - gold_dest_std.begin(); const std::size_t my_diff = my_result - KE::begin(view_dest); - EXPECT_TRUE(std_diff == my_diff); + EXPECT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_dest_h = create_host_space_copy(view_dest); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_TRUE(view_dest_h(i) == gold_dest_std[i]); + EXPECT_EQ(view_dest_h(i), gold_dest_std[i]); // std::cout << "i= " << i << " " // << "mine: " << view_dest_h(i) << " " // << "std: " << gold_dest_std[i] diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp index 0c20b6b0a727b8163320e0694b4241fe313b1fd4..b209b88ea968cb672f76f1827742aa9bac22f415 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -149,12 +147,12 @@ void verify_data(ViewTypeFrom view_from, ViewTypeDest view_dest, // check that returned iterators are correct const std::size_t std_diff = std_result - gold_dest_std.begin(); const std::size_t my_diff = my_result - KE::begin(view_dest); - EXPECT_TRUE(std_diff == my_diff); + EXPECT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_dest_h = create_host_space_copy(view_dest); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_TRUE(view_dest_h(i) == gold_dest_std[i]); + 
EXPECT_EQ(view_dest_h(i), gold_dest_std[i]); // std::cout << "i= " << i << " " // << "mine: " << view_dest_h(i) << " " // << "std: " << gold_dest_std[i] diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp index 05c86690a28689619e4c83efd92afbe5400ad463..f1f232369b87f8757292d3d8c54fa55a700eb92d 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -142,12 +140,12 @@ void verify_data(ViewTypeData view_data_h, ViewTypeTest view_test, // check that returned iterators are correct const std::size_t std_diff = std_result - KE::begin(view_data_h); const std::size_t my_diff = my_result - KE::begin(view_test); - EXPECT_TRUE(std_diff == my_diff); + EXPECT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_test_h = create_host_space_copy(view_test); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_TRUE(view_test_h(i) == view_data_h[i]); + EXPECT_EQ(view_test_h(i), view_data_h[i]); // std::cout << "i= " << i << " " // << "mine: " << view_test_h(i) << " " // << "std: " << view_data_h(i) diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp index 14ee73376957c3caa272bae1814a958a5a701bba..f044d975a7ce47402b6a4bb92f94158e4a86d4e9 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include 
<std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -134,30 +132,30 @@ void verify_data(const std::string& name, ViewType1 test_view, } else if (name == "one-element-a") { - EXPECT_TRUE(view_h(0) == ValueType{1}); + EXPECT_EQ(view_h(0), ValueType{1}); } else if (name == "one-element-b") { - EXPECT_TRUE(view_h(0) == new_value); + EXPECT_EQ(view_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_TRUE(view_h(0) == ValueType{1}); - EXPECT_TRUE(view_h(1) == new_value); + EXPECT_EQ(view_h(0), ValueType{1}); + EXPECT_EQ(view_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_TRUE(view_h(0) == new_value); - EXPECT_TRUE(view_h(1) == ValueType{-1}); + EXPECT_EQ(view_h(0), new_value); + EXPECT_EQ(view_h(1), ValueType{-1}); } else if (name == "small-a") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - EXPECT_TRUE(view_h(i) == new_value); + EXPECT_EQ(view_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast<ValueType>(i + 1); - EXPECT_TRUE(view_h(i) == gold); + EXPECT_EQ(view_h(i), gold); } } } @@ -165,9 +163,9 @@ void verify_data(const std::string& name, ViewType1 test_view, else if (name == "small-b") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i < 4) { - EXPECT_TRUE(view_h(i) == ValueType{-1}); + EXPECT_EQ(view_h(i), ValueType{-1}); } else { - EXPECT_TRUE(view_h(i) == new_value); + EXPECT_EQ(view_h(i), new_value); } } } @@ -175,9 +173,9 @@ void verify_data(const std::string& name, ViewType1 test_view, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_TRUE(view_h(i) == ValueType{-1}); + EXPECT_EQ(view_h(i), ValueType{-1}); } else { - EXPECT_TRUE(view_h(i) == new_value); + EXPECT_EQ(view_h(i), new_value); } } } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp 
b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp index 1e7f48067f57697252959efb7d67a597c9f2e0d0..682622cc13cd97e53e4a710c17d1cde89cc695ea 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -142,40 +140,40 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_TRUE(view_from_h(0) == ValueType{1}); - EXPECT_TRUE(view_test_h(0) == view_from_h(0)); + EXPECT_EQ(view_from_h(0), ValueType{1}); + EXPECT_EQ(view_test_h(0), view_from_h(0)); } else if (name == "one-element-b") { - EXPECT_TRUE(view_from_h(0) == ValueType{2}); - EXPECT_TRUE(view_test_h(0) == new_value); + EXPECT_EQ(view_from_h(0), ValueType{2}); + EXPECT_EQ(view_test_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_TRUE(view_from_h(0) == ValueType{1}); - EXPECT_TRUE(view_from_h(1) == ValueType{2}); + EXPECT_EQ(view_from_h(0), ValueType{1}); + EXPECT_EQ(view_from_h(1), ValueType{2}); - EXPECT_TRUE(view_test_h(0) == view_from_h(0)); - EXPECT_TRUE(view_test_h(1) == new_value); + EXPECT_EQ(view_test_h(0), view_from_h(0)); + EXPECT_EQ(view_test_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_TRUE(view_from_h(0) == ValueType{2}); - EXPECT_TRUE(view_from_h(1) == ValueType{-1}); + EXPECT_EQ(view_from_h(0), ValueType{2}); + EXPECT_EQ(view_from_h(1), ValueType{-1}); - EXPECT_TRUE(view_test_h(0) == new_value); - EXPECT_TRUE(view_test_h(1) == view_from_h(1)); + EXPECT_EQ(view_test_h(0), new_value); + EXPECT_EQ(view_test_h(1), view_from_h(1)); } else if (name == "small-a") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - 
EXPECT_TRUE(view_from_h(i) == ValueType{2}); - EXPECT_TRUE(view_test_h(i) == new_value); + EXPECT_EQ(view_from_h(i), ValueType{2}); + EXPECT_EQ(view_test_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast<ValueType>(i + 1); - EXPECT_TRUE(view_from_h(i) == gold); - EXPECT_TRUE(view_test_h(i) == gold); + EXPECT_EQ(view_from_h(i), gold); + EXPECT_EQ(view_test_h(i), gold); } } } @@ -183,11 +181,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "small-b") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i < 4) { - EXPECT_TRUE(view_from_h(i) == ValueType{-1}); - EXPECT_TRUE(view_test_h(i) == view_from_h(i)); + EXPECT_EQ(view_from_h(i), ValueType{-1}); + EXPECT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_TRUE(view_from_h(i) == ValueType{2}); - EXPECT_TRUE(view_test_h(i) == new_value); + EXPECT_EQ(view_from_h(i), ValueType{2}); + EXPECT_EQ(view_test_h(i), new_value); } } } @@ -195,11 +193,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_TRUE(view_from_h(i) == ValueType{-1}); - EXPECT_TRUE(view_test_h(i) == view_from_h(i)); + EXPECT_EQ(view_from_h(i), ValueType{-1}); + EXPECT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_TRUE(view_from_h(i) == ValueType{2}); - EXPECT_TRUE(view_test_h(i) == new_value); + EXPECT_EQ(view_from_h(i), ValueType{2}); + EXPECT_EQ(view_test_h(i), new_value); } } } @@ -232,7 +230,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::replace_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -245,7 +243,7 @@ void run_single_scenario(const InfoType& scenario_info) { 
KE::cend(view_from), KE::begin(view_dest), old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -257,7 +255,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy(exespace(), view_from, view_dest, old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -269,7 +267,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy("label", exespace(), view_from, view_dest, old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp index cb98aac08c2cb28351ea2dfd8dc56f9229d48f4d..c2ba66e920da8dd61a091fe3d842427c352a2ce7 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -142,40 +140,40 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_TRUE(view_from_h(0) == ValueType{1}); - EXPECT_TRUE(view_test_h(0) == view_from_h(0)); + EXPECT_EQ(view_from_h(0), ValueType{1}); + EXPECT_EQ(view_test_h(0), view_from_h(0)); } else if (name == "one-element-b") { - EXPECT_TRUE(view_from_h(0) == ValueType{2}); - EXPECT_TRUE(view_test_h(0) == new_value); + 
EXPECT_EQ(view_from_h(0), ValueType{2}); + EXPECT_EQ(view_test_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_TRUE(view_from_h(0) == ValueType{1}); - EXPECT_TRUE(view_from_h(1) == ValueType{2}); + EXPECT_EQ(view_from_h(0), ValueType{1}); + EXPECT_EQ(view_from_h(1), ValueType{2}); - EXPECT_TRUE(view_test_h(0) == view_from_h(0)); - EXPECT_TRUE(view_test_h(1) == new_value); + EXPECT_EQ(view_test_h(0), view_from_h(0)); + EXPECT_EQ(view_test_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_TRUE(view_from_h(0) == ValueType{2}); - EXPECT_TRUE(view_from_h(1) == ValueType{-1}); + EXPECT_EQ(view_from_h(0), ValueType{2}); + EXPECT_EQ(view_from_h(1), ValueType{-1}); - EXPECT_TRUE(view_test_h(0) == new_value); - EXPECT_TRUE(view_test_h(1) == view_from_h(1)); + EXPECT_EQ(view_test_h(0), new_value); + EXPECT_EQ(view_test_h(1), view_from_h(1)); } else if (name == "small-a") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - EXPECT_TRUE(view_from_h(i) == ValueType{2}); - EXPECT_TRUE(view_test_h(i) == new_value); + EXPECT_EQ(view_from_h(i), ValueType{2}); + EXPECT_EQ(view_test_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast<ValueType>(i + 1); - EXPECT_TRUE(view_from_h(i) == gold); - EXPECT_TRUE(view_test_h(i) == gold); + EXPECT_EQ(view_from_h(i), gold); + EXPECT_EQ(view_test_h(i), gold); } } } @@ -183,11 +181,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "small-b") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i < 4) { - EXPECT_TRUE(view_from_h(i) == ValueType{-1}); - EXPECT_TRUE(view_test_h(i) == view_from_h(i)); + EXPECT_EQ(view_from_h(i), ValueType{-1}); + EXPECT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_TRUE(view_from_h(i) == ValueType{2}); - EXPECT_TRUE(view_test_h(i) == new_value); + EXPECT_EQ(view_from_h(i), ValueType{2}); + EXPECT_EQ(view_test_h(i), new_value); } } } @@ -195,11 +193,11 @@ 
void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_TRUE(view_from_h(i) == ValueType{-1}); - EXPECT_TRUE(view_test_h(i) == view_from_h(i)); + EXPECT_EQ(view_from_h(i), ValueType{-1}); + EXPECT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_TRUE(view_from_h(i) == ValueType{2}); - EXPECT_TRUE(view_test_h(i) == new_value); + EXPECT_EQ(view_from_h(i), ValueType{2}); + EXPECT_EQ(view_test_h(i), new_value); } } } @@ -239,7 +237,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::cend(view_from), KE::begin(view_dest), pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -250,7 +248,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::cend(view_from), KE::begin(view_dest), pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -260,7 +258,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy_if(exespace(), view_from, view_dest, pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -270,7 +268,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy_if("label", exespace(), view_from, view_dest, pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp 
b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp index 8c4d04889f60c0ee69da63b9db5271346b2520d2..7237e29555afbda7e595cdfbb7f8750428942c5d 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -168,7 +166,7 @@ void verify_data(ViewType1 data_view, // contains data // << data_view_dc(i) << " " // << data_view_h(i) << " " // << test_view_h(i) << std::endl; - EXPECT_TRUE(data_view_h(i) == test_view_h(i)); + EXPECT_EQ(data_view_h(i), test_view_h(i)); } } } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp index 77c80ed02fb629cb8c01bd0763b82c370d8b99f0..f8c81dc105a8986791e79316f3b36fc5bca92c35 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -107,7 +105,7 @@ void verify_data(ViewType1 test_view, ViewType2 orig_view) { const std::size_t ext = test_view.extent(0); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_TRUE(tv_h(i) == ov_h(ext - i - 1)); + EXPECT_EQ(tv_h(i), ov_h(ext - i - 1)); } } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp index 49d40115c9c040a6a58803f62c891c3b900bc5c8..bbf273970efe23d61f960c963369740ba91e5890 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp +++ 
b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -166,13 +164,13 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = result_it - KE::begin(view); const auto std_diff = std_rit - KE::begin(data_view_host); - EXPECT_TRUE(my_diff == std_diff); + EXPECT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); const std::size_t ext = view_h.extent(0); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_TRUE(view_h(i) == data_view_host[i]); + EXPECT_EQ(view_h(i), data_view_host[i]); // std::cout << "i= " << i << " " // << "mine: " << view_h(i) << " " // << "std: " << data_view_host(i) diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp index 02867478da181a3bd2a2bf82f22e03a0621c89a5..2012fde00860636d084cf096471589eae8a25751 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -169,7 +167,7 @@ void verify_data(ViewTypeFrom view_from, ViewTypeTest view_test, std_gold_h.begin()); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_TRUE(view_test_h(i) == std_gold_h[i]); + EXPECT_EQ(view_test_h(i), std_gold_h[i]); // std::cout << "i= " << i << " " // << "from: " << view_from_h(i) << " " // << "mine: " << view_test_h(i) << " " @@ -207,7 +205,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = 
KE::rotate_copy(exespace(), KE::cbegin(view_from), n_it, KE::cend(view_from), KE::begin(view_dest)); verify_data(view_from, view_dest, rotation_point); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -217,7 +215,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy("label", exespace(), KE::cbegin(view_from), n_it, KE::cend(view_from), KE::begin(view_dest)); verify_data(view_from, view_dest, rotation_point); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -226,7 +224,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy(exespace(), view_from, rotation_point, view_dest); verify_data(view_from, view_dest, rotation_point); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -235,7 +233,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy("label", exespace(), view_from, rotation_point, view_dest); verify_data(view_from, view_dest, rotation_point); - EXPECT_TRUE(rit == (KE::begin(view_dest) + view_ext)); + EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsScalarRedVsView.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsScalarRedVsView.cpp deleted file mode 100644 index c054dfcc1013c15c856d56f14bf0866f22749252..0000000000000000000000000000000000000000 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsScalarRedVsView.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> -#include <TestStdAlgorithmsHelperFunctors.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_MinMaxElementOperations.hpp> - -namespace KE = Kokkos::Experimental; - -namespace Test { -namespace stdalgos { - -template <class ViewType> -void fill_view(ViewType dest_view) { - using value_type = typename ViewType::value_type; - using exe_space = typename ViewType::execution_space; - using aux_view_t = Kokkos::View<value_type*, exe_space>; - - const std::size_t ext = dest_view.extent(0); - aux_view_t aux_view("aux_view", ext); - auto v_h = create_mirror_view(Kokkos::HostSpace(), aux_view); - - for (std::size_t i = 0; i < ext; ++i) { - v_h(i) = (value_type)i; - } - v_h(ext / 2) = (value_type)-101; - - Kokkos::deep_copy(aux_view, v_h); - CopyFunctor<aux_view_t, ViewType> F1(aux_view, dest_view); - Kokkos::parallel_for("copy", dest_view.extent(0), F1); -} - -template <class ViewType, class IndexType, class ReducerType> -struct MyFunctor { - using red_value_type = typename ReducerType::value_type; - - ViewType m_view; - ReducerType m_reducer; - - KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { - m_reducer.join(red_value, red_value_type{m_view(i), i}); - } - - KOKKOS_FUNCTION - MyFunctor(ViewType view, ReducerType reducer) - : m_view(view), m_reducer(std::move(reducer)) {} -}; - -TEST(scalar_vs_view_red, use_scalar) { - using exe_space = Kokkos::DefaultExecutionSpace; - using index_type = int; - using scalar_type = int; - using view_type = Kokkos::View<scalar_type*, exe_space>; - - const auto ext = 10001; - view_type view("myview", ext); - fill_view(view); - - using reducer_type = ::Kokkos::MinLoc<scalar_type, index_type>; - using red_result_type = typename reducer_type::value_type; - using func_type = MyFunctor<view_type, index_type, reducer_type>; - 
red_result_type result; - reducer_type reducer(result); - Kokkos::parallel_reduce("MinLocReduce", - Kokkos::RangePolicy<exe_space>(exe_space(), 0, ext), - func_type(view, reducer), reducer); - std::cout << " use_scalar = " << result.val << '\n'; -} - -template <class IteratorType, class ReducerType> -struct StdMyMinFunctor { - using index_type = typename IteratorType::difference_type; - using red_value_type = typename ReducerType::value_type; - - IteratorType m_first; - ReducerType m_reducer; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& red_value) const { - m_reducer.join(red_value, red_value_type{m_first[i], i}); - } - - KOKKOS_FUNCTION - StdMyMinFunctor(IteratorType first, ReducerType reducer) - : m_first(std::move(first)), m_reducer(std::move(reducer)) {} -}; - -template <class ViewType, class ReducerType> -struct StdMyMinFunctor2 { - using red_value_type = typename ReducerType::value_type; - - ViewType m_view; - ReducerType m_reducer; - - KOKKOS_FUNCTION - void operator()(const std::size_t i, red_value_type& red_value) const { - m_reducer.join(red_value, red_value_type{m_view(i), i}); - } - - KOKKOS_FUNCTION - StdMyMinFunctor2(ViewType viewIn, ReducerType reducer) - : m_view(viewIn), m_reducer(std::move(reducer)) {} -}; - -template <class ExecutionSpace, class IteratorType> -IteratorType my_min_1(const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - using index_type = typename IteratorType::difference_type; - using value_type = typename IteratorType::value_type; - using reducer_type = - Kokkos::MinFirstLoc<value_type, index_type, ExecutionSpace>; - using result_view_type = typename reducer_type::result_view_type; - using func_t = StdMyMinFunctor<IteratorType, reducer_type>; - - result_view_type result("min_or_max_elem_impl_result"); - reducer_type reducer(result); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce( - "label", Kokkos::RangePolicy<ExecutionSpace>(ex, 
0, num_elements), - func_t(first, reducer), reducer); - const auto result_h = - ::Kokkos::create_mirror_view_and_copy(::Kokkos::HostSpace(), result); - return first + result_h().loc; -} - -template <class ExecutionSpace, class IteratorType> -IteratorType my_min_2(const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - using index_type = typename IteratorType::difference_type; - using value_type = typename IteratorType::value_type; - using reducer_type = Kokkos::MinFirstLoc<value_type, index_type>; - using result_type = typename reducer_type::value_type; - using func_t = StdMyMinFunctor<IteratorType, reducer_type>; - - result_type result; - reducer_type reducer(result); - const auto num_elements = Kokkos::Experimental::distance(first, last); - ::Kokkos::parallel_reduce( - "label", Kokkos::RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(first, reducer), reducer); - return first + result.loc; -} - -template <class ExecutionSpace, class ViewType> -std::size_t my_min_3(const ExecutionSpace& ex, ViewType view) { - using index_type = std::size_t; - using value_type = typename ViewType::value_type; - using reducer_type = Kokkos::MinFirstLoc<value_type, index_type>; - using result_type = typename reducer_type::value_type; - using func_t = StdMyMinFunctor2<ViewType, reducer_type>; - - result_type result; - reducer_type reducer(result); - const auto num_elements = view.extent(0); - ::Kokkos::parallel_reduce( - "label", Kokkos::RangePolicy<ExecutionSpace>(ex, 0, num_elements), - func_t(view, reducer), reducer); - return result.loc; -} - -TEST(scalar_vs_view_red, my_min_it_use_result_view) { - using exe_space = Kokkos::DefaultExecutionSpace; - using view_type = Kokkos::View<int*, exe_space>; - view_type view("myview", 10001); - fill_view(view); - - auto rit = my_min_1(exe_space(), KE::cbegin(view), KE::cend(view)); - std::cout << " my_min_el = " << KE::distance(KE::cbegin(view), rit) << '\n'; -} - -TEST(scalar_vs_view_red, 
my_min_no_it_use_result_scalar) { - using exe_space = Kokkos::DefaultExecutionSpace; - using view_type = Kokkos::View<int*, exe_space>; - view_type view("myview", 10001); - fill_view(view); - - auto ind = my_min_3(exe_space(), view); - std::cout << " my_min_el = " << ind << '\n'; -} - -TEST(scalar_vs_view_red, my_min_it_use_result_scalar) { - using exe_space = Kokkos::DefaultExecutionSpace; - using view_type = Kokkos::View<int*, exe_space>; - view_type view("myview", 10001); - fill_view(view); - - auto rit = my_min_2(exe_space(), KE::cbegin(view), KE::cend(view)); - std::cout << " my_min_el = " << KE::distance(KE::cbegin(view), rit) << '\n'; -} - -} // namespace stdalgos -} // namespace Test diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp index 20e93e9648696b1117ef19730f2486b45b124fe0..e57385a8be6d17db10178f343bfe7026d199633b 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -289,7 +287,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { @@ -298,21 +296,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::search(exespace(), view, s_view, args...); const auto mydiff = 
myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::search("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } Kokkos::fence(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp index 4d54166353b5cb388e6485e8cf2990d873c52d58..31446046a597d2593fa20e106975ca06d141abd2 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -233,26 +231,26 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t count, auto myrit = KE::search_n(exespace(), KE::cbegin(view), KE::cend(view), count, value, args...); const auto mydiff = myrit - KE::cbegin(view); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n("label", exespace(), KE::cbegin(view), KE::cend(view), count, value, args...); const auto mydiff = myrit - KE::cbegin(view); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n("label", exespace(), view, count, value, args...); const auto mydiff = myrit - KE::begin(view); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n(exespace(), view, count, value, args...); const auto mydiff = myrit - KE::begin(view); - EXPECT_TRUE(mydiff == stddiff); + EXPECT_EQ(mydiff, stddiff); } Kokkos::fence(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp 
b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp index 2af0b438ae7832cb227b0a246c30e39a776118ff..0c97f255e9cc8552757652e2d40e5f45f921aaeb 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -133,12 +131,12 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = result_it - KE::begin(view); const auto std_diff = std_rit - KE::begin(data_view_host); - EXPECT_TRUE(my_diff == std_diff); + EXPECT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); for (std::size_t i = 0; i < (std::size_t)my_diff; ++i) { - EXPECT_TRUE(view_h(i) == data_view_host[i]); + EXPECT_EQ(view_h(i), data_view_host[i]); // std::cout << "i= " << i << " " // << "mine: " << view_h(i) << " " // << "std: " << data_view_host(i) diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp index ae85e5c6d45c9444ae7d1d18fb5a364ba198eec7..d8aa350f1dd822cf8cd7a6de7ee5d879757ca270 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -131,14 +129,14 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = KE::end(view) - result_it; const auto std_diff = KE::end(data_view_host) - 
std_rit; - EXPECT_TRUE(my_diff == std_diff); + EXPECT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); auto it1 = KE::cbegin(view_h); auto it2 = KE::cbegin(data_view_host); for (std::size_t i = 0; i < (std::size_t)my_diff; ++i) { - EXPECT_TRUE(it1[i] == it2[i]); + EXPECT_EQ(it1[i], it2[i]); // std::cout << "i= " << i << " " // << "mine: " << it1[i] << " " // << "std: " << it2[i] diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp index e7785734433249a81543ae9cc5a1d168d694a8e4..e415eff06cff08a3bacf3e5c02e3f0564b83f4cb 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_Numeric.hpp> #include <utility> namespace Test { @@ -195,7 +193,7 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same<gold_view_value_type, int>::value) { - EXPECT_TRUE(gold_h(i) == test_view_h(i)); + EXPECT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(gold_h(i) - test_view_h(i)); if (error > 1e-10) { @@ -203,7 +201,7 @@ void verify_data(ViewType1 data_view, // contains data << " " << gold_h(i) << " " << test_view_h(i) << " " << std::abs(gold_h(i) - test_view_h(i)) << std::endl; } - EXPECT_TRUE(error < 1e-10); + EXPECT_LT(error, 1e-10); } } // std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) << @@ -223,12 +221,6 @@ struct SumBinaryFunctor { ValueType operator()(const ValueType& a, const ValueType& b) const { return (a + b); } - - KOKKOS_INLINE_FUNCTION - ValueType operator()(const volatile ValueType& a, - const volatile ValueType& b) const { - return (a + 
b); - } }; std::string value_type_to_string(int) { return "int"; } @@ -257,7 +249,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, auto r = KE::transform_exclusive_scan( exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop, uop); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -266,7 +258,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, auto r = KE::transform_exclusive_scan( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop, uop); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -274,7 +266,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, fill_zero(view_dest); auto r = KE::transform_exclusive_scan(exespace(), view_from, view_dest, init_value, bop, uop); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -282,7 +274,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, fill_zero(view_dest); auto r = KE::transform_exclusive_scan("label", exespace(), view_from, view_dest, init_value, bop, uop); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -306,7 +298,7 @@ void run_all_scenarios() { } } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan) { run_all_scenarios<DynamicTag, double>(); run_all_scenarios<StridedThreeTag, double>(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp 
index a30d6737931e8797bd0b2d34f6faea35afe75fc9..21ce01fb10428cf16af5c7576c7503fba6e699a1 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_Numeric.hpp> #include <utility> namespace Test { @@ -207,7 +205,7 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same<gold_view_value_type, int>::value) { - EXPECT_TRUE(gold_h(i) == test_view_h(i)); + EXPECT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(gold_h(i) - test_view_h(i)); if (error > 1e-10) { @@ -215,7 +213,7 @@ void verify_data(ViewType1 data_view, // contains data << " " << gold_h(i) << " " << test_view_h(i) << " " << std::abs(gold_h(i) - test_view_h(i)) << std::endl; } - EXPECT_TRUE(error < 1e-10); + EXPECT_LT(error, 1e-10); } } // std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) << @@ -235,12 +233,6 @@ struct SumBinaryFunctor { ValueType operator()(const ValueType& a, const ValueType& b) const { return (a + b); } - - KOKKOS_INLINE_FUNCTION - ValueType operator()(const volatile ValueType& a, - const volatile ValueType& b) const { - return (a + b); - } }; std::string value_type_to_string(int) { return "int"; } @@ -282,7 +274,7 @@ void run_single_scenario(const InfoType& scenario_info, auto r = KE::transform_inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -291,7 +283,7 @@ void run_single_scenario(const InfoType& scenario_info, auto r = KE::transform_inclusive_scan( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), 
args...); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -299,7 +291,7 @@ void run_single_scenario(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::transform_inclusive_scan(exespace(), view_from, view_dest, args...); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -307,7 +299,7 @@ void run_single_scenario(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::transform_inclusive_scan("label", exespace(), view_from, view_dest, args...); - EXPECT_TRUE(r == KE::end(view_dest)); + EXPECT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -333,7 +325,7 @@ void run_all_scenarios() { } } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#if !defined KOKKOS_ENABLE_OPENMPTARGET TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan) { run_all_scenarios<DynamicTag, double>(); // run_all_scenarios<StridedThreeTag, double>(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp index 7adfc10c3e43830c5a4b1858630975bbca1b153b..35c293adcfff0947910ef09b633c15fbb9bb1bc3 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -88,7 +86,7 @@ void verify_data(ViewTypeFrom view_from, ViewTypeTest view_test) { create_mirror_view_and_copy(Kokkos::HostSpace(), view_from_dc); for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { - EXPECT_TRUE(view_test_h(i) == view_from_h(i) + value_type(1)); + EXPECT_EQ(view_test_h(i), view_from_h(i) + 
value_type(1)); } } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp index a13ba8240c73e822197b1871d5a4d80cf2068370..88dd4d259926c5f078188d2974c920c57249f508 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp @@ -43,8 +43,6 @@ */ #include <TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> #include <algorithm> @@ -187,7 +185,7 @@ void verify_data(const std::string& name, ResultIt my_result_it, // const auto std_diff = (std::size_t)(std_r - KE::begin(data_v_h)); const auto my_diff = (std::size_t)(my_result_it - KE::begin(view_test)); - EXPECT_TRUE(my_diff == std_diff); + EXPECT_EQ(my_diff, std_diff); // // check the data in the view @@ -200,14 +198,14 @@ void verify_data(const std::string& name, ResultIt my_result_it, // << " my = " << view_test_h(i) << " " // << " std = " << data_v_h(i) // << '\n'; - EXPECT_TRUE(view_test_h(i) == data_v_h(i)); + EXPECT_EQ(view_test_h(i), data_v_h(i)); } if (name == "medium-b") { using value_type = typename ViewType1::value_type; - EXPECT_TRUE(my_diff == (std::size_t)2); - EXPECT_TRUE(view_test_h(0) == (value_type)22); - EXPECT_TRUE(view_test_h(1) == (value_type)44); + EXPECT_EQ(my_diff, (std::size_t)2); + EXPECT_EQ(view_test_h(0), (value_type)22); + EXPECT_EQ(view_test_h(1), (value_type)44); } } diff --git a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp index 64f205b28e7c6db929054b63049c1f6588879032..fdede951701de39c0e55635667b90688e8fb867a 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp @@ -43,8 +43,6 @@ */ #include 
<TestStdAlgorithmsCommon.hpp> -#include <std_algorithms/Kokkos_BeginEnd.hpp> -#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include <utility> namespace Test { @@ -204,51 +202,51 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(1)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(1)); } else if (name == "one-element-b") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(2)); } else if (name == "two-elements-a") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(1)); - EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(1)); + EXPECT_EQ(view_test_h(1), static_cast<value_type>(2)); } else if (name == "two-elements-b") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(2)); - EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(-1)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(1), static_cast<value_type>(-1)); } else if (name == "small-a") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(1)); - EXPECT_TRUE(view_test_h(2) == static_cast<value_type>(2)); - EXPECT_TRUE(view_test_h(3) == static_cast<value_type>(3)); - EXPECT_TRUE(view_test_h(4) == static_cast<value_type>(4)); - EXPECT_TRUE(view_test_h(5) == static_cast<value_type>(5)); - EXPECT_TRUE(view_test_h(6) == static_cast<value_type>(6)); - EXPECT_TRUE(view_test_h(7) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(8) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(9) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(10) == static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(1), static_cast<value_type>(1)); + EXPECT_EQ(view_test_h(2), 
static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(3), static_cast<value_type>(3)); + EXPECT_EQ(view_test_h(4), static_cast<value_type>(4)); + EXPECT_EQ(view_test_h(5), static_cast<value_type>(5)); + EXPECT_EQ(view_test_h(6), static_cast<value_type>(6)); + EXPECT_EQ(view_test_h(7), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(8), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(9), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(10), static_cast<value_type>(0)); } else if (name == "small-b") { - EXPECT_TRUE(view_test_h(0) == static_cast<value_type>(1)); - EXPECT_TRUE(view_test_h(1) == static_cast<value_type>(2)); - EXPECT_TRUE(view_test_h(2) == static_cast<value_type>(3)); - EXPECT_TRUE(view_test_h(3) == static_cast<value_type>(4)); - EXPECT_TRUE(view_test_h(4) == static_cast<value_type>(5)); - EXPECT_TRUE(view_test_h(5) == static_cast<value_type>(6)); - EXPECT_TRUE(view_test_h(6) == static_cast<value_type>(8)); - EXPECT_TRUE(view_test_h(7) == static_cast<value_type>(9)); - EXPECT_TRUE(view_test_h(8) == static_cast<value_type>(8)); - EXPECT_TRUE(view_test_h(9) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(10) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(11) == static_cast<value_type>(0)); - EXPECT_TRUE(view_test_h(12) == static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(0), static_cast<value_type>(1)); + EXPECT_EQ(view_test_h(1), static_cast<value_type>(2)); + EXPECT_EQ(view_test_h(2), static_cast<value_type>(3)); + EXPECT_EQ(view_test_h(3), static_cast<value_type>(4)); + EXPECT_EQ(view_test_h(4), static_cast<value_type>(5)); + EXPECT_EQ(view_test_h(5), static_cast<value_type>(6)); + EXPECT_EQ(view_test_h(6), static_cast<value_type>(8)); + EXPECT_EQ(view_test_h(7), static_cast<value_type>(9)); + EXPECT_EQ(view_test_h(8), static_cast<value_type>(8)); + EXPECT_EQ(view_test_h(9), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(10), static_cast<value_type>(0)); + EXPECT_EQ(view_test_h(11), static_cast<value_type>(0)); 
+ EXPECT_EQ(view_test_h(12), static_cast<value_type>(0)); } else if (name == "medium" || name == "large") { @@ -260,7 +258,7 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, (void)std_r; for (std::size_t i = 0; i < view_from_h.extent(0); ++i) { - EXPECT_TRUE(view_test_h(i) == tmp[i]); + EXPECT_EQ(view_test_h(i), tmp[i]); } } @@ -303,7 +301,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { KE::unique_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); verify_data(name, view_from, view_dest, args...); - EXPECT_TRUE(rit == (KE::begin(view_dest) + n)); + EXPECT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -313,7 +311,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { KE::unique_copy("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); verify_data(name, view_from, view_dest, args...); - EXPECT_TRUE(rit == (KE::begin(view_dest) + n)); + EXPECT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -321,7 +319,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { create_view<ValueType>(Tag{}, view_ext, "unique_copy_dest"); auto rit = KE::unique_copy(exespace(), view_from, view_dest, args...); verify_data(name, view_from, view_dest, args...); - EXPECT_TRUE(rit == (KE::begin(view_dest) + n)); + EXPECT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -330,7 +328,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... 
args) { auto rit = KE::unique_copy("label", exespace(), view_from, view_dest, args...); verify_data(name, view_from, view_dest, args...); - EXPECT_TRUE(rit == (KE::begin(view_dest) + n)); + EXPECT_EQ(rit, (KE::begin(view_dest) + n)); } Kokkos::fence(); diff --git a/packages/kokkos/algorithms/unit_tests/TestStdReducers.cpp b/packages/kokkos/algorithms/unit_tests/TestStdReducers.cpp index 3ed2ecd839b4d2fa9088f079b98e767976bd8781..a88860749c8a8b8ff41a2b62b09ca596ef96aabb 100644 --- a/packages/kokkos/algorithms/unit_tests/TestStdReducers.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestStdReducers.cpp @@ -115,9 +115,9 @@ auto run_min_or_max_test(ViewType view, StdReducersTestEnumOrder enValue) { << "\n"; using view_value_type = typename ViewType::value_type; - using reducer_type = typename std::conditional< + using reducer_type = std::conditional_t< (flag == 0), Kokkos::MaxFirstLoc<view_value_type, IndexType, ExeSpace>, - Kokkos::MinFirstLoc<view_value_type, IndexType, ExeSpace> >::type; + Kokkos::MinFirstLoc<view_value_type, IndexType, ExeSpace> >; using reduction_value_type = typename reducer_type::value_type; reduction_value_type red_result; diff --git a/packages/kokkos/appveyor.yml b/packages/kokkos/appveyor.yml index 73a0d3187596be4d4c99ef9f211b93bd0659079e..e63fec718ae74e3c935de791e2ce099ab12ccca8 100644 --- a/packages/kokkos/appveyor.yml +++ b/packages/kokkos/appveyor.yml @@ -5,6 +5,6 @@ build_script: - cmd: >- mkdir build && cd build && - cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_3=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF && + cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_3=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF -DKokkos_ARCH_NATIVE=ON && cmake --build . 
--target install && ctest -C Debug --output-on-failure diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench.hpp b/packages/kokkos/benchmarks/bytes_and_flops/bench.hpp index 4fc6ca2c68b3a77e37360b90b678ae5c461204f6..be190e868ea06ac60f57a30ca29cb9aa71e6f97f 100644 --- a/packages/kokkos/benchmarks/bytes_and_flops/bench.hpp +++ b/packages/kokkos/benchmarks/bytes_and_flops/bench.hpp @@ -47,20 +47,21 @@ template <class Scalar, int Unroll, int Stride> struct Run { - static void run(int N, int K, int R, int F, int T, int S); + static void run(int N, int K, int R, int F, int T, int S, int B, int I); }; template <class Scalar, int Stride> struct RunStride { - static void run_1(int N, int K, int R, int F, int T, int S); - static void run_2(int N, int K, int R, int F, int T, int S); - static void run_3(int N, int K, int R, int F, int T, int S); - static void run_4(int N, int K, int R, int F, int T, int S); - static void run_5(int N, int K, int R, int F, int T, int S); - static void run_6(int N, int K, int R, int F, int T, int S); - static void run_7(int N, int K, int R, int F, int T, int S); - static void run_8(int N, int K, int R, int F, int T, int S); - static void run(int N, int K, int R, int U, int F, int T, int S); + static void run_1(int N, int K, int R, int F, int T, int S, int B, int I); + static void run_2(int N, int K, int R, int F, int T, int S, int B, int I); + static void run_3(int N, int K, int R, int F, int T, int S, int B, int I); + static void run_4(int N, int K, int R, int F, int T, int S, int B, int I); + static void run_5(int N, int K, int R, int F, int T, int S, int B, int I); + static void run_6(int N, int K, int R, int F, int T, int S, int B, int I); + static void run_7(int N, int K, int R, int F, int T, int S, int B, int I); + static void run_8(int N, int K, int R, int F, int T, int S, int B, int I); + static void run(int N, int K, int R, int U, int F, int T, int S, int B, + int I); }; #define STRIDE 1 @@ -83,11 +84,12 @@ struct RunStride { 
#undef STRIDE template <class Scalar> -void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) { - if (D == 1) RunStride<Scalar, 1>::run(N, K, R, U, F, T, S); - if (D == 2) RunStride<Scalar, 2>::run(N, K, R, U, F, T, S); - if (D == 4) RunStride<Scalar, 4>::run(N, K, R, U, F, T, S); - if (D == 8) RunStride<Scalar, 8>::run(N, K, R, U, F, T, S); - if (D == 16) RunStride<Scalar, 16>::run(N, K, R, U, F, T, S); - if (D == 32) RunStride<Scalar, 32>::run(N, K, R, U, F, T, S); +void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S, + int B, int I) { + if (D == 1) RunStride<Scalar, 1>::run(N, K, R, U, F, T, S, B, I); + if (D == 2) RunStride<Scalar, 2>::run(N, K, R, U, F, T, S, B, I); + if (D == 4) RunStride<Scalar, 4>::run(N, K, R, U, F, T, S, B, I); + if (D == 8) RunStride<Scalar, 8>::run(N, K, R, U, F, T, S, B, I); + if (D == 16) RunStride<Scalar, 16>::run(N, K, R, U, F, T, S, B, I); + if (D == 32) RunStride<Scalar, 32>::run(N, K, R, U, F, T, S, B, I); } diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_double.cpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_double.cpp index fb852377741a70c92e3f24baaf698431fa536fb7..73ad21b05cee90d8e55a0a1aa8af4cf5ef42812f 100644 --- a/packages/kokkos/benchmarks/bytes_and_flops/bench_double.cpp +++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_double.cpp @@ -45,4 +45,4 @@ #include <bench.hpp> template void run_stride_unroll<double>(int N, int K, int R, int D, int U, - int F, int T, int S); + int F, int T, int S, int B, int I); diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_float.cpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_float.cpp index 1df7a78c2ab1b7fd2fc381bffe7d2c5f16f06c31..3964df3fa8e15fd9274b208de40dfc085632a3b2 100644 --- a/packages/kokkos/benchmarks/bytes_and_flops/bench_float.cpp +++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_float.cpp @@ -45,4 +45,4 @@ #include <bench.hpp> template void run_stride_unroll<float>(int N, int K, 
int R, int D, int U, int F, - int T, int S); + int T, int S, int B, int I); diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp index 35fe4db376984871dca341ec585378b65339d500..d63c3080775c5c9d5bc39e3416fc783a3270749d 100644 --- a/packages/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp +++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp @@ -45,4 +45,4 @@ #include <bench.hpp> template void run_stride_unroll<int32_t>(int N, int K, int R, int D, int U, - int F, int T, int S); + int F, int T, int S, int B, int I); diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp index 3ce15027b7ddf99bca3a3c290a4c9aca64295417..51a31b16f0a88187e725fced27ff05baa367d318 100644 --- a/packages/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp +++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp @@ -45,4 +45,4 @@ #include <bench.hpp> template void run_stride_unroll<int64_t>(int N, int K, int R, int D, int U, - int F, int T, int S); + int F, int T, int S, int B, int I); diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp index 64817fe9dc4b5d1efcad168d9b2e0915da6d492a..c29f2a18c34bfc66ae04762e123fcb0f0b6d0931 100644 --- a/packages/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp +++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp @@ -69,55 +69,56 @@ template <class Scalar> struct RunStride<Scalar, STRIDE> { - static void run_1(int N, int K, int R, int F, int T, int S) { - Run<Scalar, 1, STRIDE>::run(N, K, R, F, T, S); + static void run_1(int N, int K, int R, int F, int T, int S, int B, int I) { + Run<Scalar, 1, STRIDE>::run(N, K, R, F, T, S, B, I); } - static void run_2(int N, int K, int R, int F, int T, int S) { - Run<Scalar, 2, STRIDE>::run(N, K, R, F, T, S); + static void run_2(int N, int 
K, int R, int F, int T, int S, int B, int I) { + Run<Scalar, 2, STRIDE>::run(N, K, R, F, T, S, B, I); } - static void run_3(int N, int K, int R, int F, int T, int S) { - Run<Scalar, 3, STRIDE>::run(N, K, R, F, T, S); + static void run_3(int N, int K, int R, int F, int T, int S, int B, int I) { + Run<Scalar, 3, STRIDE>::run(N, K, R, F, T, S, B, I); } - static void run_4(int N, int K, int R, int F, int T, int S) { - Run<Scalar, 4, STRIDE>::run(N, K, R, F, T, S); + static void run_4(int N, int K, int R, int F, int T, int S, int B, int I) { + Run<Scalar, 4, STRIDE>::run(N, K, R, F, T, S, B, I); } - static void run_5(int N, int K, int R, int F, int T, int S) { - Run<Scalar, 5, STRIDE>::run(N, K, R, F, T, S); + static void run_5(int N, int K, int R, int F, int T, int S, int B, int I) { + Run<Scalar, 5, STRIDE>::run(N, K, R, F, T, S, B, I); } - static void run_6(int N, int K, int R, int F, int T, int S) { - Run<Scalar, 6, STRIDE>::run(N, K, R, F, T, S); + static void run_6(int N, int K, int R, int F, int T, int S, int B, int I) { + Run<Scalar, 6, STRIDE>::run(N, K, R, F, T, S, B, I); } - static void run_7(int N, int K, int R, int F, int T, int S) { - Run<Scalar, 7, STRIDE>::run(N, K, R, F, T, S); + static void run_7(int N, int K, int R, int F, int T, int S, int B, int I) { + Run<Scalar, 7, STRIDE>::run(N, K, R, F, T, S, B, I); } - static void run_8(int N, int K, int R, int F, int T, int S) { - Run<Scalar, 8, STRIDE>::run(N, K, R, F, T, S); + static void run_8(int N, int K, int R, int F, int T, int S, int B, int I) { + Run<Scalar, 8, STRIDE>::run(N, K, R, F, T, S, B, I); } - static void run(int N, int K, int R, int U, int F, int T, int S) { + static void run(int N, int K, int R, int U, int F, int T, int S, int B, + int I) { if (U == 1) { - run_1(N, K, R, F, T, S); + run_1(N, K, R, F, T, S, B, I); } if (U == 2) { - run_2(N, K, R, F, T, S); + run_2(N, K, R, F, T, S, B, I); } if (U == 3) { - run_3(N, K, R, F, T, S); + run_3(N, K, R, F, T, S, B, I); } if (U == 4) { - run_4(N, 
K, R, F, T, S); + run_4(N, K, R, F, T, S, B, I); } if (U == 5) { - run_5(N, K, R, F, T, S); + run_5(N, K, R, F, T, S, B, I); } if (U == 6) { - run_6(N, K, R, F, T, S); + run_6(N, K, R, F, T, S, B, I); } if (U == 7) { - run_7(N, K, R, F, T, S); + run_7(N, K, R, F, T, S, B, I); } if (U == 8) { - run_8(N, K, R, F, T, S); + run_8(N, K, R, F, T, S, B, I); } } }; diff --git a/packages/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp b/packages/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp index 00ce635a489f677ff43b05c782856dcdfa1cafa9..58bf17b0bb8dc0f5a71b469d5c40aa82f4195c31 100644 --- a/packages/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp +++ b/packages/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp @@ -44,7 +44,7 @@ template <class Scalar> struct Run<Scalar, UNROLL, STRIDE> { - static void run(int N, int K, int R, int F, int T, int S) { + static void run(int N, int K, int R, int F, int T, int S, int Ba, int I) { Kokkos::View<Scalar* * [STRIDE], Kokkos::LayoutRight> A("A", N, K); Kokkos::View<Scalar* * [STRIDE], Kokkos::LayoutRight> B("B", N, K); Kokkos::View<Scalar* * [STRIDE], Kokkos::LayoutRight> C("C", N, K); @@ -54,98 +54,102 @@ struct Run<Scalar, UNROLL, STRIDE> { Kokkos::deep_copy(C, Scalar(3.5)); Kokkos::Timer timer; - Kokkos::parallel_for( - "BenchmarkKernel", - Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)), - KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { - const int n = team.league_rank(); - for (int r = 0; r < R; r++) { - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, 0, K), [&](const int& i) { - Scalar a1 = A(n, i, 0); - const Scalar b = B(n, i, 0); + for (int i = 0; i < I; ++i) { + Kokkos::parallel_for( + "BenchmarkKernel", + Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { + const int n = team.league_rank(); + for (int r = 0; r < R; r++) { + Kokkos::parallel_for( + 
Kokkos::TeamThreadRange(team, 0, K), [&](const int& i) { + Scalar a1 = A(n, i, 0); + const Scalar b = B(n, i, 0); #if (UNROLL > 1) - Scalar a2 = a1 * 1.3; + Scalar a2 = a1 * 1.3; #endif #if (UNROLL > 2) - Scalar a3 = a2 * 1.1; + Scalar a3 = a2 * 1.1; #endif #if (UNROLL > 3) - Scalar a4 = a3 * 1.1; + Scalar a4 = a3 * 1.1; #endif #if (UNROLL > 4) - Scalar a5 = a4 * 1.3; + Scalar a5 = a4 * 1.3; #endif #if (UNROLL > 5) - Scalar a6 = a5 * 1.1; + Scalar a6 = a5 * 1.1; #endif #if (UNROLL > 6) - Scalar a7 = a6 * 1.1; + Scalar a7 = a6 * 1.1; #endif #if (UNROLL > 7) - Scalar a8 = a7 * 1.1; + Scalar a8 = a7 * 1.1; #endif - for (int f = 0; f < F; f++) { - a1 += b * a1; + for (int f = 0; f < F; f++) { + a1 += b * a1; #if (UNROLL > 1) - a2 += b * a2; + a2 += b * a2; #endif #if (UNROLL > 2) - a3 += b * a3; + a3 += b * a3; #endif #if (UNROLL > 3) - a4 += b * a4; + a4 += b * a4; #endif #if (UNROLL > 4) - a5 += b * a5; + a5 += b * a5; #endif #if (UNROLL > 5) - a6 += b * a6; + a6 += b * a6; #endif #if (UNROLL > 6) - a7 += b * a7; + a7 += b * a7; #endif #if (UNROLL > 7) - a8 += b * a8; + a8 += b * a8; #endif - } + } #if (UNROLL == 1) - C(n, i, 0) = a1; + C(n, i, 0) = a1; #endif #if (UNROLL == 2) - C(n, i, 0) = a1 + a2; + C(n, i, 0) = a1 + a2; #endif #if (UNROLL == 3) - C(n, i, 0) = a1 + a2 + a3; + C(n, i, 0) = a1 + a2 + a3; #endif #if (UNROLL == 4) - C(n, i, 0) = a1 + a2 + a3 + a4; + C(n, i, 0) = a1 + a2 + a3 + a4; #endif #if (UNROLL == 5) - C(n, i, 0) = a1 + a2 + a3 + a4 + a5; + C(n, i, 0) = a1 + a2 + a3 + a4 + a5; #endif #if (UNROLL == 6) - C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6; + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6; #endif #if (UNROLL == 7) - C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7; + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7; #endif #if (UNROLL == 8) - C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8; + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8; #endif - }); - } - }); + }); + } + }); + } Kokkos::fence(); - double seconds = timer.seconds(); + double 
seconds = timer.seconds() / static_cast<double>(I); double bytes = 1.0 * N * K * R * 3 * sizeof(Scalar); + bytes /= ((Ba == 2) ? (1024 * 1024 * 1024) : (1000 * 1000 * 1000)); double flops = 1.0 * N * K * R * (F * 2 * UNROLL + 2 * (UNROLL - 1)); printf( - "NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: " + "NKRUFTSBI: %i %i %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lf%s " + "GFlop/s: " "%lf\n", - N, K, R, UNROLL, F, T, S, seconds, - 1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds); + N, K, R, UNROLL, F, T, S, Ba, I, seconds, 1.0 * bytes / seconds, + Ba == 2 ? "GiB/s" : "GB/s", 1.e-9 * flops / seconds); } }; diff --git a/packages/kokkos/benchmarks/bytes_and_flops/main.cpp b/packages/kokkos/benchmarks/bytes_and_flops/main.cpp index f05c7d9f514dd73d7e0326d1a25dac7fe78aac5a..b26b8ef5ed83fa4a8fae302a7664013e049e69bc 100644 --- a/packages/kokkos/benchmarks/bytes_and_flops/main.cpp +++ b/packages/kokkos/benchmarks/bytes_and_flops/main.cpp @@ -48,19 +48,19 @@ #include <cstdlib> extern template void run_stride_unroll<float>(int, int, int, int, int, int, int, - int); + int, int, int); extern template void run_stride_unroll<double>(int, int, int, int, int, int, - int, int); + int, int, int, int); extern template void run_stride_unroll<int32_t>(int, int, int, int, int, int, - int, int); + int, int, int, int); extern template void run_stride_unroll<int64_t>(int, int, int, int, int, int, - int, int); + int, int, int, int); int main(int argc, char* argv[]) { Kokkos::initialize(); if (argc < 10) { - printf("Arguments: N K R D U F T S\n"); + printf("Arguments: N K R D U F T S B I\n"); printf(" P: Precision (1==float, 2==double, 3==int32_t, 4==int64_t)\n"); printf(" N,K: dimensions of the 2D array to allocate\n"); printf(" R: how often to loop through the K dimension with each team\n"); @@ -72,6 +72,10 @@ int main(int argc, char* argv[]) { printf(" T: team size\n"); printf( " S: shared memory per team (used to control occupancy on 
GPUs)\n"); + printf( + " B: units for reported memory bandwidths (2=GiB, 10=GB, " + "default=2)\n"); + printf(" I: iterations of the kernel to time over (default=10)\n"); printf("Example Input GPU:\n"); printf(" Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n"); printf(" Cache Bound : 2 100000 1024 64 1 1 1 512 20000\n"); @@ -92,6 +96,16 @@ int main(int argc, char* argv[]) { int T = std::stoi(argv[8]); int S = std::stoi(argv[9]); + int B = 2; + if (argc >= 11) { + B = std::atoi(argv[10]); + } + + int I = 10; + if (argc >= 12) { + I = std::atoi(argv[11]); + } + if (U > 8) { printf("U must be 1-8\n"); return 0; @@ -105,17 +119,27 @@ int main(int argc, char* argv[]) { return 0; } + if ((B != 2) && (B != 10)) { + printf("B must be one of 2,10\n"); + return 0; + } + + if (I < 1) { + printf("I must be >= 1\n"); + return 0; + } + if (P == 1) { - run_stride_unroll<float>(N, K, R, D, U, F, T, S); + run_stride_unroll<float>(N, K, R, D, U, F, T, S, B, I); } if (P == 2) { - run_stride_unroll<double>(N, K, R, D, U, F, T, S); + run_stride_unroll<double>(N, K, R, D, U, F, T, S, B, I); } if (P == 3) { - run_stride_unroll<int32_t>(N, K, R, D, U, F, T, S); + run_stride_unroll<int32_t>(N, K, R, D, U, F, T, S, B, I); } if (P == 4) { - run_stride_unroll<int64_t>(N, K, R, D, U, F, T, S); + run_stride_unroll<int64_t>(N, K, R, D, U, F, T, S, B, I); } Kokkos::finalize(); diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper index 8c168412e792459bed03478e950e88ca1aa10b47..e1a208813863515b88cce5399f53c79239d4c0fe 100755 --- a/packages/kokkos/bin/nvcc_wrapper +++ b/packages/kokkos/bin/nvcc_wrapper @@ -227,7 +227,7 @@ do fi ;; #Handle known nvcc args - --dryrun|--verbose|--keep|--keep-dir*|-G|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) + 
--dryrun|--verbose|--keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-G|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) cuda_args="$cuda_args $1" ;; #Handle more known nvcc args @@ -242,6 +242,77 @@ do cuda_args="$cuda_args $1 $2" shift ;; + # Handle Werror. Note, we must differentiate between the ones going to nvcc and the host compiler + # --Werror kind,... OR --Werror=kind,... <- always to nvcc + --Werror) + cuda_args="$cuda_args $1 $2" + shift + ;; + --Werror=*) + cuda_args="$cuda_args $1" + ;; + # -Werror kind,... where kind is one of {all-warnings, cross-execution-space-call, reorder, default-stream-launch, missing-launch-bounds, ext-lambda-captures-this, deprecated-declarations} <- goes to nvcc + # -Werror not followed by any kind as mentioned above goes to host compiler without any arguments + -Werror) + if [ $# -gt 1 ]; then + IFS="," read -r -a kinds <<< "$2" + first_kind=${kinds[0]} + # check if the first kind is one of the allowed ones, then this must be an nvcc list so put all of them to the cuda compiler + case $first_kind in + all-warnings|cross-execution-space-call|reorder|default-stream-launch|missing-launch-bounds|ext-lambda-captures-this|deprecated-declarations) + cuda_args="$cuda_args $1 $2" + shift + ;; + *) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$1" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$1" + fi + ;; + esac + fi + ;; + # -Werror=kind,... 
will be split into two parts, those kinds that belong to nvcc (see above) go there, while all others go towards the host compiler + -Werror=*) + kinds_str="${1:8}" # strip -Werror= + IFS="," read -r -a kinds <<< ${kinds_str} + first_werror_cuda=1 + first_werror_host=1 + xcompiler_args_werror= + # loop over all kinds that are sparated via ',' + for kind in "${kinds[@]}" + do + case ${kind} in + all-warnings|cross-execution-space-call|reorder|default-stream-launch|missing-launch-bounds|ext-lambda-captures-this|deprecated-declarations) + if [ $first_werror_cuda -ne 0 ]; then + cuda_args="$cuda_args -Werror=" + first_werror_cuda=0 + else + cuda_args="$cuda_args," + fi + cuda_args="$cuda_args$kind" + ;; + *) + if [ $first_werror_host -eq 0 ]; then + xcompiler_args_werror="${xcompiler_args_werror}," + fi + first_werror_host=0 + xcompiler_args_werror="$xcompiler_args_werror-Werror=$kind" + ;; + esac + done + if [ $first_werror_host -eq 0 ]; then + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$xcompiler_args_werror" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$xcompiler_args_werror" + fi + fi + ;; + # End of Werror handling #Handle unsupported standard flags --std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a) fallback_std_flag="-std=c++14" @@ -310,7 +381,7 @@ do -std=c++98|--std=c++98) ;; #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor - -pedantic|-Wpedantic|-ansi) + -pedantic|-pedantic-errors|-Wpedantic|-ansi) ;; #strip of -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C" -Woverloaded-virtual) diff --git a/packages/kokkos/cmake/Dependencies.cmake b/packages/kokkos/cmake/Dependencies.cmake index c0be9f56411311a38a0f43a9b07b1109a0135bd8..10df9fe45694f7ed74e75fac1127af4b72bc3c02 100644 --- a/packages/kokkos/cmake/Dependencies.cmake +++ 
b/packages/kokkos/cmake/Dependencies.cmake @@ -1,3 +1,14 @@ +IF (CMAKE_CXX_STANDARD GREATER_EQUAL 17) + SET(KOKKOS_SIMD_TEST_CLASS PT) +ELSE() + SET(KOKKOS_SIMD_TEST_CLASS EX) + IF (${PROJECT_NAME}_ENABLE_KokkosSimd) + MESSAGE(WARNING "KokkosSimd is explicitly enabled but C++17 is not available") + ELSE() + MESSAGE(STATUS "Disabling KokkosSimd by default because C++17 is not available") + ENDIF() +ENDIF() + TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS #SubPackageName Directory Class Req/Opt @@ -6,4 +17,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( Core core PS REQUIRED Containers containers PS OPTIONAL Algorithms algorithms PS OPTIONAL + Simd simd ${KOKKOS_SIMD_TEST_CLASS} OPTIONAL ) diff --git a/packages/kokkos/cmake/KokkosConfigCommon.cmake.in b/packages/kokkos/cmake/KokkosConfigCommon.cmake.in index 6788e77ade83368cfbb55a831420ea714361b5b2..23bc86cc8417080d718f01753ffa5f896d6ab8d5 100644 --- a/packages/kokkos/cmake/KokkosConfigCommon.cmake.in +++ b/packages/kokkos/cmake/KokkosConfigCommon.cmake.in @@ -4,6 +4,7 @@ SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@) SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@) SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") SET(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") +SET(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@) # These are needed by KokkosKernels FOREACH(DEV ${Kokkos_DEVICES}) diff --git a/packages/kokkos/cmake/KokkosCore_config.h.in b/packages/kokkos/cmake/KokkosCore_config.h.in index f3fd9f6d7aa127693a934e5230d5a346b7b8ef1a..34807ac2b26228a4f0c10aa3ee5c4f7951ac235f 100644 --- a/packages/kokkos/cmake/KokkosCore_config.h.in +++ b/packages/kokkos/cmake/KokkosCore_config.h.in @@ -14,6 +14,7 @@ /* Execution Spaces */ #cmakedefine KOKKOS_ENABLE_SERIAL #cmakedefine KOKKOS_ENABLE_OPENMP +#cmakedefine KOKKOS_ENABLE_OPENACC #cmakedefine KOKKOS_ENABLE_OPENMPTARGET #cmakedefine KOKKOS_ENABLE_THREADS #cmakedefine KOKKOS_ENABLE_CUDA @@ -23,14 +24,6 @@ #cmakedefine KOKKOS_ENABLE_LIBRT #cmakedefine 
KOKKOS_ENABLE_SYCL -#ifndef __CUDA_ARCH__ -#cmakedefine KOKKOS_ENABLE_TM -#cmakedefine KOKKOS_USE_ISA_X86_64 -#cmakedefine KOKKOS_USE_ISA_KNC -#cmakedefine KOKKOS_USE_ISA_POWERPCLE -#cmakedefine KOKKOS_USE_ISA_POWERPCBE -#endif - /* General Settings */ #cmakedefine KOKKOS_ENABLE_CXX14 #cmakedefine KOKKOS_ENABLE_CXX17 @@ -48,13 +41,11 @@ #cmakedefine KOKKOS_ENABLE_DEBUG #cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK #cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK -#cmakedefine KOKKOS_ENABLE_COMPILER_WARNINGS #cmakedefine KOKKOS_ENABLE_PROFILING_LOAD_PRINT #cmakedefine KOKKOS_ENABLE_TUNING #cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE_3 #cmakedefine KOKKOS_ENABLE_DEPRECATION_WARNINGS #cmakedefine KOKKOS_ENABLE_LARGE_MEM_TESTS -#cmakedefine KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK #cmakedefine KOKKOS_ENABLE_COMPLEX_ALIGN #cmakedefine KOKKOS_ENABLE_IMPL_DESUL_ATOMICS #cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated diff --git a/packages/kokkos/cmake/Modules/FindTPLLIBDL.cmake b/packages/kokkos/cmake/Modules/FindTPLLIBDL.cmake index 5fc6a693035cea5e05b379f5fac9d50bcaeb3f7a..8adcdcdbb8e3f02d3959c8605f1470f57425152c 100644 --- a/packages/kokkos/cmake/Modules/FindTPLLIBDL.cmake +++ b/packages/kokkos/cmake/Modules/FindTPLLIBDL.cmake @@ -1 +1 @@ -KOKKOS_FIND_IMPORTED(LIBDL HEADER dlfcn.h LIBRARY dl) +KOKKOS_FIND_IMPORTED(LIBDL HEADER dlfcn.h INTERFACE LIBRARIES ${CMAKE_DL_LIBS}) diff --git a/packages/kokkos/cmake/Modules/FindTPLTHREADS.cmake b/packages/kokkos/cmake/Modules/FindTPLTHREADS.cmake index 2d140c85c43caed2103e6da64769d282e33f20a7..0e3c9f8dd6f978d9a43a8b88f7b0fd4aa3f9a352 100644 --- a/packages/kokkos/cmake/Modules/FindTPLTHREADS.cmake +++ b/packages/kokkos/cmake/Modules/FindTPLTHREADS.cmake @@ -1,5 +1,5 @@ INCLUDE(FindPackageHandleStandardArgs) -INCLUDE("${CMAKE_SOURCE_DIR}/cmake/tpls/FindTPLPthread.cmake") +INCLUDE("${CMAKE_CURRENT_SOURCE_DIR}/cmake/tpls/FindTPLPthread.cmake") IF (TARGET Threads::Threads) SET(FOUND_THREADS TRUE) diff --git 
a/packages/kokkos/cmake/fake_tribits.cmake b/packages/kokkos/cmake/fake_tribits.cmake index fbd6745a602caa8976958d10cf7d9b4c1fa3c471..f39457205735d1b94fd04382e98289d1c6ec54dd 100644 --- a/packages/kokkos/cmake/fake_tribits.cmake +++ b/packages/kokkos/cmake/fake_tribits.cmake @@ -3,8 +3,6 @@ INCLUDE(CMakeParseArguments) INCLUDE(CTest) -cmake_policy(SET CMP0054 NEW) - FUNCTION(ASSERT_DEFINED VARS) FOREACH(VAR ${VARS}) IF(NOT DEFINED ${VAR}) diff --git a/packages/kokkos/cmake/kokkos_arch.cmake b/packages/kokkos/cmake/kokkos_arch.cmake index a8b5b68478ecc1e8c32410340b809bd55e0a06f7..d4c2cda651f3510bd66e9b8faff344ebf0cf666a 100644 --- a/packages/kokkos/cmake/kokkos_arch.cmake +++ b/packages/kokkos/cmake/kokkos_arch.cmake @@ -1,7 +1,7 @@ -FUNCTION(KOKKOS_ARCH_OPTION SUFFIX DEV_TYPE DESCRIPTION) +FUNCTION(KOKKOS_ARCH_OPTION SUFFIX DEV_TYPE DESCRIPTION DEPENDENCY) #all optimizations off by default - KOKKOS_OPTION(ARCH_${SUFFIX} OFF BOOL "Optimize for ${DESCRIPTION} (${DEV_TYPE})") + KOKKOS_DEPENDENT_OPTION(ARCH_${SUFFIX} "Optimize for ${DESCRIPTION} (${DEV_TYPE})" OFF "${DEPENDENCY}" OFF) SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE) SET(KOKKOS_OPTION_KEYS ${KOKKOS_OPTION_KEYS} PARENT_SCOPE) SET(KOKKOS_OPTION_VALUES ${KOKKOS_OPTION_VALUES} PARENT_SCOPE) @@ -30,51 +30,83 @@ SET(KOKKOS_ARCH_LIST) KOKKOS_DEPRECATED_LIST(ARCH ARCH) -KOKKOS_ARCH_OPTION(AMDAVX HOST "AMD chip") -KOKKOS_ARCH_OPTION(ARMV80 HOST "ARMv8.0 Compatible CPU") -KOKKOS_ARCH_OPTION(ARMV81 HOST "ARMv8.1 Compatible CPU") -KOKKOS_ARCH_OPTION(ARMV8_THUNDERX HOST "ARMv8 Cavium ThunderX CPU") -KOKKOS_ARCH_OPTION(ARMV8_THUNDERX2 HOST "ARMv8 Cavium ThunderX2 CPU") -KOKKOS_ARCH_OPTION(A64FX HOST "ARMv8.2 with SVE Support") -KOKKOS_ARCH_OPTION(WSM HOST "Intel Westmere CPU") -KOKKOS_ARCH_OPTION(SNB HOST "Intel Sandy/Ivy Bridge CPUs") -KOKKOS_ARCH_OPTION(HSW HOST "Intel Haswell CPUs") -KOKKOS_ARCH_OPTION(BDW HOST "Intel Broadwell Xeon E-class CPUs") -KOKKOS_ARCH_OPTION(SKX HOST "Intel Sky Lake Xeon 
E-class HPC CPUs (AVX512)") -KOKKOS_ARCH_OPTION(KNC HOST "Intel Knights Corner Xeon Phi") -KOKKOS_ARCH_OPTION(KNL HOST "Intel Knights Landing Xeon Phi") -KOKKOS_ARCH_OPTION(BGQ HOST "IBM Blue Gene Q") -KOKKOS_ARCH_OPTION(POWER7 HOST "IBM POWER7 CPUs") -KOKKOS_ARCH_OPTION(POWER8 HOST "IBM POWER8 CPUs") -KOKKOS_ARCH_OPTION(POWER9 HOST "IBM POWER9 CPUs") -KOKKOS_ARCH_OPTION(KEPLER30 GPU "NVIDIA Kepler generation CC 3.0") -KOKKOS_ARCH_OPTION(KEPLER32 GPU "NVIDIA Kepler generation CC 3.2") -KOKKOS_ARCH_OPTION(KEPLER35 GPU "NVIDIA Kepler generation CC 3.5") -KOKKOS_ARCH_OPTION(KEPLER37 GPU "NVIDIA Kepler generation CC 3.7") -KOKKOS_ARCH_OPTION(MAXWELL50 GPU "NVIDIA Maxwell generation CC 5.0") -KOKKOS_ARCH_OPTION(MAXWELL52 GPU "NVIDIA Maxwell generation CC 5.2") -KOKKOS_ARCH_OPTION(MAXWELL53 GPU "NVIDIA Maxwell generation CC 5.3") -KOKKOS_ARCH_OPTION(PASCAL60 GPU "NVIDIA Pascal generation CC 6.0") -KOKKOS_ARCH_OPTION(PASCAL61 GPU "NVIDIA Pascal generation CC 6.1") -KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0") -KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2") -KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5") -KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0") -KOKKOS_ARCH_OPTION(AMPERE86 GPU "NVIDIA Ampere generation CC 8.6") -KOKKOS_ARCH_OPTION(ZEN HOST "AMD Zen architecture") -KOKKOS_ARCH_OPTION(ZEN2 HOST "AMD Zen2 architecture") -KOKKOS_ARCH_OPTION(ZEN3 HOST "AMD Zen3 architecture") -KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900") -KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906") -KOKKOS_ARCH_OPTION(VEGA908 GPU "AMD GPU MI100 GFX908") -KOKKOS_ARCH_OPTION(VEGA90A GPU "AMD GPU MI200 GFX90A") -KOKKOS_ARCH_OPTION(INTEL_GEN GPU "Intel GPUs Gen9+") -KOKKOS_ARCH_OPTION(INTEL_DG1 GPU "Intel Iris XeMAX GPU") -KOKKOS_ARCH_OPTION(INTEL_GEN9 GPU "Intel GPU Gen9") -KOKKOS_ARCH_OPTION(INTEL_GEN11 GPU "Intel GPU Gen11") -KOKKOS_ARCH_OPTION(INTEL_GEN12LP GPU "Intel GPU Gen12LP") 
-KOKKOS_ARCH_OPTION(INTEL_XEHP GPU "Intel GPU Xe-HP") +SET(HOST_ARCH_ALREADY_SPECIFIED "") +MACRO(DECLARE_AND_CHECK_HOST_ARCH ARCH LABEL) + KOKKOS_ARCH_OPTION(${ARCH} HOST "${LABEL}" TRUE) + IF(KOKKOS_ARCH_${ARCH}) + IF(HOST_ARCH_ALREADY_SPECIFIED) + MESSAGE(FATAL_ERROR "Multiple host architectures given! Already have ${HOST_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") + ENDIF() + SET(HOST_ARCH_ALREADY_SPECIFIED ${ARCH}) + ENDIF() +ENDMACRO() + +DECLARE_AND_CHECK_HOST_ARCH(NATIVE "local machine") +DECLARE_AND_CHECK_HOST_ARCH(AMDAVX "AMD chip") +DECLARE_AND_CHECK_HOST_ARCH(ARMV80 "ARMv8.0 Compatible CPU") +DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU") +DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU") +DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU") +DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support") +DECLARE_AND_CHECK_HOST_ARCH(WSM "Intel Westmere CPU") +DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs") +DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs") +DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs") +DECLARE_AND_CHECK_HOST_ARCH(ICL "Intel Ice Lake Client CPUs (AVX512)") +DECLARE_AND_CHECK_HOST_ARCH(ICX "Intel Ice Lake Xeon Server CPUs (AVX512)") +DECLARE_AND_CHECK_HOST_ARCH(SKL "Intel Skylake Client CPUs") +DECLARE_AND_CHECK_HOST_ARCH(SKX "Intel Skylake Xeon Server CPUs (AVX512)") +DECLARE_AND_CHECK_HOST_ARCH(KNC "Intel Knights Corner Xeon Phi") +DECLARE_AND_CHECK_HOST_ARCH(KNL "Intel Knights Landing Xeon Phi") +DECLARE_AND_CHECK_HOST_ARCH(SPR "Intel Sapphire Rapids Xeon Server CPUs (AVX512)") +DECLARE_AND_CHECK_HOST_ARCH(BGQ "IBM Blue Gene Q") +DECLARE_AND_CHECK_HOST_ARCH(POWER7 "IBM POWER7 CPUs") +DECLARE_AND_CHECK_HOST_ARCH(POWER8 "IBM POWER8 CPUs") +DECLARE_AND_CHECK_HOST_ARCH(POWER9 "IBM POWER9 CPUs") +DECLARE_AND_CHECK_HOST_ARCH(ZEN "AMD Zen architecture") 
+DECLARE_AND_CHECK_HOST_ARCH(ZEN2 "AMD Zen2 architecture") +DECLARE_AND_CHECK_HOST_ARCH(ZEN3 "AMD Zen3 architecture") + +IF(Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_OPENACC OR Kokkos_ENABLE_UNSUPPORTED_ARCHS) + SET(KOKKOS_SHOW_CUDA_ARCHS ON) +ENDIF() + +KOKKOS_ARCH_OPTION(KEPLER30 GPU "NVIDIA Kepler generation CC 3.0" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(KEPLER32 GPU "NVIDIA Kepler generation CC 3.2" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(KEPLER35 GPU "NVIDIA Kepler generation CC 3.5" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(KEPLER37 GPU "NVIDIA Kepler generation CC 3.7" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(MAXWELL50 GPU "NVIDIA Maxwell generation CC 5.0" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(MAXWELL52 GPU "NVIDIA Maxwell generation CC 5.2" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(MAXWELL53 GPU "NVIDIA Maxwell generation CC 5.3" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(PASCAL60 GPU "NVIDIA Pascal generation CC 6.0" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(PASCAL61 GPU "NVIDIA Pascal generation CC 6.1" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(AMPERE86 GPU "NVIDIA Ampere generation CC 8.6" "KOKKOS_SHOW_CUDA_ARCHS") + +IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_UNSUPPORTED_ARCHS) + SET(KOKKOS_SHOW_HIP_ARCHS ON) +ENDIF() + +KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900" "KOKKOS_SHOW_HIP_ARCHS") +KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906" "KOKKOS_SHOW_HIP_ARCHS") +KOKKOS_ARCH_OPTION(VEGA908 GPU "AMD GPU MI100 GFX908" "KOKKOS_SHOW_HIP_ARCHS") +KOKKOS_ARCH_OPTION(VEGA90A GPU "AMD GPU 
MI200 GFX90A" "KOKKOS_SHOW_HIP_ARCHS") + +IF(Kokkos_ENABLE_SYCL OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_UNSUPPORTED_ARCHS) + SET(KOKKOS_SHOW_SYCL_ARCHS ON) +ENDIF() + +KOKKOS_ARCH_OPTION(INTEL_GEN GPU "SPIR64-based devices, e.g. Intel GPUs, using JIT" "KOKKOS_SHOW_SYCL_ARCHS") +KOKKOS_ARCH_OPTION(INTEL_DG1 GPU "Intel Iris XeMAX GPU" "KOKKOS_SHOW_SYCL_ARCHS") +KOKKOS_ARCH_OPTION(INTEL_GEN9 GPU "Intel GPU Gen9" "KOKKOS_SHOW_SYCL_ARCHS") +KOKKOS_ARCH_OPTION(INTEL_GEN11 GPU "Intel GPU Gen11" "KOKKOS_SHOW_SYCL_ARCHS") +KOKKOS_ARCH_OPTION(INTEL_GEN12LP GPU "Intel GPU Gen12LP" "KOKKOS_SHOW_SYCL_ARCHS") +KOKKOS_ARCH_OPTION(INTEL_XEHP GPU "Intel GPU Xe-HP" "KOKKOS_SHOW_SYCL_ARCHS") +KOKKOS_ARCH_OPTION(INTEL_PVC GPU "Intel GPU Ponte Vecchio" "KOKKOS_SHOW_SYCL_ARCHS") IF(KOKKOS_ENABLE_COMPILER_WARNINGS) SET(COMMON_WARNINGS @@ -92,6 +124,13 @@ IF(KOKKOS_ENABLE_COMPILER_WARNINGS) LIST(REMOVE_ITEM COMMON_WARNINGS "-Wsign-compare") ENDIF() + # NVHPC compiler does not support -Wtype-limits. + IF(KOKKOS_ENABLE_OPENACC) + IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) + LIST(REMOVE_ITEM COMMON_WARNINGS "-Wtype-limits") + ENDIF() + ENDIF() + IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) LIST(APPEND COMMON_WARNINGS "-Wimplicit-fallthrough") ENDIF() @@ -102,12 +141,15 @@ IF(KOKKOS_ENABLE_COMPILER_WARNINGS) LIST(APPEND GNU_WARNINGS "-Wimplicit-fallthrough") ENDIF() - COMPILER_SPECIFIC_FLAGS( - COMPILER_ID CMAKE_CXX_COMPILER_ID - NVHPC NO-VALUE-SPECIFIED - GNU ${GNU_WARNINGS} - DEFAULT ${COMMON_WARNINGS} - ) + # Not using COMPILER_SPECIFIC_FLAGS function so the warning flags are not passed downstream + IF(CMAKE_CXX_COMPILER_ID STREQUAL GNU) + STRING(REPLACE ";" " " WARNING_FLAGS "${GNU_WARNINGS}") + ELSEIF(CMAKE_CXX_COMPILER_ID STREQUAL NVHPC) + # FIXME_NVHPC + ELSE() + STRING(REPLACE ";" " " WARNING_FLAGS "${COMMON_WARNINGS}") + ENDIF() + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WARNING_FLAGS}") ENDIF() @@ -142,6 +184,10 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) IF 
(KOKKOS_ENABLE_CUDA) SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE) ENDIF() +ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) + SET(CUDA_ARCH_FLAG "-gpu") + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -cuda) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) SET(CUDA_ARCH_FLAG "-arch") ENDIF() @@ -162,10 +208,8 @@ ENDIF() #clear anything that might be in the cache GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS) IF(KOKKOS_ENABLE_HIP) - IF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) - SET(AMDGPU_ARCH_FLAG "--amdgpu-target") - ELSE() - SET(AMDGPU_ARCH_FLAG "--offload-arch") + SET(AMDGPU_ARCH_FLAG "--offload-arch") + IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS -x hip) IF(DEFINED ENV{ROCM_PATH}) GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS --rocm-path=$ENV{ROCM_PATH}) @@ -174,6 +218,13 @@ IF(KOKKOS_ENABLE_HIP) ENDIF() +IF(KOKKOS_ARCH_NATIVE) + COMPILER_SPECIFIC_FLAGS( + COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID + DEFAULT -march=native -mtune=native + ) +ENDIF() + IF (KOKKOS_ARCH_ARMV80) COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID @@ -312,13 +363,22 @@ IF (KOKKOS_ARCH_KNL) ENDIF() IF (KOKKOS_ARCH_KNC) - SET(KOKKOS_USE_ISA_KNC ON) COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID DEFAULT -mmic ) ENDIF() +IF (KOKKOS_ARCH_SKL) + COMPILER_SPECIFIC_FLAGS( + COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID + Intel -xSKYLAKE + NVHPC -tp=skylake + Cray NO-VALUE-SPECIFIED + DEFAULT -march=skylake -mtune=skylake + ) +ENDIF() + IF (KOKKOS_ARCH_SKX) #avx512-xeon SET(KOKKOS_ARCH_AVX512XEON ON) @@ -327,16 +387,32 @@ IF (KOKKOS_ARCH_SKX) Intel -xCORE-AVX512 NVHPC -tp=skylake Cray NO-VALUE-SPECIFIED - DEFAULT -march=skylake-avx512 -mtune=skylake-avx512 -mrtm + DEFAULT -march=skylake-avx512 -mtune=skylake-avx512 ) ENDIF() -IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2 OR 
KOKKOS_ARCH_ZEN3) - SET(KOKKOS_USE_ISA_X86_64 ON) +IF (KOKKOS_ARCH_ICL) + SET(KOKKOS_ARCH_AVX512XEON ON) + COMPILER_SPECIFIC_FLAGS( + COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID + DEFAULT -march=icelake-client -mtune=icelake-client + ) ENDIF() -IF (KOKKOS_ARCH_BDW OR KOKKOS_ARCH_SKX) - SET(KOKKOS_ENABLE_TM ON) #not a cache variable +IF (KOKKOS_ARCH_ICX) + SET(KOKKOS_ARCH_AVX512XEON ON) + COMPILER_SPECIFIC_FLAGS( + COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID + DEFAULT -march=icelake-server -mtune=icelake-server + ) +ENDIF() + +IF (KOKKOS_ARCH_SPR) + SET(KOKKOS_ARCH_AVX512XEON ON) + COMPILER_SPECIFIC_FLAGS( + COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID + DEFAULT -march=sapphirerapids -mtune=sapphirerapids + ) ENDIF() IF (KOKKOS_ARCH_POWER7) @@ -345,7 +421,6 @@ IF (KOKKOS_ARCH_POWER7) NVHPC NO-VALUE-SPECIFIED DEFAULT -mcpu=power7 -mtune=power7 ) - SET(KOKKOS_USE_ISA_POWERPCBE ON) ENDIF() IF (KOKKOS_ARCH_POWER8) @@ -364,14 +439,15 @@ IF (KOKKOS_ARCH_POWER9) ) ENDIF() -IF (KOKKOS_ARCH_POWER8 OR KOKKOS_ARCH_POWER9) - SET(KOKKOS_USE_ISA_POWERPCLE ON) -ENDIF() - IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) COMPILER_SPECIFIC_FLAGS( Clang -fcuda-rdc NVIDIA --relocatable-device-code=true + NVHPC -gpu=rdc + ) +ELSEIF(KOKKOS_ENABLE_CUDA) + COMPILER_SPECIFIC_FLAGS( + NVHPC -gpu=nordc ) ENDIF() @@ -421,8 +497,8 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") ENDIF() SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) - IF (NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMPTARGET AND NOT KOKKOS_ENABLE_SYCL) - MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA and Kokkos_ENABLE_OPENMPTARGET are OFF. 
Option will be ignored.") + IF (NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMPTARGET AND NOT KOKKOS_ENABLE_SYCL AND NOT KOKKOS_ENABLE_OPENACC) + MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA, Kokkos_ENABLE_OPENACC, and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.") UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) ELSE() SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE) @@ -430,9 +506,15 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) string(REPLACE "sm_" "" CMAKE_ARCH ${FLAG}) SET(CMAKE_CUDA_ARCHITECTURES ${CMAKE_ARCH} PARENT_SCOPE) ELSE() - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") - IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) + STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${FLAG}) + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}") + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}") + ELSE() + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + ENDIF() ENDIF() ENDIF() ENDIF() @@ -529,13 +611,16 @@ ENDIF() IF(KOKKOS_ARCH_INTEL_XEHP) CHECK_MULTIPLE_INTEL_ARCH() ENDIF() +IF(KOKKOS_ARCH_INTEL_PVC) + CHECK_MULTIPLE_INTEL_ARCH() +ENDIF() IF (KOKKOS_ENABLE_OPENMPTARGET) SET(CLANG_CUDA_ARCH ${KOKKOS_CUDA_ARCH_FLAG}) IF (CLANG_CUDA_ARCH) STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${CLANG_CUDA_ARCH}) COMPILER_SPECIFIC_FLAGS( - Clang -Xopenmp-target -march=${CLANG_CUDA_ARCH} -fopenmp-targets=nvptx64-nvidia-cuda + Clang -Xopenmp-target -march=${CLANG_CUDA_ARCH} -fopenmp-targets=nvptx64 XL -qtgtarch=${KOKKOS_CUDA_ARCH_FLAG} NVHPC -gpu=${NVHPC_CUDA_ARCH} ) @@ -546,10 +631,47 @@ IF (KOKKOS_ENABLE_OPENMPTARGET) Clang -Xopenmp-target=amdgcn-amd-amdhsa -march=${CLANG_AMDGPU_ARCH} 
-fopenmp-targets=amdgcn-amd-amdhsa ) ENDIF() - IF (KOKKOS_ARCH_INTEL_GPU) + IF (KOKKOS_ARCH_INTEL_GEN) COMPILER_SPECIFIC_FLAGS( IntelLLVM -fopenmp-targets=spir64 -D__STRICT_ANSI__ ) + ELSEIF(KOKKOS_ARCH_INTEL_GEN9) + COMPILER_SPECIFIC_FLAGS( + IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen9" -D__STRICT_ANSI__ + ) + ELSEIF(KOKKOS_ARCH_INTEL_GEN11) + COMPILER_SPECIFIC_FLAGS( + IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen11" -D__STRICT_ANSI__ + ) + ELSEIF(KOKKOS_ARCH_INTEL_GEN12LP) + COMPILER_SPECIFIC_FLAGS( + IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen12lp" -D__STRICT_ANSI__ + ) + ELSEIF(KOKKOS_ARCH_INTEL_DG1) + COMPILER_SPECIFIC_FLAGS( + IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device dg1" -D__STRICT_ANSI__ + ) + ELSEIF(KOKKOS_ARCH_INTEL_XEHP) + COMPILER_SPECIFIC_FLAGS( + IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device xehp" -D__STRICT_ANSI__ + ) + ELSEIF(KOKKOS_ARCH_INTEL_PVC) + COMPILER_SPECIFIC_FLAGS( + IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.4.0" -D__STRICT_ANSI__ + ) + ENDIF() +ENDIF() + +IF (KOKKOS_ENABLE_OPENACC) + IF(KOKKOS_CUDA_ARCH_FLAG) + STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${KOKKOS_CUDA_ARCH_FLAG}) + COMPILER_SPECIFIC_FLAGS( + NVHPC -acc -gpu=${NVHPC_CUDA_ARCH} + ) + ELSE() + COMPILER_SPECIFIC_FLAGS( + NVHPC -acc + ) ENDIF() ENDIF() @@ -564,7 +686,7 @@ IF (KOKKOS_ENABLE_SYCL) ENDIF() ELSEIF(KOKKOS_ARCH_INTEL_GEN) COMPILER_SPECIFIC_FLAGS( - DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9-" + DEFAULT -fsycl-targets=spir64 ) ELSEIF(KOKKOS_ARCH_INTEL_GEN9) COMPILER_SPECIFIC_FLAGS( @@ -586,6 +708,10 @@ IF (KOKKOS_ENABLE_SYCL) COMPILER_SPECIFIC_FLAGS( DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device xehp" ) + ELSEIF(KOKKOS_ARCH_INTEL_PVC) + COMPILER_SPECIFIC_FLAGS( + DEFAULT -fsycl-targets=spir64_gen -Xsycl-target-backend "-device 12.4.0" + ) ENDIF() 
ENDIF() @@ -687,7 +813,7 @@ ENDIF() #Let's just always print things MESSAGE(STATUS "Built-in Execution Spaces:") -FOREACH (_BACKEND Cuda OpenMPTarget HIP SYCL) +FOREACH (_BACKEND Cuda OpenMPTarget HIP SYCL OpenACC) STRING(TOUPPER ${_BACKEND} UC_BACKEND) IF(KOKKOS_ENABLE_${UC_BACKEND}) IF(_DEVICE_PARALLEL) diff --git a/packages/kokkos/cmake/kokkos_compiler_id.cmake b/packages/kokkos/cmake/kokkos_compiler_id.cmake index f0c906e6566a0c947a9fc3c933fccafc5392e787..b9fe2ffab0bd200a497f4a504ee24ed7f6990a67 100644 --- a/packages/kokkos/cmake/kokkos_compiler_id.cmake +++ b/packages/kokkos/cmake/kokkos_compiler_id.cmake @@ -37,12 +37,16 @@ IF(Kokkos_ENABLE_CUDA) PATHS ${PROJECT_SOURCE_DIR} PATH_SUFFIXES bin) - # check if compiler was set to nvcc_wrapper + # Check if compiler was set to nvcc_wrapper kokkos_internal_have_compiler_nvcc(${CMAKE_CXX_COMPILER}) - # if launcher was found and nvcc_wrapper was not specified as - # compiler, set to use launcher. Will ensure CMAKE_CXX_COMPILER - # is replaced by nvcc_wrapper - IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + # If launcher was found and nvcc_wrapper was not specified as + # compiler and `CMAKE_CXX_COMPILER_LAUNCHER` is not set, set to use launcher. + # Will ensure CMAKE_CXX_COMPILER is replaced by nvcc_wrapper + IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) + IF(CMAKE_CXX_COMPILER_LAUNCHER) + MESSAGE(FATAL_ERROR "Cannot use CMAKE_CXX_COMPILER_LAUNCHER if the CMAKE_CXX_COMPILER is not able to compile CUDA code, i.e.
nvcc_wrapper or +clang++!") + ENDIF() # the first argument to launcher is always the C++ compiler defined by cmake # if the second argument matches the C++ compiler, it forwards the rest of the # args to nvcc_wrapper diff --git a/packages/kokkos/cmake/kokkos_enable_devices.cmake b/packages/kokkos/cmake/kokkos_enable_devices.cmake index dc3ee8c84255613c1ee6c5e6f847883bb7a15cf0..c7a454c214b0f974d357cb16324e9f4659e11d5f 100644 --- a/packages/kokkos/cmake/kokkos_enable_devices.cmake +++ b/packages/kokkos/cmake/kokkos_enable_devices.cmake @@ -86,6 +86,8 @@ IF(KOKKOS_ENABLE_OPENMP) ENDIF() ENDIF() +KOKKOS_DEVICE_OPTION(OPENACC OFF DEVICE "Whether to build the OpenACC backend") + KOKKOS_DEVICE_OPTION(OPENMPTARGET OFF DEVICE "Whether to build the OpenMP target backend") IF (KOKKOS_ENABLE_OPENMPTARGET) SET(ClangOpenMPFlag -fopenmp=libomp) diff --git a/packages/kokkos/cmake/kokkos_enable_options.cmake b/packages/kokkos/cmake/kokkos_enable_options.cmake index a581003b9e9e251ce04a5d541e3ca3382c0cf2e3..ea606bb0c96c0051ef08c4d49de578a1ddca72c1 100644 --- a/packages/kokkos/cmake/kokkos_enable_options.cmake +++ b/packages/kokkos/cmake/kokkos_enable_options.cmake @@ -61,6 +61,7 @@ KOKKOS_ENABLE_OPTION(HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF "Whether multiple ke # This option will go away eventually, but allows fallback to old implementation when needed. 
KOKKOS_ENABLE_OPTION(IMPL_DESUL_ATOMICS ON "Whether to use desul based atomics - option only during beta") +KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation") IF (KOKKOS_ENABLE_CUDA) SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}") diff --git a/packages/kokkos/cmake/kokkos_functions.cmake b/packages/kokkos/cmake/kokkos_functions.cmake index 02c9a911b1b827994b7d4a1e0c004cfb55afd749..11fa9b302cc239939b792610543ad72cfd5befbd 100644 --- a/packages/kokkos/cmake/kokkos_functions.cmake +++ b/packages/kokkos/cmake/kokkos_functions.cmake @@ -57,7 +57,46 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING) # Make sure this appears in the cache with the appropriate DOCSTRING SET(${CAMEL_NAME} ${DEFAULT} CACHE ${TYPE} ${DOCSTRING}) - #I don't love doing it this way because it's N^2 in number options, but cest la vie + #I don't love doing it this way because it's N^2 in number options, but c'est la vie + FOREACH(opt ${KOKKOS_GIVEN_VARIABLES}) + STRING(TOUPPER ${opt} OPT_UC) + IF ("${OPT_UC}" STREQUAL "${UC_NAME}") + IF (NOT "${opt}" STREQUAL "${CAMEL_NAME}") + IF (KOKKOS_HAS_TRILINOS) + #Allow this for now if Trilinos... we need to bootstrap our way to integration + MESSAGE(WARNING "Deprecated option ${opt} found - please change spelling to ${CAMEL_NAME}") + SET(${CAMEL_NAME} "${${opt}}" CACHE ${TYPE} ${DOCSTRING} FORCE) + UNSET(${opt} CACHE) + ELSE() + MESSAGE(FATAL_ERROR "Matching option found for ${CAMEL_NAME} with the wrong case ${opt}. Please delete your CMakeCache.txt and change option to -D${CAMEL_NAME}=${${opt}}. 
This is now enforced to avoid hard-to-debug CMake cache inconsistencies.") + ENDIF() + ENDIF() + ENDIF() + ENDFOREACH() + + #okay, great, we passed the validation test - use the default + IF (DEFINED ${CAMEL_NAME}) + SET(${UC_NAME} ${${CAMEL_NAME}} PARENT_SCOPE) + ELSE() + SET(${UC_NAME} ${DEFAULT} PARENT_SCOPE) + ENDIF() +ENDFUNCTION() + +INCLUDE (CMakeDependentOption) +FUNCTION(kokkos_dependent_option CAMEL_SUFFIX DOCSTRING DEFAULT DEPENDENCY FORCE) + SET(CAMEL_NAME Kokkos_${CAMEL_SUFFIX}) + STRING(TOUPPER ${CAMEL_NAME} UC_NAME) + + LIST(APPEND KOKKOS_OPTION_KEYS ${CAMEL_SUFFIX}) + SET(KOKKOS_OPTION_KEYS ${KOKKOS_OPTION_KEYS} PARENT_SCOPE) + LIST(APPEND KOKKOS_OPTION_VALUES "${DOCSTRING}") + SET(KOKKOS_OPTION_VALUES ${KOKKOS_OPTION_VALUES} PARENT_SCOPE) + LIST(APPEND KOKKOS_OPTION_TYPES BOOL) + SET(KOKKOS_OPTION_TYPES ${KOKKOS_OPTION_TYPES} PARENT_SCOPE) + + CMAKE_DEPENDENT_OPTION(${CAMEL_NAME} ${DOCSTRING} ${DEFAULT} "${DEPENDENCY}" ${FORCE}) + + #I don't love doing it this way because it's N^2 in number options, but c'est la vie FOREACH(opt ${KOKKOS_GIVEN_VARIABLES}) STRING(TOUPPER ${opt} OPT_UC) IF ("${OPT_UC}" STREQUAL "${UC_NAME}") @@ -102,6 +141,8 @@ FUNCTION(kokkos_append_config_line LINE) ENDFUNCTION() MACRO(kokkos_export_cmake_tpl NAME) + cmake_parse_arguments(KOKKOS_EXTRA_ARG "REQUIRED" "" "COMPONENTS" ${ARGN}) + #CMake TPLs are located with a call to find_package #find_package locates XConfig.cmake files through #X_DIR or X_ROOT variables set prior to calling find_package @@ -125,7 +166,16 @@ MACRO(kokkos_export_cmake_tpl NAME) KOKKOS_APPEND_CONFIG_LINE(" SET(${NAME}_ROOT ${${NAME}_ROOT})") KOKKOS_APPEND_CONFIG_LINE("ENDIF()") ENDIF() - KOKKOS_APPEND_CONFIG_LINE("FIND_DEPENDENCY(${NAME})") + SET(KOKKOS_CONFIG_STRING "FIND_DEPENDENCY(${NAME}") + + IF(KOKKOS_EXTRA_ARG_REQUIRED) + STRING(APPEND KOKKOS_CONFIG_STRING " REQUIRED") + ENDIF() + IF(KOKKOS_EXTRA_ARG_COMPONENTS) + STRING(APPEND KOKKOS_CONFIG_STRING " COMPONENTS ${KOKKOS_EXTRA_ARG_COMPONENTS}") + 
ENDIF() + STRING(APPEND KOKKOS_CONFIG_STRING ")") + KOKKOS_APPEND_CONFIG_LINE(${KOKKOS_CONFIG_STRING}) ENDMACRO() MACRO(kokkos_export_imported_tpl NAME) @@ -224,12 +274,6 @@ MACRO(kokkos_import_tpl NAME) SET(TPL_IMPORTED_NAME Kokkos::${NAME}) ENDIF() - # Even though this policy gets set in the top-level CMakeLists.txt, - # I have still been getting errors about ROOT variables being ignored - # I'm not sure if this is a scope issue - but make sure - # the policy is set before we do any find_package calls - CMAKE_POLICY(SET CMP0074 NEW) - IF (KOKKOS_ENABLE_${NAME}) #Tack on a TPL here to make sure we avoid using anyone else's find FIND_PACKAGE(TPL${NAME} REQUIRED MODULE) @@ -587,11 +631,16 @@ ENDMACRO() # # ``LIBRARY <name>`` # -# If specified, this gives the name of the library to look for +# If specified, this gives the name of the library to look for. +# The full path for the library found will be used as IMPORTED_LOCATION +# for the target created. Thus, this cannot be used for interface libraries. # # ``LIBRARIES <name1> <name2> ...`` # -# If specified, this gives a list of libraries to find for the package +# If specified, this gives a list of libraries to find for the package. +# As opposed to the LIBRARY argument, this can be used with interface +# libraries. In that case, we directly use the names provided here +# for linking when creating the new target. 
# # ``LIBRARY_PATHS <path1> <path2> ...`` # @@ -707,6 +756,7 @@ MACRO(kokkos_find_imported NAME) SET(IMPORT_TYPE) IF (TPL_INTERFACE) SET(IMPORT_TYPE "INTERFACE") + SET(${NAME}_FOUND_LIBRARIES ${TPL_LIBRARIES}) ENDIF() KOKKOS_CREATE_IMPORTED_TPL(${TPL_IMPORTED_NAME} ${IMPORT_TYPE} @@ -790,15 +840,15 @@ FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER) SET(COMPILER ${KOKKOS_CXX_COMPILER_ID}) ENDIF() - SET(COMPILER_SPECIFIC_FLAGS_TMP) + SET(COMPILER_SPECIFIC_FLAGS_TMP ${PARSE_DEFAULT}) FOREACH(COMP ${COMPILERS}) IF (COMPILER STREQUAL "${COMP}") IF (PARSE_${COMPILER}) - IF (NOT "${PARSE_${COMPILER}}" STREQUAL "NO-VALUE-SPECIFIED") + IF ("${PARSE_${COMPILER}}" STREQUAL "NO-VALUE-SPECIFIED") + SET(COMPILER_SPECIFIC_FLAGS_TMP "") + ELSE() SET(COMPILER_SPECIFIC_FLAGS_TMP ${PARSE_${COMPILER}}) ENDIF() - ELSEIF(PARSE_DEFAULT) - SET(COMPILER_SPECIFIC_FLAGS_TMP ${PARSE_DEFAULT}) ENDIF() ENDIF() ENDFOREACH() diff --git a/packages/kokkos/cmake/kokkos_pick_cxx_std.cmake b/packages/kokkos/cmake/kokkos_pick_cxx_std.cmake index 015873ebd6320d78c6ab3e190d7666c1c7e84824..03f1a0d18a9a47d96d9ef8926165f40df8a4856c 100644 --- a/packages/kokkos/cmake/kokkos_pick_cxx_std.cmake +++ b/packages/kokkos/cmake/kokkos_pick_cxx_std.cmake @@ -1,13 +1,15 @@ # From CMake 3.10 documentation #This can run at any time -KOKKOS_OPTION(CXX_STANDARD "" STRING "The C++ standard for Kokkos to use: 14, 17, or 20. If empty, this will default to CMAKE_CXX_STANDARD. If both CMAKE_CXX_STANDARD and Kokkos_CXX_STANDARD are empty, this will default to 14") +KOKKOS_OPTION(CXX_STANDARD "" STRING "[[DEPRECATED - USE CMAKE_CXX_STANDARD INSTEAD]] The C++ standard for Kokkos to use: 14, 17, or 20. If empty, this will default to CMAKE_CXX_STANDARD. 
If both CMAKE_CXX_STANDARD and Kokkos_CXX_STANDARD are empty, this will default to 14") # Set CXX standard flags SET(KOKKOS_ENABLE_CXX14 OFF) SET(KOKKOS_ENABLE_CXX17 OFF) SET(KOKKOS_ENABLE_CXX20 OFF) IF (KOKKOS_CXX_STANDARD) + MESSAGE(DEPRECATION "Setting the variable Kokkos_CXX_STANDARD in configuration is deprecated - set CMAKE_CXX_STANDARD directly instead") + IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++98") MESSAGE(FATAL_ERROR "Kokkos no longer supports C++98 - minimum C++14") ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") diff --git a/packages/kokkos/cmake/kokkos_test_cxx_std.cmake b/packages/kokkos/cmake/kokkos_test_cxx_std.cmake index 1eb0592c7f054185e566f053faa931029f92fbc1..a5a8f40af813e3edf926eb00bafbb84fcc9c0311 100644 --- a/packages/kokkos/cmake/kokkos_test_cxx_std.cmake +++ b/packages/kokkos/cmake/kokkos_test_cxx_std.cmake @@ -128,8 +128,8 @@ IF(KOKKOS_ENABLE_CUDA) ELSEIF(CMAKE_CXX_EXTENSIONS) MESSAGE(FATAL_ERROR "Compiling CUDA code with clang doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF") ENDIF() - ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}") + ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) + MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. 
The compiler must be nvcc_wrapper or Clang or NVC++ or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}") ENDIF() ENDIF() diff --git a/packages/kokkos/cmake/kokkos_tpls.cmake b/packages/kokkos/cmake/kokkos_tpls.cmake index 54c6b520b4a3b1ed5648df76465d465e0d1eeaab..2c712a519bd734c1c020818a8c61d264347ccc1e 100644 --- a/packages/kokkos/cmake/kokkos_tpls.cmake +++ b/packages/kokkos/cmake/kokkos_tpls.cmake @@ -85,6 +85,11 @@ ENDIF() KOKKOS_IMPORT_TPL(ROCM INTERFACE) KOKKOS_IMPORT_TPL(LIBQUADMATH) +IF (Kokkos_ENABLE_IMPL_DESUL_ATOMICS AND Kokkos_ENABLE_DESUL_ATOMICS_EXTERNAL) + find_package(desul REQUIRED COMPONENTS atomics) + KOKKOS_EXPORT_CMAKE_TPL(desul REQUIRED COMPONENTS atomics) +ENDIF() + #Convert list to newlines (which CMake doesn't always like in cache variables) STRING(REPLACE ";" "\n" KOKKOS_TPL_EXPORT_TEMP "${KOKKOS_TPL_EXPORTS}") #Convert to a regular variable diff --git a/packages/kokkos/cmake/kokkos_tribits.cmake b/packages/kokkos/cmake/kokkos_tribits.cmake index 1ec45d19bc73965cc0558976a9677d71bfd65534..34e45ecf7247af3095ca52d7d034eddd5cd03d24 100644 --- a/packages/kokkos/cmake/kokkos_tribits.cmake +++ b/packages/kokkos/cmake/kokkos_tribits.cmake @@ -88,6 +88,9 @@ MACRO(KOKKOS_PROCESS_SUBPACKAGES) ADD_SUBDIRECTORY(core) ADD_SUBDIRECTORY(containers) ADD_SUBDIRECTORY(algorithms) + if (KOKKOS_CXX_STANDARD GREATER_EQUAL 17) + ADD_SUBDIRECTORY(simd) + endif() ADD_SUBDIRECTORY(example) endif() ENDMACRO() @@ -342,7 +345,6 @@ MACRO(KOKKOS_INSTALL_ADDITIONAL_FILES) INSTALL(PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper" "${CMAKE_CURRENT_SOURCE_DIR}/bin/hpcbind" - "${CMAKE_CURRENT_SOURCE_DIR}/bin/kokkos_launch_compiler" "${PROJECT_BINARY_DIR}/temp/kokkos_launch_compiler" DESTINATION ${CMAKE_INSTALL_BINDIR}) INSTALL(FILES diff --git a/packages/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/packages/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp index 
16b74a4997e5f1e643e095e167253829d47a050a..77451bb9e8f7ae99d84fbcb3fac05ad58a80850b 100644 --- a/packages/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp +++ b/packages/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -140,9 +140,7 @@ struct find_test { void init(value_type& v) const { v = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, volatile value_type const& src) const { - dst += src; - } + void join(value_type& dst, value_type const& src) const { dst += src; } KOKKOS_INLINE_FUNCTION void operator()(size_type i, value_type& num_errors) const { diff --git a/packages/kokkos/containers/performance_tests/TestScatterView.hpp b/packages/kokkos/containers/performance_tests/TestScatterView.hpp index 8a23f59d32cdd4f6290465ad41fa70d521e39bfb..bd06be9667be33f81ad607903ac97dbd1b65895c 100644 --- a/packages/kokkos/containers/performance_tests/TestScatterView.hpp +++ b/packages/kokkos/containers/performance_tests/TestScatterView.hpp @@ -82,8 +82,8 @@ void test_scatter_view(int m, int n) { Kokkos::Timer timer; timer.reset(); for (int k = 0; k < m; ++k) { - Kokkos::parallel_for(policy, f2, - "hand_coded_duplicate_scatter_view_test"); + Kokkos::parallel_for("hand_coded_duplicate_scatter_view_test", policy, + f2); } Kokkos::fence(); auto t = timer.seconds(); @@ -102,7 +102,7 @@ void test_scatter_view(int m, int n) { Kokkos::Timer timer; timer.reset(); for (int k = 0; k < m; ++k) { - Kokkos::parallel_for(policy, f, "scatter_view_test"); + Kokkos::parallel_for("scatter_view_test", policy, f); } Kokkos::fence(); auto t = timer.seconds(); diff --git a/packages/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/packages/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp index 4547d5c35758e2eadc0e5029779f0d2e23fc4081..8ff208d6a867bc424334a953723d21235f09bac0 100644 --- a/packages/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp +++ 
b/packages/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -147,7 +147,7 @@ struct UnorderedMapTest { } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, const volatile value_type& src) const { + void join(value_type& dst, const value_type& src) const { dst.failed_count += src.failed_count; dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list; } diff --git a/packages/kokkos/containers/src/Kokkos_Bitset.hpp b/packages/kokkos/containers/src/Kokkos_Bitset.hpp index ea73c4f536916c7aa5928253a7fbd1bfdf04b458..05121b4e51195651c3bd05d01529f311beaa5863 100644 --- a/packages/kokkos/containers/src/Kokkos_Bitset.hpp +++ b/packages/kokkos/containers/src/Kokkos_Bitset.hpp @@ -44,14 +44,16 @@ #ifndef KOKKOS_BITSET_HPP #define KOKKOS_BITSET_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET +#endif #include <Kokkos_Core.hpp> #include <Kokkos_Functional.hpp> #include <impl/Kokkos_Bitset_impl.hpp> -#include <stdexcept> - namespace Kokkos { template <typename Device = Kokkos::DefaultExecutionSpace> @@ -403,7 +405,7 @@ class ConstBitset { template <typename DstDevice, typename SrcDevice> void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) { if (dst.size() != src.size()) { - throw std::runtime_error( + Kokkos::Impl::throw_runtime_exception( "Error: Cannot deep_copy bitsets of different sizes!"); } @@ -418,7 +420,7 @@ void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) { template <typename DstDevice, typename SrcDevice> void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) { if (dst.size() != src.size()) { - throw std::runtime_error( + Kokkos::Impl::throw_runtime_exception( "Error: Cannot deep_copy bitsets of different sizes!"); } @@ -433,7 +435,7 @@ void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) { template <typename DstDevice, typename SrcDevice> void 
deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) { if (dst.size() != src.size()) { - throw std::runtime_error( + Kokkos::Impl::throw_runtime_exception( "Error: Cannot deep_copy bitsets of different sizes!"); } @@ -447,4 +449,8 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) { } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET +#endif #endif // KOKKOS_BITSET_HPP diff --git a/packages/kokkos/containers/src/Kokkos_DualView.hpp b/packages/kokkos/containers/src/Kokkos_DualView.hpp index 8c80ec55b5b3177b44c3e61befe1c7bb5f3b8a78..916c54d605545ef8fe8a39f5e98a36d000bfa85a 100644 --- a/packages/kokkos/containers/src/Kokkos_DualView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DualView.hpp @@ -50,6 +50,10 @@ #ifndef KOKKOS_DUALVIEW_HPP #define KOKKOS_DUALVIEW_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DUALVIEW +#endif #include <Kokkos_Core.hpp> #include <impl/Kokkos_Error.hpp> @@ -144,7 +148,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { using t_dev_const_randomread = View<typename traits::const_data_type, typename traits::array_layout, typename traits::device_type, - Kokkos::MemoryTraits<Kokkos::RandomAccess> >; + Kokkos::MemoryTraits<Kokkos::RandomAccess>>; /// \typedef t_host_const_randomread /// \brief The type of a const, random-access View host mirror of @@ -175,7 +179,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { using t_dev_const_randomread_um = View<typename t_host::const_data_type, typename t_host::array_layout, typename t_host::device_type, - Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::RandomAccess> >; + Kokkos::MemoryTraits<Kokkos::Unmanaged | Kokkos::RandomAccess>>; /// \typedef t_host_const_randomread /// \brief The type of a const, 
random-access View host mirror of @@ -232,7 +236,9 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) - : modified_flags(t_modified_flags("DualView::modified_flags")), + : modified_flags( + Kokkos::view_alloc(typename t_modified_flags::execution_space{}, + "DualView::modified_flags")), d_view(label, n0, n1, n2, n3, n4, n5, n6, n7), h_view(create_mirror_view(d_view)) // without UVM, host View mirrors {} @@ -249,16 +255,15 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { /// omit the integer arguments that follow. template <class... P> DualView(const Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<!Impl::ViewCtorProp<P...>::has_pointer, - size_t>::type const n0 = - KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) + std::enable_if_t<!Impl::ViewCtorProp<P...>::has_pointer, + size_t> const n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) : modified_flags(t_modified_flags("DualView::modified_flags")), d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7) { // without UVM, host View mirrors @@ -403,7 +408,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { 
impl_device_matches_tdev_exec<Device>::value, t_dev, typename std::conditional_t< impl_device_matches_tdev_memory_space<Device>::value, - t_dev, t_host> > > > > + t_dev, t_host>>>>> view() const { constexpr bool device_is_memspace = std::is_same<Device, typename Device::memory_space>::value; @@ -609,21 +614,21 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { } template <class Device> - void sync(const typename std::enable_if< + void sync(const std::enable_if_t< (std::is_same<typename traits::data_type, typename traits::non_const_data_type>::value) || (std::is_same<Device, int>::value), - int>::type& = 0) { + int>& = 0) { sync_impl<Device>(std::true_type{}); } template <class Device, class ExecutionSpace> void sync(const ExecutionSpace& exec, - const typename std::enable_if< + const std::enable_if_t< (std::is_same<typename traits::data_type, typename traits::non_const_data_type>::value) || (std::is_same<Device, int>::value), - int>::type& = 0) { + int>& = 0) { sync_impl<Device>(std::true_type{}, exec); } @@ -651,20 +656,20 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { } template <class Device> - void sync(const typename std::enable_if< + void sync(const std::enable_if_t< (!std::is_same<typename traits::data_type, typename traits::non_const_data_type>::value) || (std::is_same<Device, int>::value), - int>::type& = 0) { + int>& = 0) { sync_impl<Device>(std::false_type{}); } template <class Device, class ExecutionSpace> void sync(const ExecutionSpace& exec, - const typename std::enable_if< + const std::enable_if_t< (!std::is_same<typename traits::data_type, typename traits::non_const_data_type>::value) || (std::is_same<Device, int>::value), - int>::type& = 0) { + int>& = 0) { sync_impl<Device>(std::false_type{}, exec); } @@ -786,7 +791,10 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { std::enable_if_t<!Dummy::impl_dualview_is_single_device::value>* = nullptr> void modify() 
{ - if (modified_flags.data() == nullptr) return; + if (modified_flags.data() == nullptr) { + modified_flags = t_modified_flags("DualView::modified_flags"); + } + int dev = get_device_side<Device>(); if (dev == 1) { // if Device is the same as DualView's device type @@ -899,21 +907,55 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { /// This discards any existing contents of the objects, and resets /// their modified flags. It does <i>not</i> copy the old contents /// of either View into the new View objects. - template <class... I> + template <class... ViewCtorArgs> void impl_realloc(const size_t n0, const size_t n1, const size_t n2, const size_t n3, const size_t n4, const size_t n5, - const size_t n6, const size_t n7, const I&... arg_prop) { + const size_t n6, const size_t n7, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::realloc " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a memory space instance!"); + const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7}; const bool sizeMismatch = Impl::size_mismatch(h_view, h_view.rank_dynamic, new_extents); if (sizeMismatch) { - ::Kokkos::realloc(arg_prop..., d_view, n0, n1, n2, n3, n4, n5, n6, n7); - h_view = create_mirror_view(arg_prop..., typename t_host::memory_space(), - d_view); - } else if (!Kokkos::Impl::has_type<Kokkos::Impl::WithoutInitializing_t, - I...>::value) { - ::Kokkos::deep_copy(d_view, typename t_dev::value_type{}); + ::Kokkos::realloc(arg_prop, d_view, n0, n1, n2, n3, n4, n5, n6, n7); + if (alloc_prop_input::initialize) { + 
h_view = create_mirror_view(typename t_host::memory_space(), d_view); + } else { + h_view = create_mirror_view(Kokkos::WithoutInitializing, + typename t_host::memory_space(), d_view); + } + } else if (alloc_prop_input::initialize) { + if (alloc_prop_input::has_execution_space) { + // Add execution_space if not provided to avoid need for if constexpr + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 2>, + typename t_dev::execution_space>>; + alloc_prop arg_prop_copy(arg_prop); + using execution_space_type = typename alloc_prop::execution_space; + const execution_space_type& exec_space = + static_cast< + Kokkos::Impl::ViewCtorProp<void, execution_space_type> const&>( + arg_prop_copy) + .value; + ::Kokkos::deep_copy(exec_space, d_view, typename t_dev::value_type{}); + } else + ::Kokkos::deep_copy(d_view, typename t_dev::value_type{}); } /* Reset dirty flags */ @@ -923,6 +965,19 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { modified_flags(1) = modified_flags(0) = 0; } + template <class... 
ViewCtorArgs> + void realloc(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + impl_realloc(n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); + } + void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -931,7 +986,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - impl_realloc(n0, n1, n2, n3, n4, n5, n6, n7); + impl_realloc(n0, n1, n2, n3, n4, n5, n6, n7, Impl::ViewCtorProp<>{}); } template <typename I> @@ -944,17 +999,32 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - impl_realloc(n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); + impl_realloc(n0, n1, n2, n3, n4, n5, n6, n7, Kokkos::view_alloc(arg_prop)); } /// \brief Resize both views, copying old contents into new if necessary. /// /// This method only copies the old contents into the new View /// objects for the device which was last marked as modified. - template <class... I> - void impl_resize(const size_t n0, const size_t n1, const size_t n2, + template <class... 
ViewCtorArgs> + void impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const size_t n0, const size_t n1, const size_t n2, const size_t n3, const size_t n4, const size_t n5, - const size_t n6, const size_t n7, const I&... arg_prop) { + const size_t n6, const size_t n7) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::resize " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::resize must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::resize must " + "not include a memory space instance!"); + const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7}; const bool sizeMismatch = Impl::size_mismatch(h_view, h_view.rank_dynamic, new_extents); @@ -965,22 +1035,31 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { if (modified_flags(1) >= modified_flags(0)) { /* Resize on Device */ if (sizeMismatch) { - ::Kokkos::resize(arg_prop..., d_view, n0, n1, n2, n3, n4, n5, n6, n7); - h_view = create_mirror_view(arg_prop..., - typename t_host::memory_space(), d_view); + ::Kokkos::resize(arg_prop, d_view, n0, n1, n2, n3, n4, n5, n6, n7); + if (alloc_prop_input::initialize) { + h_view = create_mirror_view(typename t_host::memory_space(), d_view); + } else { + h_view = create_mirror_view(Kokkos::WithoutInitializing, + typename t_host::memory_space(), d_view); + } /* Mark Device copy as modified */ - modified_flags(1) = modified_flags(1) + 1; + ++modified_flags(1); } } else { - /* Realloc on Device */ + /* Resize on Host */ if (sizeMismatch) { - ::Kokkos::resize(arg_prop..., h_view, n0, n1, n2, n3, n4, n5, n6, n7); - d_view = create_mirror_view(arg_prop..., typename t_dev::memory_space(), - h_view); + ::Kokkos::resize(arg_prop, h_view, n0, n1, n2, 
n3, n4, n5, n6, n7); + if (alloc_prop_input::initialize) { + d_view = create_mirror_view(typename t_dev::memory_space(), h_view); + + } else { + d_view = create_mirror_view(Kokkos::WithoutInitializing, + typename t_dev::memory_space(), h_view); + } /* Mark Host copy as modified */ - modified_flags(0) = modified_flags(0) + 1; + ++modified_flags(0); } } } @@ -993,7 +1072,20 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - impl_resize(n0, n1, n2, n3, n4, n5, n6, n7); + impl_resize(Impl::ViewCtorProp<>{}, n0, n1, n2, n3, n4, n5, n6, n7); + } + + template <class... ViewCtorArgs> + void resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + impl_resize(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7); } template <class I> @@ -1006,7 +1098,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - impl_resize(n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); + impl_resize(Kokkos::view_alloc(arg_prop), n0, n1, n2, n3, n4, n5, n6, n7); } //@} @@ -1027,16 +1119,16 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, size_t>::type - extent(const iType& r) const { + KOKKOS_INLINE_FUNCTION constexpr 
std::enable_if_t< + std::is_integral<iType>::value, size_t> + extent(const iType& r) const { return d_view.extent(r); } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, int>::type - extent_int(const iType& r) const { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, int> + extent_int(const iType& r) const { return static_cast<int>(d_view.extent(r)); } @@ -1130,6 +1222,15 @@ void resize(DualView<Properties...>& dv, Args&&... args) noexcept( dv.resize(std::forward<Args>(args)...); } +template <class... ViewCtorArgs, class... Properties, class... Args> +void resize( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + DualView<Properties...>& dv, + Args&&... args) noexcept(noexcept(dv.resize(arg_prop, + std::forward<Args>(args)...))) { + dv.resize(arg_prop, std::forward<Args>(args)...); +} + template <class I, class... Properties, class... Args> std::enable_if_t<Impl::is_view_ctor_property<I>::value> resize( const I& arg_prop, DualView<Properties...>& dv, @@ -1138,6 +1239,15 @@ std::enable_if_t<Impl::is_view_ctor_property<I>::value> resize( dv.resize(arg_prop, std::forward<Args>(args)...); } +template <class... ViewCtorArgs, class... Properties, class... Args> +void realloc(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + DualView<Properties...>& dv, + Args&&... args) noexcept(noexcept(dv + .realloc(std::forward<Args>( + args)...))) { + dv.realloc(arg_prop, std::forward<Args>(args)...); +} + template <class... Properties, class... Args> void realloc(DualView<Properties...>& dv, Args&&... 
args) noexcept( noexcept(dv.realloc(std::forward<Args>(args)...))) { @@ -1155,4 +1265,8 @@ std::enable_if_t<Impl::is_view_ctor_property<I>::value> realloc( } // end namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DUALVIEW +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DUALVIEW +#endif #endif diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp index 176129d25434c940721861bb558c003ed233beae..442f0d8617524dc0c1459bf10110891e97b3a6b2 100644 --- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -50,6 +50,10 @@ #ifndef KOKKOS_DYNRANKVIEW_HPP #define KOKKOS_DYNRANKVIEW_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DYNRANKVIEW +#endif #include <Kokkos_Core.hpp> #include <impl/Kokkos_Error.hpp> @@ -117,10 +121,10 @@ struct DynRankDimTraits { // Create the layout for the rank-7 view. // Non-strided Layout template <typename Layout> - KOKKOS_INLINE_FUNCTION static typename std::enable_if< + KOKKOS_INLINE_FUNCTION static std::enable_if_t< (std::is_same<Layout, Kokkos::LayoutRight>::value || std::is_same<Layout, Kokkos::LayoutLeft>::value), - Layout>::type + Layout> createLayout(const Layout& layout) { return Layout(layout.dimension[0] != unspecified ? layout.dimension[0] : 1, layout.dimension[1] != unspecified ? layout.dimension[1] : 1, @@ -134,8 +138,8 @@ struct DynRankDimTraits { // LayoutStride template <typename Layout> - KOKKOS_INLINE_FUNCTION static typename std::enable_if< - (std::is_same<Layout, Kokkos::LayoutStride>::value), Layout>::type + KOKKOS_INLINE_FUNCTION static std::enable_if_t< + (std::is_same<Layout, Kokkos::LayoutStride>::value), Layout> createLayout(const Layout& layout) { return Layout(layout.dimension[0] != unspecified ? 
layout.dimension[0] : 1, layout.stride[0], @@ -157,13 +161,13 @@ struct DynRankDimTraits { // Extra overload to match that for specialize types template <typename Traits, typename... P> - KOKKOS_INLINE_FUNCTION static typename std::enable_if< + KOKKOS_INLINE_FUNCTION static std::enable_if_t< (std::is_same<typename Traits::array_layout, Kokkos::LayoutRight>::value || std::is_same<typename Traits::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename Traits::array_layout, Kokkos::LayoutStride>::value), - typename Traits::array_layout>::type + typename Traits::array_layout> createLayout(const Kokkos::Impl::ViewCtorProp<P...>& /* prop */, const typename Traits::array_layout& layout) { return createLayout(layout); @@ -187,12 +191,12 @@ struct DynRankDimTraits { // Non-strided Layout template <typename Layout, typename iType> -KOKKOS_INLINE_FUNCTION static - typename std::enable_if<(std::is_same<Layout, Kokkos::LayoutRight>::value || - std::is_same<Layout, Kokkos::LayoutLeft>::value) && - std::is_integral<iType>::value, - Layout>::type - reconstructLayout(const Layout& layout, iType dynrank) { +KOKKOS_INLINE_FUNCTION static std::enable_if_t< + (std::is_same<Layout, Kokkos::LayoutRight>::value || + std::is_same<Layout, Kokkos::LayoutLeft>::value) && + std::is_integral<iType>::value, + Layout> +reconstructLayout(const Layout& layout, iType dynrank) { return Layout(dynrank > 0 ? layout.dimension[0] : KOKKOS_INVALID_INDEX, dynrank > 1 ? layout.dimension[1] : KOKKOS_INVALID_INDEX, dynrank > 2 ? layout.dimension[2] : KOKKOS_INVALID_INDEX, @@ -205,10 +209,10 @@ KOKKOS_INLINE_FUNCTION static // LayoutStride template <typename Layout, typename iType> -KOKKOS_INLINE_FUNCTION static typename std::enable_if< +KOKKOS_INLINE_FUNCTION static std::enable_if_t< (std::is_same<Layout, Kokkos::LayoutStride>::value) && std::is_integral<iType>::value, - Layout>::type + Layout> reconstructLayout(const Layout& layout, iType dynrank) { return Layout(dynrank > 0 ? 
layout.dimension[0] : KOKKOS_INVALID_INDEX, dynrank > 0 ? layout.stride[0] : (0), @@ -308,26 +312,25 @@ namespace Impl { template <class DstTraits, class SrcTraits> class ViewMapping< DstTraits, SrcTraits, - typename std::enable_if< - (std::is_same<typename DstTraits::memory_space, - typename SrcTraits::memory_space>::value && - std::is_same<typename DstTraits::specialize, void>::value && - std::is_same<typename SrcTraits::specialize, void>::value && - (std::is_same<typename DstTraits::array_layout, - typename SrcTraits::array_layout>::value || - ((std::is_same<typename DstTraits::array_layout, - Kokkos::LayoutLeft>::value || - std::is_same<typename DstTraits::array_layout, - Kokkos::LayoutRight>::value || - std::is_same<typename DstTraits::array_layout, - Kokkos::LayoutStride>::value) && - (std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutLeft>::value || - std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutRight>::value || - std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutStride>::value)))), - Kokkos::Impl::ViewToDynRankViewTag>::type> { + std::enable_if_t<(std::is_same<typename DstTraits::memory_space, + typename SrcTraits::memory_space>::value && + std::is_void<typename DstTraits::specialize>::value && + std::is_void<typename SrcTraits::specialize>::value && + (std::is_same<typename DstTraits::array_layout, + typename SrcTraits::array_layout>::value || + ((std::is_same<typename DstTraits::array_layout, + Kokkos::LayoutLeft>::value || + std::is_same<typename DstTraits::array_layout, + Kokkos::LayoutRight>::value || + std::is_same<typename DstTraits::array_layout, + Kokkos::LayoutStride>::value) && + (std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutLeft>::value || + std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutRight>::value || + std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutStride>::value)))), + Kokkos::Impl::ViewToDynRankViewTag>> { private: enum { is_assignable_value_type 
= @@ -397,7 +400,7 @@ template <class> struct is_dyn_rank_view : public std::false_type {}; template <class D, class... P> -struct is_dyn_rank_view<Kokkos::DynRankView<D, P...> > : public std::true_type { +struct is_dyn_rank_view<Kokkos::DynRankView<D, P...>> : public std::true_type { }; template <typename DataType, class... Properties> @@ -465,23 +468,20 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // enum? template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, size_t>::type - extent(const iType& r) const { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, size_t> + extent(const iType& r) const { return m_map.extent(r); } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, int>::type - extent_int(const iType& r) const { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, int> + extent_int(const iType& r) const { return static_cast<int>(m_map.extent(r)); } - KOKKOS_INLINE_FUNCTION constexpr typename traits::array_layout layout() - const { - return m_map.layout(); - } + KOKKOS_INLINE_FUNCTION constexpr typename traits::array_layout layout() const; //---------------------------------------- /* Deprecate all 'dimension' functions in favor of @@ -567,7 +567,7 @@ class DynRankView : public ViewTraits<DataType, Properties...> { is_layout_stride = std::is_same<typename traits::array_layout, Kokkos::LayoutStride>::value, - is_default_map = std::is_same<typename traits::specialize, void>::value && + is_default_map = std::is_void<typename traits::specialize>::value && (is_layout_left || is_layout_right || is_layout_stride) }; @@ -611,11 +611,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // This assumes a contiguous underlying memory (i.e. no padding, no // striding...) 
template <typename iType> - KOKKOS_INLINE_FUNCTION typename std::enable_if< + KOKKOS_INLINE_FUNCTION std::enable_if_t< std::is_same<typename drvtraits::value_type, typename drvtraits::scalar_array_type>::value && std::is_integral<iType>::value, - reference_type>::type + reference_type> operator[](const iType& i0) const { // Phalanx is violating this, since they use the operator to access ALL // elements in the allocation KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , @@ -626,11 +626,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // This assumes a contiguous underlying memory (i.e. no padding, no // striding... AND a Trilinos/Sacado scalar type ) template <typename iType> - KOKKOS_INLINE_FUNCTION typename std::enable_if< + KOKKOS_INLINE_FUNCTION std::enable_if_t< !std::is_same<typename drvtraits::value_type, typename drvtraits::scalar_array_type>::value && std::is_integral<iType>::value, - reference_type>::type + reference_type> operator[](const iType& i0) const { // auto map = impl_map(); const size_t dim_scalar = m_map.dimension_scalar(); @@ -640,60 +640,60 @@ class DynRankView : public ViewTraits<DataType, Properties...> { DataType*, typename traits::array_layout, typename traits::device_type, Kokkos::MemoryTraits<traits::memory_traits::is_unmanaged | traits::memory_traits::is_random_access | - traits::memory_traits::is_atomic> >; + traits::memory_traits::is_atomic>>; tmp_view_type rankone_view(this->data(), bytes, dim_scalar); return rankone_view(i0); } // Rank 1 parenthesis template <typename iType> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && - std::is_integral<iType>::value), - reference_type>::type - operator()(const iType& i0) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<(std::is_void<typename traits::specialize>::value && + std::is_integral<iType>::value), + reference_type> + operator()(const iType& i0) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), 
m_track, m_map, i0)) return m_map.reference(i0); } template <typename iType> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename traits::specialize, void>::value && - std::is_integral<iType>::value), - reference_type>::type - operator()(const iType& i0) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename traits::specialize>::value && + std::is_integral<iType>::value), + reference_type> + operator()(const iType& i0) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) return m_map.reference(i0, 0, 0, 0, 0, 0, 0); } // Rank 2 template <typename iType0, typename iType1> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value), - reference_type>::type + reference_type> operator()(const iType0& i0, const iType1& i1) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) return m_map.reference(i0, i1); } template <typename iType0, typename iType1> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - operator()(const iType0& i0, const iType1& i1) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + operator()(const iType0& i0, const iType1& i1) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) return m_map.reference(i0, i1, 0, 0, 0, 0, 0); } // Rank 3 template <typename iType0, typename iType1, typename iType2> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename 
traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value), - reference_type>::type + reference_type> operator()(const iType0& i0, const iType1& i1, const iType2& i2) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3, this->rank(), m_track, m_map, i0, i1, i2)) @@ -701,11 +701,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> { } template <typename iType0, typename iType1, typename iType2> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - operator()(const iType0& i0, const iType1& i1, const iType2& i2) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + operator()(const iType0& i0, const iType1& i1, const iType2& i2) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3, this->rank(), m_track, m_map, i0, i1, i2)) return m_map.reference(i0, i1, i2, 0, 0, 0, 0); @@ -713,11 +713,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // Rank 4 template <typename iType0, typename iType1, typename iType2, typename iType3> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), - reference_type>::type + reference_type> operator()(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( @@ -726,12 +726,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { } template <typename iType0, typename iType1, typename iType2, typename iType3> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - 
!(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - operator()(const iType0& i0, const iType1& i1, const iType2& i2, - const iType3& i3) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) return m_map.reference(i0, i1, i2, i3, 0, 0, 0); @@ -740,12 +740,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // Rank 5 template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), - reference_type>::type + reference_type> operator()(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, const iType4& i4) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( @@ -755,12 +755,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - operator()(const iType0& i0, const iType1& i1, const iType2& i2, - const iType3& i3, const iType4& i4) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + 
operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) return m_map.reference(i0, i1, i2, i3, i4, 0, 0); @@ -769,12 +769,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // Rank 6 template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), - reference_type>::type + reference_type> operator()(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, const iType4& i4, const iType5& i5) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( @@ -784,12 +784,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - operator()(const iType0& i0, const iType1& i1, const iType2& i2, - const iType3& i3, const iType4& i4, const iType5& i5) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4, const iType5& i5) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) return m_map.reference(i0, i1, i2, 
i3, i4, i5, 0); @@ -798,12 +798,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // Rank 7 template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5, typename iType6> - KOKKOS_INLINE_FUNCTION typename std::enable_if< + KOKKOS_INLINE_FUNCTION std::enable_if_t< (std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), - reference_type>::type + reference_type> operator()(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, const iType4& i4, const iType5& i5, const iType6& i6) const { @@ -823,53 +823,53 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // Rank 1 // Rank 1 parenthesis template <typename iType> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && - std::is_integral<iType>::value), - reference_type>::type - access(const iType& i0) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<(std::is_void<typename traits::specialize>::value && + std::is_integral<iType>::value), + reference_type> + access(const iType& i0) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) return m_map.reference(i0); } template <typename iType> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename traits::specialize, void>::value && - std::is_integral<iType>::value), - reference_type>::type - access(const iType& i0) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename traits::specialize>::value && + std::is_integral<iType>::value), + reference_type> + access(const iType& i0) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) return m_map.reference(i0, 0, 0, 0, 0, 0, 0); } // Rank 2 template <typename iType0, typename iType1> - 
KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value), - reference_type>::type + reference_type> access(const iType0& i0, const iType1& i1) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) return m_map.reference(i0, i1); } template <typename iType0, typename iType1> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - access(const iType0& i0, const iType1& i1) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + access(const iType0& i0, const iType1& i1) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) return m_map.reference(i0, i1, 0, 0, 0, 0, 0); } // Rank 3 template <typename iType0, typename iType1, typename iType2> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value), - reference_type>::type + reference_type> access(const iType0& i0, const iType1& i1, const iType2& i2) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3, this->rank(), m_track, m_map, i0, i1, i2)) @@ -877,11 +877,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> { } template <typename iType0, typename iType1, typename iType2> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - access(const 
iType0& i0, const iType1& i1, const iType2& i2) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + access(const iType0& i0, const iType1& i1, const iType2& i2) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3, this->rank(), m_track, m_map, i0, i1, i2)) return m_map.reference(i0, i1, i2, 0, 0, 0, 0); @@ -889,11 +889,11 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // Rank 4 template <typename iType0, typename iType1, typename iType2, typename iType3> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), - reference_type>::type + reference_type> access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( @@ -902,12 +902,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { } template <typename iType0, typename iType1, typename iType2, typename iType3> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - access(const iType0& i0, const iType1& i1, const iType2& i2, - const iType3& i3) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + access(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) return m_map.reference(i0, i1, i2, i3, 0, 0, 0); @@ -916,12 +916,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // 
Rank 5 template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), - reference_type>::type + reference_type> access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, const iType4& i4) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( @@ -931,12 +931,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, - const iType4& i4) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + access(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) return m_map.reference(i0, i1, i2, i3, i4, 0, 0); @@ -945,12 +945,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // Rank 6 template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - (std::is_same<typename traits::specialize, void>::value && + KOKKOS_INLINE_FUNCTION std::enable_if_t< + (std::is_void<typename traits::specialize>::value && std::is_integral<iType0>::value && 
std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), - reference_type>::type + reference_type> access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, const iType4& i4, const iType5& i5) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( @@ -960,12 +960,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5> - KOKKOS_INLINE_FUNCTION typename std::enable_if< - !(std::is_same<typename drvtraits::specialize, void>::value && - std::is_integral<iType0>::value), - reference_type>::type - access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, - const iType4& i4, const iType5& i5) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<!(std::is_void<typename drvtraits::specialize>::value && + std::is_integral<iType0>::value), + reference_type> + access(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4, const iType5& i5) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) return m_map.reference(i0, i1, i2, i3, i4, i5, 0); @@ -974,12 +974,12 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // Rank 7 template <typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5, typename iType6> - KOKKOS_INLINE_FUNCTION typename std::enable_if< + KOKKOS_INLINE_FUNCTION std::enable_if_t< (std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), - reference_type>::type + reference_type> access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, const iType4& 
i4, const iType5& i5, const iType6& i6) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( @@ -1092,9 +1092,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <class... P> explicit inline DynRankView( const Kokkos::Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<!Kokkos::Impl::ViewCtorProp<P...>::has_pointer, - typename traits::array_layout>::type const& - arg_layout) + std::enable_if_t<!Kokkos::Impl::ViewCtorProp<P...>::has_pointer, + typename traits::array_layout> const& arg_layout) : m_track(), m_map(), m_rank(Impl::DynRankDimTraits<typename traits::specialize>:: @@ -1107,17 +1106,14 @@ class DynRankView : public ViewTraits<DataType, Properties...> { // to avoid duplicate class error. using alloc_prop = Kokkos::Impl::ViewCtorProp< P..., - typename std::conditional<alloc_prop_input::has_label, - std::integral_constant<unsigned, 0>, - typename std::string>::type, - typename std::conditional< - alloc_prop_input::has_memory_space, - std::integral_constant<unsigned, 1>, - typename traits::device_type::memory_space>::type, - typename std::conditional< - alloc_prop_input::has_execution_space, - std::integral_constant<unsigned, 2>, - typename traits::device_type::execution_space>::type>; + std::conditional_t<alloc_prop_input::has_label, + std::integral_constant<unsigned, 0>, std::string>, + std::conditional_t<alloc_prop_input::has_memory_space, + std::integral_constant<unsigned, 1>, + typename traits::device_type::memory_space>, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned, 2>, + typename traits::device_type::execution_space>>; static_assert(traits::is_managed, "View allocation constructor requires managed memory"); @@ -1152,7 +1148,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> { Kokkos::Impl::SharedAllocationRecord<>* record = m_map.allocate_shared( prop_copy, Impl::DynRankDimTraits<typename traits::specialize>:: - template createLayout<traits, P...>(arg_prop, 
arg_layout)); + template createLayout<traits, P...>(arg_prop, arg_layout), + Impl::ViewCtorProp<P...>::has_execution_space); //------------------------------------------------------------ #if defined(KOKKOS_ENABLE_CUDA) @@ -1172,9 +1169,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <class... P> explicit KOKKOS_INLINE_FUNCTION DynRankView( const Kokkos::Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<Kokkos::Impl::ViewCtorProp<P...>::has_pointer, - typename traits::array_layout>::type const& - arg_layout) + std::enable_if_t<Kokkos::Impl::ViewCtorProp<P...>::has_pointer, + typename traits::array_layout> const& arg_layout) : m_track() // No memory tracking , m_map(arg_prop, @@ -1197,15 +1193,15 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <class... P> explicit inline DynRankView( const Kokkos::Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<!Kokkos::Impl::ViewCtorProp<P...>::has_pointer, - size_t>::type const arg_N0 = KOKKOS_INVALID_INDEX, - const size_t arg_N1 = KOKKOS_INVALID_INDEX, - const size_t arg_N2 = KOKKOS_INVALID_INDEX, - const size_t arg_N3 = KOKKOS_INVALID_INDEX, - const size_t arg_N4 = KOKKOS_INVALID_INDEX, - const size_t arg_N5 = KOKKOS_INVALID_INDEX, - const size_t arg_N6 = KOKKOS_INVALID_INDEX, - const size_t arg_N7 = KOKKOS_INVALID_INDEX) + std::enable_if_t<!Kokkos::Impl::ViewCtorProp<P...>::has_pointer, + size_t> const arg_N0 = KOKKOS_INVALID_INDEX, + const size_t arg_N1 = KOKKOS_INVALID_INDEX, + const size_t arg_N2 = KOKKOS_INVALID_INDEX, + const size_t arg_N3 = KOKKOS_INVALID_INDEX, + const size_t arg_N4 = KOKKOS_INVALID_INDEX, + const size_t arg_N5 = KOKKOS_INVALID_INDEX, + const size_t arg_N6 = KOKKOS_INVALID_INDEX, + const size_t arg_N7 = KOKKOS_INVALID_INDEX) : DynRankView(arg_prop, typename traits::array_layout( arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7)) {} @@ -1213,15 +1209,15 @@ class DynRankView : public ViewTraits<DataType, 
Properties...> { template <class... P> explicit KOKKOS_INLINE_FUNCTION DynRankView( const Kokkos::Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<Kokkos::Impl::ViewCtorProp<P...>::has_pointer, - size_t>::type const arg_N0 = KOKKOS_INVALID_INDEX, - const size_t arg_N1 = KOKKOS_INVALID_INDEX, - const size_t arg_N2 = KOKKOS_INVALID_INDEX, - const size_t arg_N3 = KOKKOS_INVALID_INDEX, - const size_t arg_N4 = KOKKOS_INVALID_INDEX, - const size_t arg_N5 = KOKKOS_INVALID_INDEX, - const size_t arg_N6 = KOKKOS_INVALID_INDEX, - const size_t arg_N7 = KOKKOS_INVALID_INDEX) + std::enable_if_t<Kokkos::Impl::ViewCtorProp<P...>::has_pointer, + size_t> const arg_N0 = KOKKOS_INVALID_INDEX, + const size_t arg_N1 = KOKKOS_INVALID_INDEX, + const size_t arg_N2 = KOKKOS_INVALID_INDEX, + const size_t arg_N3 = KOKKOS_INVALID_INDEX, + const size_t arg_N4 = KOKKOS_INVALID_INDEX, + const size_t arg_N5 = KOKKOS_INVALID_INDEX, + const size_t arg_N6 = KOKKOS_INVALID_INDEX, + const size_t arg_N7 = KOKKOS_INVALID_INDEX) : DynRankView(arg_prop, typename traits::array_layout( arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7)) {} @@ -1230,9 +1226,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <typename Label> explicit inline DynRankView( const Label& arg_label, - typename std::enable_if<Kokkos::Impl::is_view_label<Label>::value, - typename traits::array_layout>::type const& - arg_layout) + std::enable_if_t<Kokkos::Impl::is_view_label<Label>::value, + typename traits::array_layout> const& arg_layout) : DynRankView(Kokkos::Impl::ViewCtorProp<std::string>(arg_label), arg_layout) {} @@ -1240,15 +1235,15 @@ class DynRankView : public ViewTraits<DataType, Properties...> { template <typename Label> explicit inline DynRankView( const Label& arg_label, - typename std::enable_if<Kokkos::Impl::is_view_label<Label>::value, - const size_t>::type arg_N0 = KOKKOS_INVALID_INDEX, - const size_t arg_N1 = KOKKOS_INVALID_INDEX, - const size_t arg_N2 = 
KOKKOS_INVALID_INDEX, - const size_t arg_N3 = KOKKOS_INVALID_INDEX, - const size_t arg_N4 = KOKKOS_INVALID_INDEX, - const size_t arg_N5 = KOKKOS_INVALID_INDEX, - const size_t arg_N6 = KOKKOS_INVALID_INDEX, - const size_t arg_N7 = KOKKOS_INVALID_INDEX) + std::enable_if_t<Kokkos::Impl::is_view_label<Label>::value, const size_t> + arg_N0 = KOKKOS_INVALID_INDEX, + const size_t arg_N1 = KOKKOS_INVALID_INDEX, + const size_t arg_N2 = KOKKOS_INVALID_INDEX, + const size_t arg_N3 = KOKKOS_INVALID_INDEX, + const size_t arg_N4 = KOKKOS_INVALID_INDEX, + const size_t arg_N5 = KOKKOS_INVALID_INDEX, + const size_t arg_N6 = KOKKOS_INVALID_INDEX, + const size_t arg_N7 = KOKKOS_INVALID_INDEX) : DynRankView( Kokkos::Impl::ViewCtorProp<std::string>(arg_label), typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, @@ -1298,7 +1293,7 @@ class DynRankView : public ViewTraits<DataType, Properties...> { (arg_N4 != KOKKOS_INVALID_INDEX) + (arg_N5 != KOKKOS_INVALID_INDEX) + (arg_N6 != KOKKOS_INVALID_INDEX) + (arg_N7 != KOKKOS_INVALID_INDEX); - if (std::is_same<typename traits::specialize, void>::value && + if (std::is_void<typename traits::specialize>::value && num_passed_args != traits::rank_dynamic) { Kokkos::abort( "Kokkos::View::shmem_size() rank_dynamic != number of arguments.\n"); @@ -1365,15 +1360,14 @@ namespace Impl { template <class SrcTraits, class... 
Args> class ViewMapping< - typename std::enable_if< - (std::is_same<typename SrcTraits::specialize, void>::value && - (std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutLeft>::value || - std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutRight>::value || - std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutStride>::value)), - Kokkos::Impl::DynRankSubviewTag>::type, + std::enable_if_t<(std::is_void<typename SrcTraits::specialize>::value && + (std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutLeft>::value || + std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutRight>::value || + std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutStride>::value)), + Kokkos::Impl::DynRankSubviewTag>, SrcTraits, Args...> { private: enum { @@ -1445,22 +1439,21 @@ class ViewMapping< Args... args) { using DstType = ViewMapping<traits_type, typename traits_type::specialize>; - using DstDimType = typename std::conditional< + using DstDimType = std::conditional_t< (rank == 0), ViewDimension<>, - typename std::conditional< + std::conditional_t< (rank == 1), ViewDimension<0>, - typename std::conditional< + std::conditional_t< (rank == 2), ViewDimension<0, 0>, - typename std::conditional< + std::conditional_t< (rank == 3), ViewDimension<0, 0, 0>, - typename std::conditional< + std::conditional_t< (rank == 4), ViewDimension<0, 0, 0, 0>, - typename std::conditional< + std::conditional_t< (rank == 5), ViewDimension<0, 0, 0, 0, 0>, - typename std::conditional< + std::conditional_t< (rank == 6), ViewDimension<0, 0, 0, 0, 0, 0>, - ViewDimension<0, 0, 0, 0, 0, 0, 0> >::type>:: - type>::type>::type>::type>::type>::type; + ViewDimension<0, 0, 0, 0, 0, 0, 0>>>>>>>>; using dst_offset_type = ViewOffset<DstDimType, Kokkos::LayoutStride>; using dst_handle_type = typename DstType::handle_type; @@ -1621,8 +1614,7 @@ struct DynRankViewFill { }; template <class OutputView> -struct DynRankViewFill<OutputView, - typename 
std::enable_if<OutputView::Rank == 0>::type> { +struct DynRankViewFill<OutputView, std::enable_if_t<OutputView::Rank == 0>> { DynRankViewFill(const OutputView& dst, const typename OutputView::const_value_type& src) { Kokkos::Impl::DeepCopy<typename OutputView::memory_space, @@ -1645,6 +1637,24 @@ struct DynRankViewRemap { const size_t n6; const size_t n7; + DynRankViewRemap(const ExecSpace& exec_space, const OutputView& arg_out, + const InputView& arg_in) + : output(arg_out), + input(arg_in), + n0(std::min((size_t)arg_out.extent(0), (size_t)arg_in.extent(0))), + n1(std::min((size_t)arg_out.extent(1), (size_t)arg_in.extent(1))), + n2(std::min((size_t)arg_out.extent(2), (size_t)arg_in.extent(2))), + n3(std::min((size_t)arg_out.extent(3), (size_t)arg_in.extent(3))), + n4(std::min((size_t)arg_out.extent(4), (size_t)arg_in.extent(4))), + n5(std::min((size_t)arg_out.extent(5), (size_t)arg_in.extent(5))), + n6(std::min((size_t)arg_out.extent(6), (size_t)arg_in.extent(6))), + n7(std::min((size_t)arg_out.extent(7), (size_t)arg_in.extent(7))) { + using Policy = Kokkos::RangePolicy<ExecSpace>; + + Kokkos::parallel_for("Kokkos::DynRankViewRemap", Policy(exec_space, 0, n0), + *this); + } + DynRankViewRemap(const OutputView& arg_out, const InputView& arg_in) : output(arg_out), input(arg_in), @@ -1691,14 +1701,19 @@ namespace Impl { underlying memory, to facilitate implementation of deep_copy() and other routines that are defined on View */ template <unsigned N, typename T, typename... 
Args> -auto as_view_of_rank_n(DynRankView<T, Args...> v) { +KOKKOS_FUNCTION auto as_view_of_rank_n(DynRankView<T, Args...> v) { if (v.rank() != N) { - Kokkos::Impl::throw_runtime_exception( - "Converting DynRankView of rank " + std::to_string(v.rank()) + - " to a View of mis-matched rank " + std::to_string(N)); + KOKKOS_IF_ON_HOST( + const std::string message = + "Converting DynRankView of rank " + std::to_string(v.rank()) + + " to a View of mis-matched rank " + std::to_string(N) + "!"; + Kokkos::abort(message.c_str());) + KOKKOS_IF_ON_DEVICE( + Kokkos::abort("Converting DynRankView to a View of mis-matched rank!");) } - return View<typename RankDataType<T, N>::type, Args...>(v.data(), v.layout()); + return View<typename RankDataType<T, N>::type, Args...>( + v.data(), v.impl_map().layout()); } template <typename Function, typename... Args> @@ -1713,22 +1728,54 @@ void apply_to_view_of_static_rank(Function&& f, DynRankView<Args...> a) { case 6: f(as_view_of_rank_n<6>(a)); break; case 7: f(as_view_of_rank_n<7>(a)); break; default: - Kokkos::Impl::throw_runtime_exception( - "Trying to apply a function to a view of unexpected rank " + - std::to_string(rank(a))); + KOKKOS_IF_ON_HOST( + Kokkos::abort( + std::string( + "Trying to apply a function to a view of unexpected rank " + + std::to_string(rank(a))) + .c_str());) + KOKKOS_IF_ON_DEVICE( + Kokkos::abort( + "Trying to apply a function to a view of unexpected rank");) } } } // namespace Impl +template <typename D, class... 
P> +KOKKOS_INLINE_FUNCTION constexpr auto DynRankView<D, P...>::layout() const -> + typename traits::array_layout { + switch (rank()) { + case 0: return Impl::as_view_of_rank_n<0>(*this).layout(); + case 1: return Impl::as_view_of_rank_n<1>(*this).layout(); + case 2: return Impl::as_view_of_rank_n<2>(*this).layout(); + case 3: return Impl::as_view_of_rank_n<3>(*this).layout(); + case 4: return Impl::as_view_of_rank_n<4>(*this).layout(); + case 5: return Impl::as_view_of_rank_n<5>(*this).layout(); + case 6: return Impl::as_view_of_rank_n<6>(*this).layout(); + case 7: return Impl::as_view_of_rank_n<7>(*this).layout(); + default: + KOKKOS_IF_ON_HOST( + Kokkos::abort( + std::string( + "Calling DynRankView::layout on DRV of unexpected rank " + + std::to_string(rank())) + .c_str());) + KOKKOS_IF_ON_DEVICE( + Kokkos::abort( + "Calling DynRankView::layout on DRV of unexpected rank");) + } + // control flow should never reach here + return m_map.layout(); +} + /** \brief Deep copy a value from Host memory into a view. */ template <class ExecSpace, class DT, class... DP> inline void deep_copy( const ExecSpace& e, const DynRankView<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { static_assert( std::is_same<typename ViewTraits<DT, DP...>::non_const_value_type, typename ViewTraits<DT, DP...>::value_type>::value, @@ -1742,9 +1789,8 @@ template <class DT, class... 
DP> inline void deep_copy( const DynRankView<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { Impl::apply_to_view_of_static_rank([=](auto view) { deep_copy(view, value); }, dst); } @@ -1755,8 +1801,8 @@ inline void deep_copy( const ExecSpace& e, typename ViewTraits<ST, SP...>::non_const_value_type& dst, const DynRankView<ST, SP...>& src, - typename std::enable_if<std::is_same< - typename ViewTraits<ST, SP...>::specialize, void>::value>::type* = 0) { + std::enable_if_t<std::is_same<typename ViewTraits<ST, SP...>::specialize, + void>::value>* = 0) { deep_copy(e, dst, Impl::as_view_of_rank_n<0>(src)); } @@ -1764,8 +1810,8 @@ template <class ST, class... SP> inline void deep_copy( typename ViewTraits<ST, SP...>::non_const_value_type& dst, const DynRankView<ST, SP...>& src, - typename std::enable_if<std::is_same< - typename ViewTraits<ST, SP...>::specialize, void>::value>::type* = 0) { + std::enable_if_t<std::is_same<typename ViewTraits<ST, SP...>::specialize, + void>::value>* = 0) { deep_copy(dst, Impl::as_view_of_rank_n<0>(src)); } @@ -1778,11 +1824,11 @@ inline void deep_copy( template <class ExecSpace, class DstType, class SrcType> inline void deep_copy( const ExecSpace& exec_space, const DstType& dst, const SrcType& src, - typename std::enable_if< - (std::is_same<typename DstType::traits::specialize, void>::value && - std::is_same<typename SrcType::traits::specialize, void>::value && + std::enable_if_t< + (std::is_void<typename DstType::traits::specialize>::value && + std::is_void<typename SrcType::traits::specialize>::value && (Kokkos::is_dyn_rank_view<DstType>::value || - Kokkos::is_dyn_rank_view<SrcType>::value))>::type* = nullptr) { + Kokkos::is_dyn_rank_view<SrcType>::value))>* = nullptr) { static_assert( 
std::is_same<typename DstType::traits::value_type, typename DstType::traits::non_const_value_type>::value, @@ -1831,11 +1877,11 @@ inline void deep_copy( template <class DstType, class SrcType> inline void deep_copy( const DstType& dst, const SrcType& src, - typename std::enable_if< - (std::is_same<typename DstType::traits::specialize, void>::value && - std::is_same<typename SrcType::traits::specialize, void>::value && + std::enable_if_t< + (std::is_void<typename DstType::traits::specialize>::value && + std::is_void<typename SrcType::traits::specialize>::value && (Kokkos::is_dyn_rank_view<DstType>::value || - Kokkos::is_dyn_rank_view<SrcType>::value))>::type* = nullptr) { + Kokkos::is_dyn_rank_view<SrcType>::value))>* = nullptr) { static_assert( std::is_same<typename DstType::traits::value_type, typename DstType::traits::non_const_value_type>::value, @@ -1910,8 +1956,8 @@ struct MirrorDRViewType { using dest_view_type = Kokkos::DynRankView<data_type, array_layout, Space>; // If it is the same memory_space return the existsing view_type // This will also keep the unmanaged trait if necessary - using view_type = typename std::conditional<is_same_memspace, src_view_type, - dest_view_type>::type; + using view_type = + std::conditional_t<is_same_memspace, src_view_type, dest_view_type>; }; template <class Space, class T, class... P> @@ -1936,124 +1982,352 @@ struct MirrorDRVType { } // namespace Impl -template <class T, class... P> +namespace Impl { +template <class T, class... P, class... 
ViewCtorArgs> inline typename DynRankView<T, P...>::HostMirror create_mirror( const DynRankView<T, P...>& src, - typename std::enable_if< - std::is_same<typename ViewTraits<T, P...>::specialize, void>::value && - !std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout, - Kokkos::LayoutStride>::value>::type* = nullptr) { + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = + nullptr) { using src_type = DynRankView<T, P...>; using dst_type = typename src_type::HostMirror; - return dst_type(std::string(src.label()).append("_mirror"), - Impl::reconstructLayout(src.layout(), src.rank())); + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert( + !alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::create_mirror " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::allow_padding, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not explicitly allow padding!"); + + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + std::string(src.label()).append("_mirror"); + + return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank())); } +template <class T, class... P, class... 
ViewCtorArgs> +inline auto create_mirror( + const DynRankView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + std::enable_if_t<Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = + nullptr) { + using dst_type = typename Impl::MirrorDRVType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::view_type; + + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert( + !alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::create_mirror " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::allow_padding, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not explicitly allow padding!"); + + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + std::string(src.label()).append("_mirror"); + + return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank())); +} + +} // namespace Impl + +// Create a mirror in host space template <class T, class... 
P> inline typename DynRankView<T, P...>::HostMirror create_mirror( const DynRankView<T, P...>& src, - typename std::enable_if< - std::is_same<typename ViewTraits<T, P...>::specialize, void>::value && - std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout, - Kokkos::LayoutStride>::value>::type* = 0) { - using src_type = DynRankView<T, P...>; - using dst_type = typename src_type::HostMirror; + std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize, + void>::value>* = nullptr) { + return Impl::create_mirror(src, Kokkos::Impl::ViewCtorProp<>{}); +} - return dst_type(std::string(src.label()).append("_mirror"), - Impl::reconstructLayout(src.layout(), src.rank())); +template <class T, class... P> +inline typename DynRankView<T, P...>::HostMirror create_mirror( + Kokkos::Impl::WithoutInitializing_t wi, const DynRankView<T, P...>& src, + std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize, + void>::value>* = nullptr) { + return Impl::create_mirror(src, Kokkos::view_alloc(wi)); +} + +template <class T, class... P, class... ViewCtorArgs> +inline typename DynRankView<T, P...>::HostMirror create_mirror( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const DynRankView<T, P...>& src, + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + !Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = nullptr) { + return Impl::create_mirror(src, arg_prop); +} + +// Create a mirror in a new space +template <class Space, class T, class... 
P, + typename Enable = std::enable_if_t< + Kokkos::is_space<Space>::value && + std::is_void<typename ViewTraits<T, P...>::specialize>::value>> +typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror( + const Space&, const Kokkos::DynRankView<T, P...>& src) { + return Impl::create_mirror( + src, Kokkos::view_alloc(typename Space::memory_space{})); } -// Create a mirror in a new space (specialization for different space) template <class Space, class T, class... P> typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror( + Kokkos::Impl::WithoutInitializing_t wi, const Space&, + const Kokkos::DynRankView<T, P...>& src, + std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize, + void>::value>* = nullptr) { + return Impl::create_mirror( + src, Kokkos::view_alloc(wi, typename Space::memory_space{})); +} + +template <class T, class... P, class... ViewCtorArgs> +inline auto create_mirror( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const DynRankView<T, P...>& src, + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = nullptr) { + using ReturnType = typename Impl::MirrorDRVType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::view_type; + return ReturnType{Impl::create_mirror(src, arg_prop)}; +} + +namespace Impl { +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + std::is_same< + typename DynRankView<T, P...>::memory_space, + typename DynRankView<T, P...>::HostMirror::memory_space>::value && + std::is_same< + typename DynRankView<T, P...>::data_type, + typename DynRankView<T, P...>::HostMirror::data_type>::value, + typename DynRankView<T, P...>::HostMirror> +create_mirror_view(const DynRankView<T, P...>& src, + const typename Impl::ViewCtorProp<ViewCtorArgs...>&) { + return src; +} + +template <class T, class... P, class... 
ViewCtorArgs> +inline std::enable_if_t< + !(std::is_same< + typename DynRankView<T, P...>::memory_space, + typename DynRankView<T, P...>::HostMirror::memory_space>::value && + std::is_same< + typename DynRankView<T, P...>::data_type, + typename DynRankView<T, P...>::HostMirror::data_type>::value), + typename DynRankView<T, P...>::HostMirror> +create_mirror_view( + const DynRankView<T, P...>& src, + const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + return Kokkos::Impl::create_mirror(src, arg_prop); +} + +template <class Space, class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + Kokkos::is_space<Space>::value && + Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace, + typename Impl::MirrorDRViewType<Space, T, P...>::view_type> +create_mirror_view(const Space&, const Kokkos::DynRankView<T, P...>& src, + const typename Impl::ViewCtorProp<ViewCtorArgs...>&) { + return src; +} + +template <class Space, class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + Kokkos::is_space<Space>::value && + !Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace, + typename Impl::MirrorDRViewType<Space, T, P...>::view_type> +create_mirror_view( const Space&, const Kokkos::DynRankView<T, P...>& src, - typename std::enable_if<std::is_same< - typename ViewTraits<T, P...>::specialize, void>::value>::type* = - nullptr) { - return typename Impl::MirrorDRVType<Space, T, P...>::view_type( - src.label(), Impl::reconstructLayout(src.layout(), src.rank())); + const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using MemorySpace = typename Space::memory_space; + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., MemorySpace>; + alloc_prop prop_copy(arg_prop); + + return Kokkos::Impl::create_mirror(src, prop_copy); } +} // namespace Impl +// Create a mirror view in host space template <class T, class... 
P> -inline typename DynRankView<T, P...>::HostMirror create_mirror_view( - const DynRankView<T, P...>& src, - typename std::enable_if< - (std::is_same< - typename DynRankView<T, P...>::memory_space, - typename DynRankView<T, P...>::HostMirror::memory_space>::value && - std::is_same<typename DynRankView<T, P...>::data_type, - typename DynRankView<T, P...>::HostMirror::data_type>:: - value)>::type* = nullptr) { +inline std::enable_if_t< + (std::is_same< + typename DynRankView<T, P...>::memory_space, + typename DynRankView<T, P...>::HostMirror::memory_space>::value && + std::is_same<typename DynRankView<T, P...>::data_type, + typename DynRankView<T, P...>::HostMirror::data_type>::value), + typename DynRankView<T, P...>::HostMirror> +create_mirror_view(const Kokkos::DynRankView<T, P...>& src) { return src; } template <class T, class... P> -inline typename DynRankView<T, P...>::HostMirror create_mirror_view( - const DynRankView<T, P...>& src, - typename std::enable_if< - !(std::is_same< - typename DynRankView<T, P...>::memory_space, - typename DynRankView<T, P...>::HostMirror::memory_space>::value && - std::is_same<typename DynRankView<T, P...>::data_type, - typename DynRankView<T, P...>::HostMirror::data_type>:: - value)>::type* = nullptr) { +inline std::enable_if_t< + !(std::is_same< + typename DynRankView<T, P...>::memory_space, + typename DynRankView<T, P...>::HostMirror::memory_space>::value && + std::is_same< + typename DynRankView<T, P...>::data_type, + typename DynRankView<T, P...>::HostMirror::data_type>::value), + typename DynRankView<T, P...>::HostMirror> +create_mirror_view(const Kokkos::DynRankView<T, P...>& src) { return Kokkos::create_mirror(src); } -// Create a mirror view in a new space (specialization for same space) -template <class Space, class T, class... P> -typename Impl::MirrorDRViewType<Space, T, P...>::view_type create_mirror_view( +template <class T, class... 
P> +inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, + const DynRankView<T, P...>& src) { + return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); +} + +// Create a mirror view in a new space +// FIXME_C++17 Improve SFINAE here. +template <class Space, class T, class... P, + class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> +inline typename Impl::MirrorDRViewType<Space, T, P...>::view_type +create_mirror_view( const Space&, const Kokkos::DynRankView<T, P...>& src, - typename std::enable_if< - Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = - nullptr) { + std::enable_if_t< + Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>* = nullptr) { return src; } -// Create a mirror view in a new space (specialization for different space) -template <class Space, class T, class... P> -typename Impl::MirrorDRViewType<Space, T, P...>::view_type create_mirror_view( - const Space&, const Kokkos::DynRankView<T, P...>& src, - typename std::enable_if< - !Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = - nullptr) { - return typename Impl::MirrorDRViewType<Space, T, P...>::view_type( - src.label(), Impl::reconstructLayout(src.layout(), src.rank())); +// FIXME_C++17 Improve SFINAE here. +template <class Space, class T, class... P, + class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> +inline typename Impl::MirrorDRViewType<Space, T, P...>::view_type +create_mirror_view( + const Space& space, const Kokkos::DynRankView<T, P...>& src, + std::enable_if_t< + !Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>* = nullptr) { + return Kokkos::create_mirror(space, src); } -// Create a mirror view and deep_copy in a new space (specialization for same -// space) template <class Space, class T, class... 
P> -typename Impl::MirrorDRViewType<Space, T, P...>::view_type -create_mirror_view_and_copy( - const Space&, const Kokkos::DynRankView<T, P...>& src, - std::string const& name = "", - typename std::enable_if< - Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = - nullptr) { - (void)name; +inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, + const Space& space, + const Kokkos::DynRankView<T, P...>& src) { + return Impl::create_mirror_view(space, src, Kokkos::view_alloc(wi)); +} + +template <class T, class... P, class... ViewCtorArgs> +inline auto create_mirror_view( + const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::DynRankView<T, P...>& src) { + return Impl::create_mirror_view(src, arg_prop); +} + +template <class... ViewCtorArgs, class T, class... P> +auto create_mirror_view_and_copy( + const Impl::ViewCtorProp<ViewCtorArgs...>&, + const Kokkos::DynRankView<T, P...>& src, + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + Impl::MirrorDRViewType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::is_same_memspace>* = nullptr) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + static_assert( + alloc_prop_input::has_memory_space, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must include a memory space!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not include a pointer!"); + static_assert(!alloc_prop_input::allow_padding, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not explicitly allow padding!"); + + // same behavior as deep_copy(src, src) + if (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src view"); return src; } -// Create a mirror view and deep_copy in a new space 
(specialization for -// different space) -template <class Space, class T, class... P> -typename Impl::MirrorDRViewType<Space, T, P...>::view_type -create_mirror_view_and_copy( - const Space&, const Kokkos::DynRankView<T, P...>& src, - std::string const& name = "", - typename std::enable_if< - !Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = - nullptr) { +template <class... ViewCtorArgs, class T, class... P> +auto create_mirror_view_and_copy( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::DynRankView<T, P...>& src, + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + !Impl::MirrorDRViewType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::is_same_memspace>* = nullptr) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + static_assert( + alloc_prop_input::has_memory_space, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must include a memory space!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not include a pointer!"); + static_assert(!alloc_prop_input::allow_padding, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not explicitly allow padding!"); + using Space = typename alloc_prop_input::memory_space; using Mirror = typename Impl::MirrorDRViewType<Space, T, P...>::view_type; - std::string label = name.empty() ? 
src.label() : name; - auto mirror = Mirror(view_alloc(WithoutInitializing, label), - Impl::reconstructLayout(src.layout(), src.rank())); - deep_copy(mirror, src); + + // Add some properties if not provided to avoid need for if constexpr + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., + std::conditional_t<alloc_prop_input::has_label, + std::integral_constant<unsigned int, 12>, std::string>, + std::conditional_t<!alloc_prop_input::initialize, + std::integral_constant<unsigned int, 13>, + Impl::WithoutInitializing_t>, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 14>, + typename Space::execution_space>>; + alloc_prop arg_prop_copy(arg_prop); + + std::string& label = + static_cast<Impl::ViewCtorProp<void, std::string>&>(arg_prop_copy).value; + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type{ + arg_prop_copy, Impl::reconstructLayout(src.layout(), src.rank())}; + if (alloc_prop_input::has_execution_space) { + using ExecutionSpace = typename alloc_prop::execution_space; + deep_copy( + static_cast<Impl::ViewCtorProp<void, ExecutionSpace>&>(arg_prop_copy) + .value, + mirror, src); + } else + deep_copy(mirror, src); return mirror; } +template <class Space, class T, class... P> +auto create_mirror_view_and_copy(const Space&, + const Kokkos::DynRankView<T, P...>& src, + std::string const& name = "") { + return create_mirror_view_and_copy( + Kokkos::view_alloc(typename Space::memory_space{}, name), src); +} + } // namespace Kokkos //---------------------------------------------------------------------------- @@ -2062,20 +2336,47 @@ create_mirror_view_and_copy( namespace Kokkos { /** \brief Resize a view with copying old data to new data at the corresponding * indices. */ -template <class... I, class T, class... P> -inline void impl_resize(DynRankView<T, P...>& v, const size_t n0, +template <class... ViewCtorArgs, class T, class... 
P> +inline void impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + DynRankView<T, P...>& v, const size_t n0, const size_t n1, const size_t n2, const size_t n3, const size_t n4, const size_t n5, const size_t n6, - const size_t n7, const I&... arg_prop) { - using drview_type = DynRankView<T, P...>; + const size_t n7) { + using drview_type = DynRankView<T, P...>; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; static_assert(Kokkos::ViewTraits<T, P...>::is_managed, "Can only resize managed views"); - - drview_type v_resized(view_alloc(v.label(), arg_prop...), n0, n1, n2, n3, n4, - n5, n6, n7); - - Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v); + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::resize " + "must not include a label!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::resize must " + "not include a pointer!"); + static_assert(!alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::resize must " + "not include a memory space instance!"); + + // Add execution space here to avoid the need for if constexpr below + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., std::string, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 10>, + typename drview_type::execution_space>>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + v.label(); + + drview_type v_resized(prop_copy, n0, n1, n2, n3, n4, n5, n6, n7); + + if (alloc_prop_input::has_execution_space) + Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>( + static_cast<const Impl::ViewCtorProp< + void, typename alloc_prop::execution_space>&>(prop_copy) + .value, + v_resized, v); + else + Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v); v = v_resized; } @@ -2090,7 +2391,21 @@ inline void 
resize(DynRankView<T, P...>& v, const size_t n5 = KOKKOS_INVALID_INDEX, const size_t n6 = KOKKOS_INVALID_INDEX, const size_t n7 = KOKKOS_INVALID_INDEX) { - impl_resize(v, n0, n1, n2, n3, n4, n5, n6, n7); + impl_resize(Impl::ViewCtorProp<>{}, v, n0, n1, n2, n3, n4, n5, n6, n7); +} + +template <class... ViewCtorArgs, class T, class... P> +void resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + DynRankView<T, P...>& v, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + impl_resize(arg_prop, v, n0, n1, n2, n3, n4, n5, n6, n7); } template <class I, class T, class... P> @@ -2104,26 +2419,53 @@ inline std::enable_if_t<Impl::is_view_ctor_property<I>::value> resize( const size_t n5 = KOKKOS_INVALID_INDEX, const size_t n6 = KOKKOS_INVALID_INDEX, const size_t n7 = KOKKOS_INVALID_INDEX) { - impl_resize(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); + impl_resize(Kokkos::view_alloc(arg_prop), v, n0, n1, n2, n3, n4, n5, n6, n7); } /** \brief Resize a view with copying old data to new data at the corresponding * indices. */ -template <class... I, class T, class... P> +template <class... ViewCtorArgs, class T, class... P> inline void impl_realloc(DynRankView<T, P...>& v, const size_t n0, const size_t n1, const size_t n2, const size_t n3, const size_t n4, const size_t n5, const size_t n6, - const size_t n7, const I&... 
arg_prop) { - using drview_type = DynRankView<T, P...>; + const size_t n7, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using drview_type = DynRankView<T, P...>; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; static_assert(Kokkos::ViewTraits<T, P...>::is_managed, "Can only realloc managed views"); - - const std::string label = v.label(); + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a label!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a pointer!"); + static_assert(!alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a memory space instance!"); + + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop arg_prop_copy(arg_prop); + static_cast<Kokkos::Impl::ViewCtorProp<void, std::string>&>(arg_prop_copy) + .value = v.label(); v = drview_type(); // Deallocate first, if the only view to allocation - v = drview_type(view_alloc(label, arg_prop...), n0, n1, n2, n3, n4, n5, n6, - n7); + v = drview_type(arg_prop_copy, n0, n1, n2, n3, n4, n5, n6, n7); +} + +template <class T, class... P, class... ViewCtorArgs> +inline void realloc(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + DynRankView<T, P...>& v, + const size_t n0 = KOKKOS_INVALID_INDEX, + const size_t n1 = KOKKOS_INVALID_INDEX, + const size_t n2 = KOKKOS_INVALID_INDEX, + const size_t n3 = KOKKOS_INVALID_INDEX, + const size_t n4 = KOKKOS_INVALID_INDEX, + const size_t n5 = KOKKOS_INVALID_INDEX, + const size_t n6 = KOKKOS_INVALID_INDEX, + const size_t n7 = KOKKOS_INVALID_INDEX) { + impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); } template <class T, class... 
P> @@ -2136,7 +2478,7 @@ inline void realloc(DynRankView<T, P...>& v, const size_t n5 = KOKKOS_INVALID_INDEX, const size_t n6 = KOKKOS_INVALID_INDEX, const size_t n7 = KOKKOS_INVALID_INDEX) { - impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7); + impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, Impl::ViewCtorProp<>{}); } template <class I, class T, class... P> @@ -2150,9 +2492,13 @@ inline std::enable_if_t<Impl::is_view_ctor_property<I>::value> realloc( const size_t n5 = KOKKOS_INVALID_INDEX, const size_t n6 = KOKKOS_INVALID_INDEX, const size_t n7 = KOKKOS_INVALID_INDEX) { - impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); + impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, Kokkos::view_alloc(arg_prop)); } } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DYNRANKVIEW +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DYNRANKVIEW +#endif #endif diff --git a/packages/kokkos/containers/src/Kokkos_DynamicView.hpp b/packages/kokkos/containers/src/Kokkos_DynamicView.hpp index 91904d7cc986589bcea7ecf8680fe9eca0be896c..015a75cb0b02c602db2a3bded219497c3414595c 100644 --- a/packages/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_DYNAMIC_VIEW_HPP #define KOKKOS_DYNAMIC_VIEW_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DYNAMICVIEW +#endif #include <cstdio> @@ -118,8 +122,7 @@ struct ChunkedArrayManager { template <typename Space> static ChunkedArrayManager<Space, ValueType> create_mirror( ChunkedArrayManager<MemorySpace, ValueType> const& other, - typename std::enable_if<IsAccessibleFrom<Space>::value>::type* = - nullptr) { + std::enable_if_t<IsAccessibleFrom<Space>::value>* = nullptr) { return ChunkedArrayManager<Space, ValueType>{ ACCESSIBLE_TAG{}, other.m_chunks, other.m_chunk_max}; } @@ -127,8 +130,7 @@ struct ChunkedArrayManager { template <typename 
Space> static ChunkedArrayManager<Space, ValueType> create_mirror( ChunkedArrayManager<MemorySpace, ValueType> const& other, - typename std::enable_if<!IsAccessibleFrom<Space>::value>::type* = - nullptr) { + std::enable_if_t<!IsAccessibleFrom<Space>::value>* = nullptr) { using tag_type = typename ChunkedArrayManager<Space, ValueType>::INACCESSIBLE_TAG; return ChunkedArrayManager<Space, ValueType>{tag_type{}, other.m_chunk_max, @@ -217,17 +219,15 @@ struct ChunkedArrayManager { pointer_type* get_ptr() const { return m_chunks; } - template <typename Space> - typename std::enable_if<!IsAccessibleFrom<Space>::value>::type deep_copy_to( - ChunkedArrayManager<Space, ValueType> const& other) { - Kokkos::Impl::DeepCopy<Space, MemorySpace>( - other.m_chunks, m_chunks, sizeof(pointer_type) * (m_chunk_max + 2)); - } - - template <typename Space> - typename std::enable_if<IsAccessibleFrom<Space>::value>::type deep_copy_to( - ChunkedArrayManager<Space, ValueType> const&) { - // no-op + template <typename OtherMemorySpace, typename ExecutionSpace> + void deep_copy_to( + const ExecutionSpace& exec_space, + ChunkedArrayManager<OtherMemorySpace, ValueType> const& other) const { + if (other.m_chunks != m_chunks) { + Kokkos::Impl::DeepCopy<OtherMemorySpace, MemorySpace, ExecutionSpace>( + exec_space, other.m_chunks, m_chunks, + sizeof(pointer_type) * (m_chunk_max + 2)); + } } KOKKOS_INLINE_FUNCTION @@ -278,7 +278,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> { // It is assumed that the value_type is trivially copyable; // when this is not the case, potential problems can occur. 
- static_assert(std::is_same<typename traits::specialize, void>::value, + static_assert(std::is_void<typename traits::specialize>::value, "DynamicView only implemented for non-specialized View type"); private: @@ -339,6 +339,9 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> { KOKKOS_INLINE_FUNCTION size_t chunk_size() const noexcept { return m_chunk_size; } + KOKKOS_INLINE_FUNCTION + size_t chunk_max() const noexcept { return m_chunk_max; } + KOKKOS_INLINE_FUNCTION size_t size() const noexcept { size_t extent_0 = @@ -411,33 +414,14 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> { "space"); // Which chunk is being indexed. - const uintptr_t ic = uintptr_t(i0 >> m_chunk_shift); - - typename traits::value_type* volatile* const ch = m_chunks + ic; + const uintptr_t ic = uintptr_t(i0) >> m_chunk_shift; - // Do bounds checking if enabled or if the chunk pointer is zero. - // If not bounds checking then we assume a non-zero pointer is valid. - -#if !defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) - if (nullptr == *ch) +#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) + const uintptr_t n = *reinterpret_cast<uintptr_t*>(m_chunks + m_chunk_max); + if (n <= ic) Kokkos::abort("Kokkos::DynamicView array bounds error"); #endif - { - // Verify that allocation of the requested chunk in in progress. - - // The allocated chunk counter is m_chunks[ m_chunk_max ] - const uintptr_t n = - *reinterpret_cast<uintptr_t volatile*>(m_chunks + m_chunk_max); - - if (n <= ic) { - Kokkos::abort("Kokkos::DynamicView array bounds error"); - } - - // Allocation of this chunk is in progress - // so wait for allocation to complete. 
- while (nullptr == *ch) - ; - } + typename traits::value_type** const ch = m_chunks + ic; return (*ch)[i0 & m_chunk_mask]; } @@ -481,7 +465,10 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> { // *m_chunks_host[m_chunk_max+1] stores the 'extent' requested by resize *(pc + 1) = n; - m_chunks_host.deep_copy_to(m_chunks); + typename device_space::execution_space exec{}; + m_chunks_host.deep_copy_to(exec, m_chunks); + exec.fence( + "DynamicView::resize_serial: Fence after copying chunks to the device"); } KOKKOS_INLINE_FUNCTION bool is_allocated() const { @@ -496,6 +483,12 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> { } } + KOKKOS_FUNCTION const device_accessor& impl_get_chunks() const { + return m_chunks; + } + + KOKKOS_FUNCTION device_accessor& impl_get_chunks() { return m_chunks; } + //---------------------------------------------------------------------- ~DynamicView() = default; @@ -525,9 +518,10 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> { * A maximum size is required in order to allocate a * chunk-pointer array. */ - explicit inline DynamicView(const std::string& arg_label, - const unsigned min_chunk_size, - const unsigned max_extent) + template <class... 
Prop> + DynamicView(const Kokkos::Impl::ViewCtorProp<Prop...>& arg_prop, + const unsigned min_chunk_size, + const unsigned max_extent) : // The chunk size is guaranteed to be a power of two m_chunk_shift(Kokkos::Impl::integral_power_of_two_that_contains( min_chunk_size)) // div ceil(log2(min_chunk_size)) @@ -540,33 +534,336 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> { m_chunk_size(2 << (m_chunk_shift - 1)) { m_chunks = device_accessor(m_chunk_max, m_chunk_size); + const std::string& label = + static_cast<Kokkos::Impl::ViewCtorProp<void, std::string> const&>( + arg_prop) + .value; + if (device_accessor::template IsAccessibleFrom<host_space>::value) { - m_chunks.template allocate_with_destroy<device_space>(arg_label); + m_chunks.template allocate_with_destroy<device_space>(label); m_chunks.initialize(); m_chunks_host = device_accessor::template create_mirror<host_space>(m_chunks); } else { - m_chunks.allocate_device(arg_label); + m_chunks.allocate_device(label); m_chunks_host = device_accessor::template create_mirror<host_space>(m_chunks); m_chunks_host.template allocate_with_destroy<device_space>( - arg_label, m_chunks.get_ptr()); + label, m_chunks.get_ptr()); m_chunks_host.initialize(); - m_chunks_host.deep_copy_to(m_chunks); + + // Add some properties if not provided to avoid need for if constexpr + using alloc_prop_input = Kokkos::Impl::ViewCtorProp<Prop...>; + using alloc_prop = Kokkos::Impl::ViewCtorProp< + Prop..., std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 15>, + typename device_space::execution_space>>; + alloc_prop arg_prop_copy(arg_prop); + + const auto& exec = static_cast<const Kokkos::Impl::ViewCtorProp< + void, typename alloc_prop::execution_space>&>(arg_prop_copy) + .value; + m_chunks_host.deep_copy_to(exec, m_chunks); + if (!alloc_prop_input::has_execution_space) + exec.fence( + "DynamicView::DynamicView(): Fence after copying chunks to the " + "device"); } } + + 
DynamicView(const std::string& arg_label, const unsigned min_chunk_size, + const unsigned max_extent) + : DynamicView(Kokkos::view_alloc(arg_label), min_chunk_size, max_extent) { + } }; } // namespace Experimental + +template <class> +struct is_dynamic_view : public std::false_type {}; + +template <class D, class... P> +struct is_dynamic_view<Kokkos::Experimental::DynamicView<D, P...>> + : public std::true_type {}; + } // namespace Kokkos namespace Kokkos { +namespace Impl { + +// Deduce Mirror Types +template <class Space, class T, class... P> +struct MirrorDynamicViewType { + // The incoming view_type + using src_view_type = typename Kokkos::Experimental::DynamicView<T, P...>; + // The memory space for the mirror view + using memory_space = typename Space::memory_space; + // Check whether it is the same memory space + enum { + is_same_memspace = + std::is_same<memory_space, typename src_view_type::memory_space>::value + }; + // The array_layout + using array_layout = typename src_view_type::array_layout; + // The data type (we probably want it non-const since otherwise we can't even + // deep_copy to it.) + using data_type = typename src_view_type::non_const_data_type; + // The destination view type if it is not the same memory space + using dest_view_type = + Kokkos::Experimental::DynamicView<data_type, array_layout, Space>; + // If it is the same memory_space return the existing view_type + // This will also keep the unmanaged trait if necessary + using view_type = + std::conditional_t<is_same_memspace, src_view_type, dest_view_type>; +}; +} // namespace Impl + +namespace Impl { +template <class T, class... P, class... 
ViewCtorArgs> +inline auto create_mirror( + const Kokkos::Experimental::DynamicView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = + nullptr) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert( + !alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::create_mirror " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::allow_padding, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not explicitly allow padding!"); + + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + std::string(src.label()).append("_mirror"); + + auto ret = typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror( + prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); + + ret.resize_serial(src.extent(0)); + + return ret; +} + +template <class T, class... P, class... 
ViewCtorArgs> +inline auto create_mirror( + const Kokkos::Experimental::DynamicView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + std::enable_if_t<Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = + nullptr) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert( + !alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::create_mirror " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::allow_padding, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not explicitly allow padding!"); + + using MemorySpace = typename alloc_prop_input::memory_space; + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + std::string(src.label()).append("_mirror"); + + auto ret = typename Kokkos::Impl::MirrorDynamicViewType< + MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(), + src.chunk_max() * src.chunk_size()); + + ret.resize_serial(src.extent(0)); + + return ret; +} +} // namespace Impl + +// Create a mirror in host space +template <class T, class... P> +inline auto create_mirror( + const Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); +} + template <class T, class... P> -inline typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror -create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src) { +inline auto create_mirror( + Kokkos::Impl::WithoutInitializing_t wi, + const Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror(src, Kokkos::view_alloc(wi)); +} + +// Create a mirror in a new space +template <class Space, class T, class... 
P> +inline auto create_mirror( + const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror( + src, Impl::ViewCtorProp<>{typename Space::memory_space{}}); +} + +template <class Space, class T, class... P> +typename Kokkos::Impl::MirrorDynamicViewType<Space, T, P...>::view_type +create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, + const Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror( + src, Kokkos::view_alloc(wi, typename Space::memory_space{})); +} + +template <class T, class... P, class... ViewCtorArgs> +inline auto create_mirror( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror(src, arg_prop); +} + +namespace Impl { +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + (std::is_same< + typename Kokkos::Experimental::DynamicView<T, P...>::memory_space, + typename Kokkos::Experimental::DynamicView< + T, P...>::HostMirror::memory_space>::value && + std::is_same< + typename Kokkos::Experimental::DynamicView<T, P...>::data_type, + typename Kokkos::Experimental::DynamicView< + T, P...>::HostMirror::data_type>::value), + typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror> +create_mirror_view( + const typename Kokkos::Experimental::DynamicView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>&) { + return src; +} + +template <class T, class... P, class... 
ViewCtorArgs> +inline std::enable_if_t< + !(std::is_same< + typename Kokkos::Experimental::DynamicView<T, P...>::memory_space, + typename Kokkos::Experimental::DynamicView< + T, P...>::HostMirror::memory_space>::value && + std::is_same< + typename Kokkos::Experimental::DynamicView<T, P...>::data_type, + typename Kokkos::Experimental::DynamicView< + T, P...>::HostMirror::data_type>::value), + typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror> +create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + return Kokkos::create_mirror(arg_prop, src); +} + +template <class Space, class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + Impl::MirrorDynamicViewType<Space, T, P...>::is_same_memspace, + typename Kokkos::Impl::MirrorDynamicViewType<Space, T, P...>::view_type> +create_mirror_view(const Space&, + const Kokkos::Experimental::DynamicView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>&) { return src; } +} // namespace Impl + +// Create a mirror view in host space +template <class T, class... P> +inline auto create_mirror_view( + const typename Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{}); +} + +template <class T, class... P> +inline auto create_mirror_view( + Kokkos::Impl::WithoutInitializing_t wi, + const typename Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); +} + +// Create a mirror in a new space +template <class Space, class T, class... P> +inline auto create_mirror_view( + const Space& space, const Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror_view(space, src, Impl::ViewCtorProp<>{}); +} + +template <class Space, class T, class... 
P> +inline auto create_mirror_view( + Kokkos::Impl::WithoutInitializing_t wi, const Space&, + const Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror_view( + src, Kokkos::view_alloc(wi, typename Space::memory_space{})); +} + +template <class T, class... P, class... ViewCtorArgs> +inline auto create_mirror_view( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::Experimental::DynamicView<T, P...>& src) { + return Impl::create_mirror_view(src, arg_prop); +} + +template <class T, class... DP, class... SP> +inline void deep_copy(const Kokkos::Experimental::DynamicView<T, DP...>& dst, + const Kokkos::Experimental::DynamicView<T, SP...>& src) { + using dst_type = Kokkos::Experimental::DynamicView<T, DP...>; + using src_type = Kokkos::Experimental::DynamicView<T, SP...>; + + using dst_execution_space = typename ViewTraits<T, DP...>::execution_space; + using src_execution_space = typename ViewTraits<T, SP...>::execution_space; + using dst_memory_space = typename ViewTraits<T, DP...>::memory_space; + using src_memory_space = typename ViewTraits<T, SP...>::memory_space; + + constexpr bool DstExecCanAccessSrc = + Kokkos::SpaceAccessibility<dst_execution_space, + src_memory_space>::accessible; + constexpr bool SrcExecCanAccessDst = + Kokkos::SpaceAccessibility<src_execution_space, + dst_memory_space>::accessible; + + if (DstExecCanAccessSrc) + Kokkos::Impl::ViewRemap<dst_type, src_type, dst_execution_space>(dst, src); + else if (SrcExecCanAccessDst) + Kokkos::Impl::ViewRemap<dst_type, src_type, src_execution_space>(dst, src); + else + src.impl_get_chunks().deep_copy_to(dst_execution_space{}, + dst.impl_get_chunks()); + Kokkos::fence("Kokkos::deep_copy(DynamicView)"); +} + +template <class ExecutionSpace, class T, class... DP, class... 
SP> +inline void deep_copy(const ExecutionSpace& exec, + const Kokkos::Experimental::DynamicView<T, DP...>& dst, + const Kokkos::Experimental::DynamicView<T, SP...>& src) { + using dst_type = Kokkos::Experimental::DynamicView<T, DP...>; + using src_type = Kokkos::Experimental::DynamicView<T, SP...>; + + using dst_execution_space = typename ViewTraits<T, DP...>::execution_space; + using src_execution_space = typename ViewTraits<T, SP...>::execution_space; + using dst_memory_space = typename ViewTraits<T, DP...>::memory_space; + using src_memory_space = typename ViewTraits<T, SP...>::memory_space; + + constexpr bool DstExecCanAccessSrc = + Kokkos::SpaceAccessibility<dst_execution_space, + src_memory_space>::accessible; + constexpr bool SrcExecCanAccessDst = + Kokkos::SpaceAccessibility<src_execution_space, + dst_memory_space>::accessible; + + // FIXME use execution space + if (DstExecCanAccessSrc) + Kokkos::Impl::ViewRemap<dst_type, src_type, dst_execution_space>(dst, src); + else if (SrcExecCanAccessDst) + Kokkos::Impl::ViewRemap<dst_type, src_type, src_execution_space>(dst, src); + else + src.impl_get_chunks().deep_copy_to(exec, dst.impl_get_chunks()); +} template <class T, class... DP, class... SP> inline void deep_copy(const View<T, DP...>& dst, @@ -587,6 +884,7 @@ inline void deep_copy(const View<T, DP...>& dst, // Copying data between views in accessible memory spaces and either // non-contiguous or incompatible shape. Kokkos::Impl::ViewRemap<dst_type, src_type>(dst, src); + Kokkos::fence("Kokkos::deep_copy(DynamicView)"); } else { Kokkos::Impl::throw_runtime_exception( "deep_copy given views that would require a temporary allocation"); @@ -612,6 +910,7 @@ inline void deep_copy(const Kokkos::Experimental::DynamicView<T, DP...>& dst, // Copying data between views in accessible memory spaces and either // non-contiguous or incompatible shape. 
Kokkos::Impl::ViewRemap<dst_type, src_type>(dst, src); + Kokkos::fence("Kokkos::deep_copy(DynamicView)"); } else { Kokkos::Impl::throw_runtime_exception( "deep_copy given views that would require a temporary allocation"); @@ -698,6 +997,105 @@ struct ViewCopy<Kokkos::Experimental::DynamicView<DP...>, }; } // namespace Impl + +template <class... ViewCtorArgs, class T, class... P> +auto create_mirror_view_and_copy( + const Impl::ViewCtorProp<ViewCtorArgs...>&, + const Kokkos::Experimental::DynamicView<T, P...>& src, + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + Impl::MirrorDynamicViewType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::is_same_memspace>* = nullptr) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + static_assert( + alloc_prop_input::has_memory_space, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must include a memory space!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not include a pointer!"); + static_assert(!alloc_prop_input::allow_padding, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not explicitly allow padding!"); + + // same behavior as deep_copy(src, src) + if (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src view"); + return src; +} + +template <class... ViewCtorArgs, class T, class... 
P> +auto create_mirror_view_and_copy( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::Experimental::DynamicView<T, P...>& src, + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + !Impl::MirrorDynamicViewType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::is_same_memspace>* = nullptr) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + static_assert( + alloc_prop_input::has_memory_space, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must include a memory space!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not include a pointer!"); + static_assert(!alloc_prop_input::allow_padding, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not explicitly allow padding!"); + using Space = typename alloc_prop_input::memory_space; + using Mirror = + typename Impl::MirrorDynamicViewType<Space, T, P...>::view_type; + + // Add some properties if not provided to avoid need for if constexpr + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., + std::conditional_t<alloc_prop_input::has_label, + std::integral_constant<unsigned int, 12>, std::string>, + std::conditional_t<!alloc_prop_input::initialize, + std::integral_constant<unsigned int, 13>, + Impl::WithoutInitializing_t>, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 14>, + typename Space::execution_space>>; + alloc_prop arg_prop_copy(arg_prop); + + std::string& label = + static_cast<Impl::ViewCtorProp<void, std::string>&>(arg_prop_copy).value; + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type( + arg_prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); + mirror.resize_serial(src.extent(0)); + if (alloc_prop_input::has_execution_space) { + 
using ExecutionSpace = typename alloc_prop::execution_space; + deep_copy( + static_cast<Impl::ViewCtorProp<void, ExecutionSpace>&>(arg_prop_copy) + .value, + mirror, src); + } else + deep_copy(mirror, src); + return mirror; +} + +template <class Space, class T, class... P> +auto create_mirror_view_and_copy( + const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src, + std::string const& name = "") { + return create_mirror_view_and_copy( + Kokkos::view_alloc(typename Space::memory_space{}, name), src); +} + } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DYNAMICVIEW +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DYNAMICVIEW +#endif #endif /* #ifndef KOKKOS_DYNAMIC_VIEW_HPP */ diff --git a/packages/kokkos/containers/src/Kokkos_ErrorReporter.hpp b/packages/kokkos/containers/src/Kokkos_ErrorReporter.hpp index 629b437c22d4e2c5517a07ee42042e383b570833..8affa0bac4f87a95df1e5200540ac191b1f817a3 100644 --- a/packages/kokkos/containers/src/Kokkos_ErrorReporter.hpp +++ b/packages/kokkos/containers/src/Kokkos_ErrorReporter.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP #define KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ERRORREPORTER +#endif #include <vector> #include <Kokkos_Core.hpp> @@ -192,4 +196,8 @@ void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size) { } // namespace Experimental } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ERRORREPORTER +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ERRORREPORTER +#endif #endif diff --git a/packages/kokkos/containers/src/Kokkos_Functional.hpp b/packages/kokkos/containers/src/Kokkos_Functional.hpp index 2e1fa336f7bc062cbfce346b5a1bb39e4354a15a..478a087d0c32185f87da8b9f8cc664bdcb7fa170 100644 --- a/packages/kokkos/containers/src/Kokkos_Functional.hpp +++ 
b/packages/kokkos/containers/src/Kokkos_Functional.hpp @@ -42,6 +42,10 @@ #ifndef KOKKOS_FUNCTIONAL_HPP #define KOKKOS_FUNCTIONAL_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_FUNCTIONAL +#endif #include <Kokkos_Macros.hpp> #include <impl/Kokkos_Functional_impl.hpp> @@ -52,10 +56,12 @@ namespace Kokkos { template <typename T> struct pod_hash { - using argument_type = T; - using first_argument_type = T; - using second_argument_type = uint32_t; - using result_type = uint32_t; +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + using argument_type KOKKOS_DEPRECATED = T; + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type KOKKOS_DEPRECATED = uint32_t; + using result_type KOKKOS_DEPRECATED = uint32_t; +#endif KOKKOS_FORCEINLINE_FUNCTION uint32_t operator()(T const& t) const { @@ -70,9 +76,11 @@ struct pod_hash { template <typename T> struct pod_equal_to { - using first_argument_type = T; - using second_argument_type = T; - using result_type = bool; +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type KOKKOS_DEPRECATED = T; + using result_type KOKKOS_DEPRECATED = bool; +#endif KOKKOS_FORCEINLINE_FUNCTION bool operator()(T const& a, T const& b) const { @@ -82,9 +90,11 @@ struct pod_equal_to { template <typename T> struct pod_not_equal_to { - using first_argument_type = T; - using second_argument_type = T; - using result_type = bool; +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type KOKKOS_DEPRECATED = T; + using result_type KOKKOS_DEPRECATED = bool; +#endif KOKKOS_FORCEINLINE_FUNCTION bool operator()(T const& a, T const& b) const { @@ -94,9 +104,11 @@ struct pod_not_equal_to { template <typename T> struct equal_to { - using first_argument_type = T; - using second_argument_type = T; - using result_type = bool; +#if defined 
KOKKOS_ENABLE_DEPRECATED_CODE_3 + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type KOKKOS_DEPRECATED = T; + using result_type KOKKOS_DEPRECATED = bool; +#endif KOKKOS_FORCEINLINE_FUNCTION bool operator()(T const& a, T const& b) const { return a == b; } @@ -104,9 +116,11 @@ struct equal_to { template <typename T> struct not_equal_to { - using first_argument_type = T; - using second_argument_type = T; - using result_type = bool; +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type KOKKOS_DEPRECATED = T; + using result_type KOKKOS_DEPRECATED = bool; +#endif KOKKOS_FORCEINLINE_FUNCTION bool operator()(T const& a, T const& b) const { return a != b; } @@ -114,9 +128,11 @@ struct not_equal_to { template <typename T> struct greater { - using first_argument_type = T; - using second_argument_type = T; - using result_type = bool; +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type KOKKOS_DEPRECATED = T; + using result_type KOKKOS_DEPRECATED = bool; +#endif KOKKOS_FORCEINLINE_FUNCTION bool operator()(T const& a, T const& b) const { return a > b; } @@ -124,9 +140,11 @@ struct greater { template <typename T> struct less { - using first_argument_type = T; - using second_argument_type = T; - using result_type = bool; +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type KOKKOS_DEPRECATED = T; + using result_type KOKKOS_DEPRECATED = bool; +#endif KOKKOS_FORCEINLINE_FUNCTION bool operator()(T const& a, T const& b) const { return a < b; } @@ -134,9 +152,11 @@ struct less { template <typename T> struct greater_equal { - using first_argument_type = T; - using second_argument_type = T; - using result_type = bool; +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type 
KOKKOS_DEPRECATED = T; + using result_type KOKKOS_DEPRECATED = bool; +#endif KOKKOS_FORCEINLINE_FUNCTION bool operator()(T const& a, T const& b) const { return a >= b; } @@ -144,9 +164,11 @@ struct greater_equal { template <typename T> struct less_equal { - using first_argument_type = T; - using second_argument_type = T; - using result_type = bool; +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + using first_argument_type KOKKOS_DEPRECATED = T; + using second_argument_type KOKKOS_DEPRECATED = T; + using result_type KOKKOS_DEPRECATED = bool; +#endif KOKKOS_FORCEINLINE_FUNCTION bool operator()(T const& a, T const& b) const { return a <= b; } @@ -154,4 +176,8 @@ struct less_equal { } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_FUNCTIONAL +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_FUNCTIONAL +#endif #endif // KOKKOS_FUNCTIONAL_HPP diff --git a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp index 9d97dc08f874b775d4c55b627bd4a7acbbade824..0b54d1bdd952f33e433f17b05c56ef415ee286b4 100644 --- a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp +++ b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp @@ -7,6 +7,10 @@ #ifndef KOKKOS_OFFSETVIEW_HPP_ #define KOKKOS_OFFSETVIEW_HPP_ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_OFFSETVIEW +#endif #include <Kokkos_Core.hpp> @@ -25,26 +29,25 @@ template <class> struct is_offset_view : public std::false_type {}; template <class D, class... P> -struct is_offset_view<OffsetView<D, P...> > : public std::true_type {}; +struct is_offset_view<OffsetView<D, P...>> : public std::true_type {}; template <class D, class... 
P> -struct is_offset_view<const OffsetView<D, P...> > : public std::true_type {}; +struct is_offset_view<const OffsetView<D, P...>> : public std::true_type {}; #define KOKKOS_INVALID_OFFSET int64_t(0x7FFFFFFFFFFFFFFFLL) #define KOKKOS_INVALID_INDEX_RANGE \ { KOKKOS_INVALID_OFFSET, KOKKOS_INVALID_OFFSET } -template <typename iType, - typename std::enable_if<std::is_integral<iType>::value && - std::is_signed<iType>::value, - iType>::type = 0> +template <typename iType, std::enable_if_t<std::is_integral<iType>::value && + std::is_signed<iType>::value, + iType> = 0> using IndexRange = Kokkos::Array<iType, 2>; using index_list_type = std::initializer_list<int64_t>; // template <typename iType, -// typename std::enable_if< std::is_integral<iType>::value && -// std::is_signed<iType>::value, iType >::type = 0> using min_index_type = +// std::enable_if_t< std::is_integral<iType>::value && +// std::is_signed<iType>::value, iType > = 0> using min_index_type = // std::initializer_list<iType>; namespace Impl { @@ -191,9 +194,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> { enum { Rank = map_type::Rank }; using begins_type = Kokkos::Array<int64_t, Rank>; - template < - typename iType, - typename std::enable_if<std::is_integral<iType>::value, iType>::type = 0> + template <typename iType, + std::enable_if_t<std::is_integral<iType>::value, iType> = 0> KOKKOS_INLINE_FUNCTION int64_t begin(const iType local_dimension) const { return local_dimension < Rank ? 
m_begins[local_dimension] : KOKKOS_INVALID_OFFSET; @@ -202,9 +204,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> { KOKKOS_INLINE_FUNCTION begins_type begins() const { return m_begins; } - template < - typename iType, - typename std::enable_if<std::is_integral<iType>::value, iType>::type = 0> + template <typename iType, + std::enable_if_t<std::is_integral<iType>::value, iType> = 0> KOKKOS_INLINE_FUNCTION int64_t end(const iType local_dimension) const { return begin(local_dimension) + m_map.extent(local_dimension); } @@ -249,16 +250,16 @@ class OffsetView : public ViewTraits<DataType, Properties...> { // constexpr unsigned rank() { return map_type::Rank; } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, size_t>::type - extent(const iType& r) const { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, size_t> + extent(const iType& r) const { return m_map.extent(r); } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, int>::type - extent_int(const iType& r) const { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, int> + extent_int(const iType& r) const { return static_cast<int>(m_map.extent(r)); } @@ -299,9 +300,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, size_t>::type - stride(iType r) const { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, size_t> + stride(iType r) const { return ( r == 0 ? 
m_map.stride_0() @@ -368,7 +369,7 @@ class OffsetView : public ViewTraits<DataType, Properties...> { std::is_same<typename traits::array_layout, Kokkos::LayoutStride>::value; static constexpr bool is_default_map = - std::is_same<typename traits::specialize, void>::value && + std::is_void<typename traits::specialize>::value && (is_layout_left || is_layout_right || is_layout_stride); #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) @@ -401,11 +402,10 @@ class OffsetView : public ViewTraits<DataType, Properties...> { // Rank 1 operator() template <typename I0> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && !is_default_map), - reference_type>::type - operator()(const I0& i0) const { + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::are_integral<I0>::value && (1 == Rank) && !is_default_map), + reference_type> + operator()(const I0& i0) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0)) const size_t j0 = i0 - m_begins[0]; return m_map.reference(j0); @@ -413,10 +413,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && is_default_map && - !is_layout_stride), - reference_type>::type + std::enable_if_t<(Kokkos::Impl::are_integral<I0>::value && (1 == Rank) && + is_default_map && !is_layout_stride), + reference_type> operator()(const I0& i0) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0)) const size_t j0 = i0 - m_begins[0]; @@ -425,10 +424,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && is_default_map && - is_layout_stride), - reference_type>::type + std::enable_if_t<(Kokkos::Impl::are_integral<I0>::value && (1 == Rank) && + 
is_default_map && is_layout_stride), + reference_type> operator()(const I0& i0) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0)) const size_t j0 = i0 - m_begins[0]; @@ -438,11 +436,10 @@ class OffsetView : public ViewTraits<DataType, Properties...> { // Rank 1 operator[] template <typename I0> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && !is_default_map), - reference_type>::type - operator[](const I0& i0) const { + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::are_integral<I0>::value && (1 == Rank) && !is_default_map), + reference_type> + operator[](const I0& i0) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0)) const size_t j0 = i0 - m_begins[0]; return m_map.reference(j0); @@ -450,10 +447,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && is_default_map && - !is_layout_stride), - reference_type>::type + std::enable_if_t<(Kokkos::Impl::are_integral<I0>::value && (1 == Rank) && + is_default_map && !is_layout_stride), + reference_type> operator[](const I0& i0) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0)) const size_t j0 = i0 - m_begins[0]; @@ -462,10 +458,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && is_default_map && - is_layout_stride), - reference_type>::type + std::enable_if_t<(Kokkos::Impl::are_integral<I0>::value && (1 == Rank) && + is_default_map && is_layout_stride), + reference_type> operator[](const I0& i0) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0)) const size_t j0 = i0 - m_begins[0]; @@ -477,9 +472,9 @@ class OffsetView : public 
ViewTraits<DataType, Properties...> { template <typename I0, typename I1> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && !is_default_map), - reference_type>::type + std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1>::value && + (2 == Rank) && !is_default_map), + reference_type> operator()(const I0& i0, const I1& i1) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) const size_t j0 = i0 - m_begins[0]; @@ -488,12 +483,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> { } template <typename I0, typename I1> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_left && (traits::rank_dynamic == 0)), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::are_integral<I0, I1>::value && (2 == Rank) && + is_default_map && is_layout_left && (traits::rank_dynamic == 0)), + reference_type> + operator()(const I0& i0, const I1& i1) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) const size_t j0 = i0 - m_begins[0]; const size_t j1 = i1 - m_begins[1]; @@ -501,12 +495,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> { } template <typename I0, typename I1> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_left && (traits::rank_dynamic != 0)), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::are_integral<I0, I1>::value && (2 == Rank) && + is_default_map && is_layout_left && (traits::rank_dynamic != 0)), + reference_type> + operator()(const I0& i0, const I1& i1) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) const 
size_t j0 = i0 - m_begins[0]; const size_t j1 = i1 - m_begins[1]; @@ -514,12 +507,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> { } template <typename I0, typename I1> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_right && (traits::rank_dynamic == 0)), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::are_integral<I0, I1>::value && (2 == Rank) && + is_default_map && is_layout_right && (traits::rank_dynamic == 0)), + reference_type> + operator()(const I0& i0, const I1& i1) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) const size_t j0 = i0 - m_begins[0]; const size_t j1 = i1 - m_begins[1]; @@ -527,12 +519,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> { } template <typename I0, typename I1> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_right && (traits::rank_dynamic != 0)), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::are_integral<I0, I1>::value && (2 == Rank) && + is_default_map && is_layout_right && (traits::rank_dynamic != 0)), + reference_type> + operator()(const I0& i0, const I1& i1) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) const size_t j0 = i0 - m_begins[0]; const size_t j1 = i1 - m_begins[1]; @@ -541,10 +532,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_stride), - reference_type>::type + std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1>::value && + (2 == 
Rank) && is_default_map && is_layout_stride), + reference_type> operator()(const I0& i0, const I1& i1) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) const size_t j0 = i0 - m_begins[0]; @@ -558,9 +548,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1, typename I2> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1, I2>::value && - (3 == Rank) && is_default_map), - reference_type>::type + std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1, I2>::value && + (3 == Rank) && is_default_map), + reference_type> operator()(const I0& i0, const I1& i1, const I2& i2) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track, m_map, m_begins, i0, i1, i2)) @@ -572,9 +562,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1, typename I2> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1, I2>::value && - (3 == Rank) && !is_default_map), - reference_type>::type + std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1, I2>::value && + (3 == Rank) && !is_default_map), + reference_type> operator()(const I0& i0, const I1& i1, const I2& i2) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track, m_map, m_begins, i0, i1, i2)) @@ -588,11 +578,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> { // Rank 4 template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3>::value && (4 == Rank) && - is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const { + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1, I2, I3>::value && + (4 == Rank) && is_default_map), + reference_type> + operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const { 
KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track, m_map, m_begins, i0, i1, i2, i3)) const size_t j0 = i0 - m_begins[0]; @@ -603,11 +593,11 @@ class OffsetView : public ViewTraits<DataType, Properties...> { } template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3>::value && (4 == Rank) && - !is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const { + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1, I2, I3>::value && + (4 == Rank) && !is_default_map), + reference_type> + operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track, m_map, m_begins, i0, i1, i2, i3)) const size_t j0 = i0 - m_begins[0]; @@ -621,12 +611,12 @@ class OffsetView : public ViewTraits<DataType, Properties...> { // Rank 5 template <typename I0, typename I1, typename I2, typename I3, typename I4> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4>::value && (5 == Rank) && - is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4) const { + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1, I2, I3, I4>::value && + (5 == Rank) && is_default_map), + reference_type> + operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, + const I4& i4) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track, m_map, m_begins, i0, i1, i2, i3, i4)) const size_t j0 = i0 - m_begins[0]; @@ -638,12 +628,12 @@ class OffsetView : public ViewTraits<DataType, Properties...> { } template <typename I0, typename I1, typename I2, typename I3, typename I4> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4>::value && (5 == Rank) && - !is_default_map), - 
reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4) const { + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1, I2, I3, I4>::value && + (5 == Rank) && !is_default_map), + reference_type> + operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, + const I4& i4) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track, m_map, m_begins, i0, i1, i2, i3, i4)) const size_t j0 = i0 - m_begins[0]; @@ -659,10 +649,10 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1, typename I2, typename I3, typename I4, typename I5> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5>::value && (6 == Rank) && is_default_map), - reference_type>::type + reference_type> operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( @@ -678,10 +668,10 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1, typename I2, typename I3, typename I4, typename I5> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5>::value && (6 == Rank) && !is_default_map), - reference_type>::type + reference_type> operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( @@ -700,10 +690,10 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1, typename I2, typename I3, typename I4, typename I5, typename I6> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6>::value && (7 == Rank) && is_default_map), - 
reference_type>::type + reference_type> operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5, const I6& i6) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( @@ -720,10 +710,10 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1, typename I2, typename I3, typename I4, typename I5, typename I6> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6>::value && (7 == Rank) && !is_default_map), - reference_type>::type + reference_type> operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5, const I6& i6) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( @@ -743,10 +733,10 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1, typename I2, typename I3, typename I4, typename I5, typename I6, typename I7> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6, I7>::value && (8 == Rank) && is_default_map), - reference_type>::type + reference_type> operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5, const I6& i6, const I7& i7) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( @@ -765,10 +755,10 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <typename I0, typename I1, typename I2, typename I3, typename I4, typename I5, typename I6, typename I7> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6, I7>::value && (8 == Rank) && !is_default_map), - reference_type>::type + reference_type> operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5, const I6& i6, const I7& i7) const { 
KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( @@ -1104,40 +1094,75 @@ class OffsetView : public ViewTraits<DataType, Properties...> { return m_track.template get_label<typename traits::memory_space>(); } + // Choosing std::pair as type for the arguments allows constructing an + // OffsetView using list initialization syntax, e.g., + // OffsetView dummy("dummy", {-1, 3}, {-2,2}); + // We could allow arbitrary types RangeType that support + // std::get<{0,1}>(RangeType const&) with std::tuple_size<RangeType>::value==2 + // but this wouldn't allow using the syntax in the example above. template <typename Label> explicit inline OffsetView( const Label& arg_label, - typename std::enable_if<Kokkos::Impl::is_view_label<Label>::value, - const index_list_type>::type range0, + std::enable_if_t<Kokkos::Impl::is_view_label<Label>::value, + const std::pair<int64_t, int64_t>> + range0, + const std::pair<int64_t, int64_t> range1 = KOKKOS_INVALID_INDEX_RANGE, + const std::pair<int64_t, int64_t> range2 = KOKKOS_INVALID_INDEX_RANGE, + const std::pair<int64_t, int64_t> range3 = KOKKOS_INVALID_INDEX_RANGE, + const std::pair<int64_t, int64_t> range4 = KOKKOS_INVALID_INDEX_RANGE, + const std::pair<int64_t, int64_t> range5 = KOKKOS_INVALID_INDEX_RANGE, + const std::pair<int64_t, int64_t> range6 = KOKKOS_INVALID_INDEX_RANGE, + const std::pair<int64_t, int64_t> range7 = KOKKOS_INVALID_INDEX_RANGE + + ) + : OffsetView( + Kokkos::Impl::ViewCtorProp<std::string>(arg_label), + typename traits::array_layout(range0.second - range0.first + 1, + range1.second - range1.first + 1, + range2.second - range2.first + 1, + range3.second - range3.first + 1, + range4.second - range4.first + 1, + range5.second - range5.first + 1, + range6.second - range6.first + 1, + range7.second - range7.first + 1), + {range0.first, range1.first, range2.first, range3.first, + range4.first, range5.first, range6.first, range7.first}) {} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 + template <typename Label> + 
KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the constructor taking std::pair<int64_t, int64_t> arguments " + "instead!") + explicit inline OffsetView( + const Label& arg_label, + std::enable_if_t<Kokkos::Impl::is_view_label<Label>::value, + const index_list_type> + range0, const index_list_type range1 = KOKKOS_INVALID_INDEX_RANGE, const index_list_type range2 = KOKKOS_INVALID_INDEX_RANGE, const index_list_type range3 = KOKKOS_INVALID_INDEX_RANGE, const index_list_type range4 = KOKKOS_INVALID_INDEX_RANGE, const index_list_type range5 = KOKKOS_INVALID_INDEX_RANGE, const index_list_type range6 = KOKKOS_INVALID_INDEX_RANGE, - const index_list_type range7 = KOKKOS_INVALID_INDEX_RANGE - - ) - : OffsetView(Kokkos::Impl::ViewCtorProp<std::string>(arg_label), - typename traits::array_layout( - range0.begin()[1] - range0.begin()[0] + 1, - range1.begin()[1] - range1.begin()[0] + 1, - range2.begin()[1] - range2.begin()[0] + 1, - range3.begin()[1] - range3.begin()[0] + 1, - range4.begin()[1] - range4.begin()[0] + 1, - range5.begin()[1] - range5.begin()[0] + 1, - range6.begin()[1] - range6.begin()[0] + 1, - range7.begin()[1] - range7.begin()[0] + 1), - {range0.begin()[0], range1.begin()[0], range2.begin()[0], - range3.begin()[0], range4.begin()[0], range5.begin()[0], - range6.begin()[0], range7.begin()[0]}) {} + const index_list_type range7 = KOKKOS_INVALID_INDEX_RANGE) + : OffsetView( + arg_label, + std::pair<int64_t, int64_t>(range0.begin()[0], range0.begin()[1]), + std::pair<int64_t, int64_t>(range1.begin()[0], range1.begin()[1]), + std::pair<int64_t, int64_t>(range2.begin()[0], range2.begin()[1]), + std::pair<int64_t, int64_t>(range3.begin()[0], range3.begin()[1]), + std::pair<int64_t, int64_t>(range4.begin()[0], range4.begin()[1]), + std::pair<int64_t, int64_t>(range5.begin()[0], range5.begin()[1]), + std::pair<int64_t, int64_t>(range6.begin()[0], range6.begin()[1]), + std::pair<int64_t, int64_t>(range7.begin()[0], range7.begin()[1])) { + } +#endif template <class... 
P> explicit KOKKOS_INLINE_FUNCTION OffsetView( const Kokkos::Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<Kokkos::Impl::ViewCtorProp<P...>::has_pointer, - typename traits::array_layout>::type const& - arg_layout, + std::enable_if_t<Kokkos::Impl::ViewCtorProp<P...>::has_pointer, + typename traits::array_layout> const& arg_layout, const index_list_type minIndices) : m_track() // No memory tracking , @@ -1155,9 +1180,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> { template <class... P> explicit inline OffsetView( const Kokkos::Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<!Kokkos::Impl::ViewCtorProp<P...>::has_pointer, - typename traits::array_layout>::type const& - arg_layout, + std::enable_if_t<!Kokkos::Impl::ViewCtorProp<P...>::has_pointer, + typename traits::array_layout> const& arg_layout, const index_list_type minIndices) : m_track(), m_map() @@ -1172,17 +1196,14 @@ class OffsetView : public ViewTraits<DataType, Properties...> { // to avoid duplicate class error. 
using alloc_prop = Kokkos::Impl::ViewCtorProp< P..., - typename std::conditional<alloc_prop_input::has_label, - std::integral_constant<unsigned, 0>, - typename std::string>::type, - typename std::conditional< - alloc_prop_input::has_memory_space, - std::integral_constant<unsigned, 1>, - typename traits::device_type::memory_space>::type, - typename std::conditional< - alloc_prop_input::has_execution_space, - std::integral_constant<unsigned, 2>, - typename traits::device_type::execution_space>::type>; + std::conditional_t<alloc_prop_input::has_label, + std::integral_constant<unsigned, 0>, std::string>, + std::conditional_t<alloc_prop_input::has_memory_space, + std::integral_constant<unsigned, 1>, + typename traits::device_type::memory_space>, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned, 2>, + typename traits::device_type::execution_space>>; static_assert(traits::is_managed, "OffsetView allocation constructor requires managed memory"); @@ -1214,8 +1235,9 @@ class OffsetView : public ViewTraits<DataType, Properties...> { #endif //------------------------------------------------------------ - Kokkos::Impl::SharedAllocationRecord<>* record = - m_map.allocate_shared(prop_copy, arg_layout); + Kokkos::Impl::SharedAllocationRecord<>* record = m_map.allocate_shared( + prop_copy, arg_layout, + Kokkos::Impl::ViewCtorProp<P...>::has_execution_space); //------------------------------------------------------------ #if defined(KOKKOS_ENABLE_CUDA) @@ -1252,9 +1274,8 @@ KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const OffsetView<D, P...>& V) { namespace Impl { template <class T> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<std::is_integral<T>::value, T>::type - shift_input(const T arg, const int64_t offset) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_integral<T>::value, T> +shift_input(const T arg, const int64_t offset) { return arg - offset; } @@ -1265,22 +1286,21 @@ Kokkos::Impl::ALL_t shift_input(const 
Kokkos::Impl::ALL_t arg, } template <class T> -KOKKOS_INLINE_FUNCTION typename std::enable_if<std::is_integral<T>::value, - Kokkos::pair<T, T> >::type -shift_input(const Kokkos::pair<T, T> arg, const int64_t offset) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<std::is_integral<T>::value, Kokkos::pair<T, T>> + shift_input(const Kokkos::pair<T, T> arg, const int64_t offset) { return Kokkos::make_pair<T, T>(arg.first - offset, arg.second - offset); } template <class T> -inline - typename std::enable_if<std::is_integral<T>::value, std::pair<T, T> >::type - shift_input(const std::pair<T, T> arg, const int64_t offset) { +inline std::enable_if_t<std::is_integral<T>::value, std::pair<T, T>> +shift_input(const std::pair<T, T> arg, const int64_t offset) { return std::make_pair<T, T>(arg.first - offset, arg.second - offset); } template <size_t N, class Arg, class A> KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin( const size_t i, Kokkos::Array<int64_t, N>& subviewBegins, - typename std::enable_if<N != 0, const Arg>::type shiftedArg, const Arg arg, + std::enable_if_t<N != 0, const Arg> shiftedArg, const Arg arg, const A viewBegins, size_t& counter) { if (!std::is_integral<Arg>::value) { subviewBegins[counter] = shiftedArg == arg ? viewBegins[i] : 0; @@ -1291,8 +1311,8 @@ KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin( template <size_t N, class Arg, class A> KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin( const size_t /*i*/, Kokkos::Array<int64_t, N>& /*subviewBegins*/, - typename std::enable_if<N == 0, const Arg>::type /*shiftedArg*/, - const Arg /*arg*/, const A /*viewBegins*/, size_t& /*counter*/) {} + std::enable_if_t<N == 0, const Arg> /*shiftedArg*/, const Arg /*arg*/, + const A /*viewBegins*/, size_t& /*counter*/) {} template <class D, class... P, class T> KOKKOS_INLINE_FUNCTION @@ -1774,9 +1794,8 @@ template <class DT, class... 
DP> inline void deep_copy( const Experimental::OffsetView<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { static_assert( std::is_same<typename ViewTraits<DT, DP...>::non_const_value_type, typename ViewTraits<DT, DP...>::value_type>::value, @@ -1790,9 +1809,8 @@ template <class DT, class... DP, class ST, class... SP> inline void deep_copy( const Experimental::OffsetView<DT, DP...>& dst, const Experimental::OffsetView<ST, SP...>& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { static_assert( std::is_same<typename ViewTraits<DT, DP...>::value_type, typename ViewTraits<ST, SP...>::non_const_value_type>::value, @@ -1805,9 +1823,8 @@ template <class DT, class... DP, class ST, class... SP> inline void deep_copy( const Experimental::OffsetView<DT, DP...>& dst, const View<ST, SP...>& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { static_assert( std::is_same<typename ViewTraits<DT, DP...>::value_type, typename ViewTraits<ST, SP...>::non_const_value_type>::value, @@ -1821,9 +1838,8 @@ template <class DT, class... DP, class ST, class... 
SP> inline void deep_copy( const View<DT, DP...>& dst, const Experimental::OffsetView<ST, SP...>& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { static_assert( std::is_same<typename ViewTraits<DT, DP...>::value_type, typename ViewTraits<ST, SP...>::non_const_value_type>::value, @@ -1856,8 +1872,8 @@ struct MirrorOffsetViewType { Kokkos::Experimental::OffsetView<data_type, array_layout, Space>; // If it is the same memory_space return the existing view_type // This will also keep the unmanaged trait if necessary - using view_type = typename std::conditional<is_same_memspace, src_view_type, - dest_view_type>::type; + using view_type = + std::conditional_t<is_same_memspace, src_view_type, dest_view_type>; }; template <class Space, class T, class... P> @@ -1883,163 +1899,202 @@ struct MirrorOffsetType { } // namespace Impl -template <class T, class... P> +namespace Impl { +template <class T, class... P, class... 
ViewCtorArgs> inline typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror -create_mirror( - const Kokkos::Experimental::OffsetView<T, P...>& src, - typename std::enable_if< - !std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout, - Kokkos::LayoutStride>::value>::type* = nullptr) { - using src_type = Experimental::OffsetView<T, P...>; - using dst_type = typename src_type::HostMirror; - - return dst_type( - Kokkos::Impl::ViewCtorProp<std::string>( - std::string(src.label()).append("_mirror")), - typename Kokkos::ViewTraits<T, P...>::array_layout( - src.extent(0), src.extent(1), src.extent(2), src.extent(3), - src.extent(4), src.extent(5), src.extent(6), src.extent(7)), +create_mirror(const Kokkos::Experimental::OffsetView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + return typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror( + Kokkos::create_mirror(arg_prop, src.view()), src.begins()); +} + +template <class Space, class T, class... P, class... 
ViewCtorArgs> +inline typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type +create_mirror(const Space&, + const Kokkos::Experimental::OffsetView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert( + !alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::create_mirror " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a memory space instance!"); + static_assert( + !alloc_prop_input::allow_padding, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not explicitly allow padding!"); + + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + std::string(src.label()).append("_mirror"); + + return typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type( + prop_copy, src.layout(), {src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), src.begin(5), src.begin(6), src.begin(7)}); } +} // namespace Impl + +// Create a mirror in host space +template <class T, class... P> +inline auto create_mirror( + const Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); +} template <class T, class... 
P> -inline typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror -create_mirror( - const Kokkos::Experimental::OffsetView<T, P...>& src, - typename std::enable_if< - std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout, - Kokkos::LayoutStride>::value>::type* = nullptr) { - using src_type = Experimental::OffsetView<T, P...>; - using dst_type = typename src_type::HostMirror; - - Kokkos::LayoutStride layout; - - layout.dimension[0] = src.extent(0); - layout.dimension[1] = src.extent(1); - layout.dimension[2] = src.extent(2); - layout.dimension[3] = src.extent(3); - layout.dimension[4] = src.extent(4); - layout.dimension[5] = src.extent(5); - layout.dimension[6] = src.extent(6); - layout.dimension[7] = src.extent(7); - - layout.stride[0] = src.stride_0(); - layout.stride[1] = src.stride_1(); - layout.stride[2] = src.stride_2(); - layout.stride[3] = src.stride_3(); - layout.stride[4] = src.stride_4(); - layout.stride[5] = src.stride_5(); - layout.stride[6] = src.stride_6(); - layout.stride[7] = src.stride_7(); - - return dst_type(std::string(src.label()).append("_mirror"), layout, - {src.begin(0), src.begin(1), src.begin(2), src.begin(3), - src.begin(4), src.begin(5), src.begin(6), src.begin(7)}); +inline auto create_mirror( + Kokkos::Impl::WithoutInitializing_t wi, + const Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror(src, Kokkos::view_alloc(wi)); +} + +// Create a mirror in a new space +template <class Space, class T, class... P, + typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> +inline auto create_mirror( + const Space& space, const Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror(space, src, Impl::ViewCtorProp<>{}); } -// Create a mirror in a new space (specialization for different space) template <class Space, class T, class... 
P> typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type -create_mirror(const Space&, +create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space& space, const Kokkos::Experimental::OffsetView<T, P...>& src) { - return typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type( - src.label(), src.layout(), - {src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), - src.begin(5), src.begin(6), src.begin(7)}); + return Impl::create_mirror(space, src, Kokkos::view_alloc(wi)); } -template <class T, class... P> -inline typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror +template <class T, class... P, class... ViewCtorArgs> +inline auto create_mirror( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror(src, arg_prop); +} + +namespace Impl { +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + (std::is_same< + typename Kokkos::Experimental::OffsetView<T, P...>::memory_space, + typename Kokkos::Experimental::OffsetView< + T, P...>::HostMirror::memory_space>::value && + std::is_same<typename Kokkos::Experimental::OffsetView<T, P...>::data_type, + typename Kokkos::Experimental::OffsetView< + T, P...>::HostMirror::data_type>::value), + typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror> create_mirror_view( const typename Kokkos::Experimental::OffsetView<T, P...>& src, - typename std::enable_if< - (std::is_same< - typename Kokkos::Experimental::OffsetView<T, P...>::memory_space, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::OffsetView<T, P...>::data_type, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::data_type>::value)>::type* = nullptr) { + const Impl::ViewCtorProp<ViewCtorArgs...>&) { return src; } -template <class T, class... 
P> -inline typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror -create_mirror_view( - const Kokkos::Experimental::OffsetView<T, P...>& src, - typename std::enable_if< - !(std::is_same< - typename Kokkos::Experimental::OffsetView<T, P...>::memory_space, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::OffsetView<T, P...>::data_type, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::data_type>::value)>::type* = nullptr) { - return Kokkos::create_mirror(src); +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + !(std::is_same< + typename Kokkos::Experimental::OffsetView<T, P...>::memory_space, + typename Kokkos::Experimental::OffsetView< + T, P...>::HostMirror::memory_space>::value && + std::is_same< + typename Kokkos::Experimental::OffsetView<T, P...>::data_type, + typename Kokkos::Experimental::OffsetView< + T, P...>::HostMirror::data_type>::value), + typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror> +create_mirror_view(const Kokkos::Experimental::OffsetView<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + return Kokkos::create_mirror(arg_prop, src); } -// Create a mirror view in a new space (specialization for same space) -template <class Space, class T, class... P> -typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type +template <class Space, class T, class... P, class... 
ViewCtorArgs> +inline std::enable_if_t< + Impl::MirrorOffsetViewType<Space, T, P...>::is_same_memspace, + Kokkos::Experimental::OffsetView<T, P...>> create_mirror_view(const Space&, const Kokkos::Experimental::OffsetView<T, P...>& src, - typename std::enable_if<Impl::MirrorOffsetViewType< - Space, T, P...>::is_same_memspace>::type* = nullptr) { + const Impl::ViewCtorProp<ViewCtorArgs...>&) { return src; } -// Create a mirror view in a new space (specialization for different space) -template <class Space, class T, class... P> -typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type -create_mirror_view(const Space&, +template <class Space, class T, class... P, class... ViewCtorArgs> +std::enable_if_t< + !Impl::MirrorOffsetViewType<Space, T, P...>::is_same_memspace, + typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type> +create_mirror_view(const Space& space, const Kokkos::Experimental::OffsetView<T, P...>& src, - typename std::enable_if<!Impl::MirrorOffsetViewType< - Space, T, P...>::is_same_memspace>::type* = nullptr) { - return typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type( - src.label(), src.layout(), - {src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), - src.begin(5), src.begin(6), src.begin(7)}); + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + return create_mirror(space, src, arg_prop); +} +} // namespace Impl + +// Create a mirror view in host space +template <class T, class... P> +inline auto create_mirror_view( + const typename Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{}); +} + +template <class T, class... P> +inline auto create_mirror_view( + Kokkos::Impl::WithoutInitializing_t wi, + const typename Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); +} + +// Create a mirror view in a new space +template <class Space, class T, class... 
P, + typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> +inline auto create_mirror_view( + const Space& space, const Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror_view(space, src, Impl::ViewCtorProp<>{}); } -// -// // Create a mirror view and deep_copy in a new space (specialization for -// same space) template<class Space, class T, class ... P> typename -// Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type -// create_mirror_view_and_copy(const Space& , const -// Kokkos::Experimental::OffsetView<T,P...> & src -// , std::string const& name = "" -// , typename -// std::enable_if<Impl::MirrorViewType<Space,T,P -// ...>::is_same_memspace>::type* = nullptr) { -// (void)name; -// return src; -// } -// -// // Create a mirror view and deep_copy in a new space (specialization for -// different space) template<class Space, class T, class ... P> typename -// Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type -// create_mirror_view_and_copy(const Space& , const -// Kokkos::Experimental::OffsetView<T,P...> & src -// , std::string const& name = "" -// , typename -// std::enable_if<!Impl::MirrorViewType<Space,T,P -// ...>::is_same_memspace>::type* = nullptr) { -// using Mirror = typename -// Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type; -// std::string label = name.empty() ? src.label() : name; -// auto mirror = Mirror(view_alloc(WithoutInitializing, label), src.layout(), -// { src.begin(0), src.begin(1), src.begin(2), -// src.begin(3), src.begin(4), -// src.begin(5), src.begin(6), src.begin(7) }); -// deep_copy(mirror, src); -// return mirror; -// } +template <class Space, class T, class... P> +inline auto create_mirror_view( + Kokkos::Impl::WithoutInitializing_t wi, const Space& space, + const Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror_view(space, src, Kokkos::view_alloc(wi)); +} + +template <class T, class... P, class... 
ViewCtorArgs> +inline auto create_mirror_view( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::Experimental::OffsetView<T, P...>& src) { + return Impl::create_mirror_view(src, arg_prop); +} + +// Create a mirror view and deep_copy in a new space +template <class... ViewCtorArgs, class T, class... P> +typename Kokkos::Impl::MirrorOffsetViewType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::view_type +create_mirror_view_and_copy( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::Experimental::OffsetView<T, P...>& src) { + return {create_mirror_view_and_copy(arg_prop, src.view()), src.begins()}; +} + +template <class Space, class T, class... P> +typename Kokkos::Impl::MirrorOffsetViewType<Space, T, P...>::view_type +create_mirror_view_and_copy( + const Space& space, const Kokkos::Experimental::OffsetView<T, P...>& src, + std::string const& name = "") { + return {create_mirror_view_and_copy(space, src.view(), name), src.begins()}; +} } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_OFFSETVIEW +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_OFFSETVIEW +#endif #endif /* KOKKOS_OFFSETVIEW_HPP_ */ diff --git a/packages/kokkos/containers/src/Kokkos_ScatterView.hpp b/packages/kokkos/containers/src/Kokkos_ScatterView.hpp index e4dd9531fc3555299fdafedf447b585f8e5cd0b2..a9529d1c87c4f13dcb5d0054f5cd79ba1d2d3bfd 100644 --- a/packages/kokkos/containers/src/Kokkos_ScatterView.hpp +++ b/packages/kokkos/containers/src/Kokkos_ScatterView.hpp @@ -50,6 +50,10 @@ #ifndef KOKKOS_SCATTER_VIEW_HPP #define KOKKOS_SCATTER_VIEW_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SCATTERVIEW +#endif #include <Kokkos_Core.hpp> #include <utility> 
@@ -300,11 +304,6 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType, Kokkos::atomic_add(&dest, src); } - KOKKOS_INLINE_FUNCTION - void join(volatile ValueType& dest, const volatile ValueType& src) const { - Kokkos::atomic_add(&dest, src); - } - KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join(value, rhs); } @@ -374,11 +373,6 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType, atomic_prod(&dest, src); } - KOKKOS_INLINE_FUNCTION - void join(volatile ValueType& dest, const volatile ValueType& src) const { - atomic_prod(&dest, src); - } - KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { atomic_prod(&value, rhs); } @@ -433,11 +427,6 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType, atomic_min(&dest, src); } - KOKKOS_INLINE_FUNCTION - void join(volatile ValueType& dest, const volatile ValueType& src) const { - atomic_min(dest, src); - } - KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join(value, rhs); } @@ -492,11 +481,6 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType, atomic_max(&dest, src); } - KOKKOS_INLINE_FUNCTION - void join(volatile ValueType& dest, const volatile ValueType& src) const { - atomic_max(dest, src); - } - KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join(value, rhs); } @@ -836,6 +820,19 @@ class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated, ::Kokkos::resize(internal_view, n0, n1, n2, n3, n4, n5, n6, n7); } + template <class... 
ViewCtorArgs> + void resize(const ::Kokkos::Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + ::Kokkos::resize(arg_prop, internal_view, n0, n1, n2, n3, n4, n5, n6, n7); + } + template <class I> std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> resize( const I& arg_prop, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -849,6 +846,19 @@ class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated, ::Kokkos::resize(arg_prop, internal_view, n0, n1, n2, n3, n4, n5, n6, n7); } + template <class... ViewCtorArgs> + void realloc(const Kokkos::Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + ::Kokkos::realloc(arg_prop, internal_view, n0, n1, n2, n3, n4, n5, n6, n7); + } + void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -909,11 +919,10 @@ class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated, } template <typename Arg> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<view_type::original_view_type::rank == 1 && - std::is_integral<Arg>::value, - value_type>::type - operator[](Arg arg) const { + KOKKOS_FORCEINLINE_FUNCTION 
std::enable_if_t< + view_type::original_view_type::rank == 1 && std::is_integral<Arg>::value, + value_type> + operator[](Arg arg) const { return view.at(arg); } @@ -1108,6 +1117,19 @@ class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op, n6); } + template <class... ViewCtorArgs> + void resize(const ::Kokkos::Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + ::Kokkos::resize(arg_prop, internal_view, unique_token.size(), n0, n1, n2, + n3, n4, n5, n6); + } + template <class I> std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> resize( const I& arg_prop, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -1121,6 +1143,19 @@ class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op, n3, n4, n5, n6); } + template <class... 
ViewCtorArgs> + void realloc(const ::Kokkos::Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + ::Kokkos::realloc(arg_prop, internal_view, unique_token.size(), n0, n1, n2, + n3, n4, n5, n6); + } + void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -1443,11 +1478,10 @@ class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterDuplicated, } template <typename Arg> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<view_type::original_view_type::rank == 1 && - std::is_integral<Arg>::value, - value_type>::type - operator[](Arg arg) const { + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + view_type::original_view_type::rank == 1 && std::is_integral<Arg>::value, + value_type> + operator[](Arg arg) const { return view.at(thread_id, arg); } @@ -1482,16 +1516,16 @@ ScatterView< RT, typename ViewTraits<RT, RP...>::array_layout, typename ViewTraits<RT, RP...>::device_type, Op, std::conditional_t< - std::is_same<Duplication, void>::value, + std::is_void<Duplication>::value, typename Kokkos::Impl::Experimental::DefaultDuplication< typename ViewTraits<RT, RP...>::execution_space>::type, Duplication>, std::conditional_t< - std::is_same<Contribution, void>::value, + std::is_void<Contribution>::value, typename Kokkos::Impl::Experimental::DefaultContribution< typename ViewTraits<RT, RP...>::execution_space, typename std::conditional_t< - std::is_same<Duplication, void>::value, + std::is_void<Duplication>::value, typename Kokkos::Impl::Experimental::DefaultDuplication< typename ViewTraits<RT, RP...>::execution_space>::type, 
Duplication>>::type, @@ -1552,6 +1586,15 @@ void contribute( namespace Kokkos { +template <typename DT, typename LY, typename ES, typename OP, typename CT, + typename DP, typename... IS, class... ViewCtorArgs> +void realloc( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view, + IS... is) { + scatter_view.realloc(arg_prop, is...); +} + template <typename DT, typename LY, typename ES, typename OP, typename CT, typename DP, typename... IS> void realloc( @@ -1577,6 +1620,15 @@ void resize( scatter_view.resize(is...); } +template <class... ViewCtorArgs, typename DT, typename LY, typename ES, + typename OP, typename CT, typename DP, typename... IS> +void resize( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view, + IS... is) { + scatter_view.resize(arg_prop, is...); +} + template <typename I, typename DT, typename LY, typename ES, typename OP, typename CT, typename DP, typename... 
IS> std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> resize( @@ -1588,4 +1640,8 @@ std::enable_if_t<Kokkos::Impl::is_view_ctor_property<I>::value> resize( } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SCATTERVIEW +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SCATTERVIEW +#endif #endif diff --git a/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp index cd633e40310177b116f04220c7030545ba37039d..219b08b4b40cd8c9f9788a36fb31d488c52f7af3 100644 --- a/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp +++ b/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_STATICCRSGRAPH_HPP #define KOKKOS_STATICCRSGRAPH_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STATICCRSGRAPH +#endif #include <string> #include <vector> @@ -214,8 +218,7 @@ struct GraphRowViewConst { const typename GraphType::entries_type& colidx_in, const ordinal_type& stride, const ordinal_type& count, const OffsetType& idx, - const typename std::enable_if<std::is_integral<OffsetType>::value, - int>::type& = 0) + const std::enable_if_t<std::is_integral<OffsetType>::value, int>& = 0) : colidx_(&colidx_in(idx)), stride_(stride), length(count) {} /// \brief Number of entries in the row. 
@@ -471,8 +474,7 @@ struct StaticCrsGraphMaximumEntry { void init(value_type& update) const { update = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& update, - volatile const value_type& input) const { + void join(value_type& update, const value_type& input) const { if (update < input) update = input; } }; @@ -498,4 +500,8 @@ DataType maximum_entry(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STATICCRSGRAPH +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STATICCRSGRAPH +#endif #endif /* #ifndef KOKKOS_CRSARRAY_HPP */ diff --git a/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp index fbef0a0131faa4af8824ce806d901e426132890c..6c112644c9bc289d1758cd620d06ce4ffaf77331 100644 --- a/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -50,6 +50,10 @@ #ifndef KOKKOS_UNORDERED_MAP_HPP #define KOKKOS_UNORDERED_MAP_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_UNORDEREDMAP +#endif #include <Kokkos_Core.hpp> #include <Kokkos_Functional.hpp> @@ -62,7 +66,6 @@ #include <iostream> #include <cstdint> -#include <stdexcept> namespace Kokkos { @@ -200,10 +203,9 @@ class UnorderedMapInsertResult { /// <tt>Key</tt>. The default will do a bitwise equality comparison. 
/// template <typename Key, typename Value, - typename Device = Kokkos::DefaultExecutionSpace, - typename Hasher = pod_hash<typename std::remove_const<Key>::type>, - typename EqualTo = - pod_equal_to<typename std::remove_const<Key>::type>> + typename Device = Kokkos::DefaultExecutionSpace, + typename Hasher = pod_hash<std::remove_const_t<Key>>, + typename EqualTo = pod_equal_to<std::remove_const_t<Key>>> class UnorderedMap { private: using host_mirror_space = @@ -215,13 +217,13 @@ class UnorderedMap { // key_types using declared_key_type = Key; - using key_type = typename std::remove_const<declared_key_type>::type; - using const_key_type = typename std::add_const<key_type>::type; + using key_type = std::remove_const_t<declared_key_type>; + using const_key_type = std::add_const_t<key_type>; // value_types using declared_value_type = Value; - using value_type = typename std::remove_const<declared_value_type>::type; - using const_value_type = typename std::add_const<value_type>::type; + using value_type = std::remove_const_t<declared_value_type>; + using const_value_type = std::add_const_t<value_type>; using device_type = Device; using execution_space = typename Device::execution_space; @@ -241,7 +243,7 @@ class UnorderedMap { using const_map_type = UnorderedMap<const_key_type, const_value_type, device_type, hasher_type, equal_to_type>; - static const bool is_set = std::is_same<void, value_type>::value; + static const bool is_set = std::is_void<value_type>::value; static const bool has_const_key = std::is_same<const_key_type, declared_key_type>::value; static const bool has_const_value = @@ -318,7 +320,7 @@ class UnorderedMap { #endif m_scalars("UnorderedMap scalars") { if (!is_insertable_map) { - throw std::runtime_error( + Kokkos::Impl::throw_runtime_exception( "Cannot construct a non-insertable (i.e. 
const key_type) " "unordered_map"); } @@ -742,10 +744,10 @@ class UnorderedMap { template <typename SKey, typename SValue> UnorderedMap( UnorderedMap<SKey, SValue, Device, Hasher, EqualTo> const &src, - typename std::enable_if< + std::enable_if_t< Impl::UnorderedMapCanAssign<declared_key_type, declared_value_type, SKey, SValue>::value, - int>::type = 0) + int> = 0) : m_bounded_insert(src.m_bounded_insert), m_hasher(src.m_hasher), m_equal_to(src.m_equal_to), @@ -758,10 +760,10 @@ class UnorderedMap { m_scalars(src.m_scalars) {} template <typename SKey, typename SValue> - typename std::enable_if< + std::enable_if_t< Impl::UnorderedMapCanAssign<declared_key_type, declared_value_type, SKey, SValue>::value, - declared_map_type &>::type + declared_map_type &> operator=(UnorderedMap<SKey, SValue, Device, Hasher, EqualTo> const &src) { m_bounded_insert = src.m_bounded_insert; m_hasher = src.m_hasher; @@ -777,10 +779,8 @@ class UnorderedMap { } template <typename SKey, typename SValue, typename SDevice> - typename std::enable_if< - std::is_same<typename std::remove_const<SKey>::type, key_type>::value && - std::is_same<typename std::remove_const<SValue>::type, - value_type>::value>::type + std::enable_if_t<std::is_same<std::remove_const_t<SKey>, key_type>::value && + std::is_same<std::remove_const_t<SValue>, value_type>::value> create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher, EqualTo> const &src) { if (m_hash_lists.data() != src.m_hash_lists.data()) { @@ -915,4 +915,8 @@ inline void deep_copy( } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_UNORDEREDMAP +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_UNORDEREDMAP +#endif #endif // KOKKOS_UNORDERED_MAP_HPP diff --git a/packages/kokkos/containers/src/Kokkos_Vector.hpp b/packages/kokkos/containers/src/Kokkos_Vector.hpp index eddb87800321fd0eeac446eb6a37db6b59d6e8ca..8dd080737b49b52a63704611fc86cb71eee5491b 100644 --- 
a/packages/kokkos/containers/src/Kokkos_Vector.hpp +++ b/packages/kokkos/containers/src/Kokkos_Vector.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_VECTOR_HPP #define KOKKOS_VECTOR_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_VECTOR +#endif #include <Kokkos_Core_fwd.hpp> #include <Kokkos_DualView.hpp> @@ -185,8 +189,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> { public: // TODO: can use detection idiom to generate better error message here later template <typename InputIterator> - typename std::enable_if<impl_is_input_iterator<InputIterator>::value, - iterator>::type + std::enable_if_t<impl_is_input_iterator<InputIterator>::value, iterator> insert(iterator it, InputIterator b, InputIterator e) { ptrdiff_t count = std::distance(b, e); @@ -333,4 +336,8 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> { }; } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_VECTOR +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_VECTOR +#endif #endif diff --git a/packages/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp b/packages/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp index 9512f2d4a20e509af321d315c8963693076a0d58..134b30769fb1d8a65e6e5d779a39c93f255268ad 100644 --- a/packages/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp +++ b/packages/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp @@ -86,9 +86,7 @@ struct BitsetCount { void init(value_type& count) const { count = 0u; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& count, const volatile size_type& incr) const { - count += incr; - } + void join(value_type& count, const size_type& incr) const { count += incr; } KOKKOS_INLINE_FUNCTION void operator()(size_type i, value_type& count) const { diff --git a/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp b/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp index 
9fb6a4e1ce043f4c75421b026734e3d14015475f..b81b1eee1e2475c2966e7aaa517611452ea37a9d 100644 --- a/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp +++ b/packages/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp @@ -56,11 +56,10 @@ template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type, typename SizeType> inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>::HostMirror -create_mirror_view( - const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>& - view, - typename std::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type, - Arg3Type>::is_hostspace>::type* = 0) { +create_mirror_view(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, + SizeType>& view, + std::enable_if_t<ViewTraits<DataType, Arg1Type, Arg2Type, + Arg3Type>::is_hostspace>* = 0) { return view; } @@ -99,11 +98,10 @@ template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type, typename SizeType> inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>::HostMirror -create_mirror_view( - const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>& - view, - typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type, - Arg3Type>::is_hostspace>::type* = 0) { +create_mirror_view(const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, + SizeType>& view, + std::enable_if_t<!ViewTraits<DataType, Arg1Type, Arg2Type, + Arg3Type>::is_hostspace>* = 0) { return create_mirror(view); } } // namespace Kokkos diff --git a/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp b/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp index e10e256b6a8d1e6e48f8e80b205cd097a3486723..fc861992f0e6d4f9f5ee2ccab96ee9bb31072444 100644 --- a/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp +++ b/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define 
KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_UnorderedMap.hpp> namespace Kokkos { diff --git a/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp b/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp index 80494139d2705e06df25d8a6bb6486a1c7829594..5acba244f60801f3541ef85e31896180685181eb 100644 --- a/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp +++ b/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp @@ -76,8 +76,15 @@ struct UnorderedMapRehash { *this); } - KOKKOS_INLINE_FUNCTION - void operator()(size_type i) const { + template <typename Dummy = typename map_type::value_type> + KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_void<Dummy>::value> + operator()(size_type i) const { + if (m_src.valid_at(i)) m_dst.insert(m_src.key_at(i)); + } + + template <typename Dummy = typename map_type::value_type> + KOKKOS_INLINE_FUNCTION std::enable_if_t<!std::is_void<Dummy>::value> + operator()(size_type i) const { if (m_src.valid_at(i)) m_dst.insert(m_src.key_at(i), m_src.value_at(i)); } }; diff --git a/packages/kokkos/containers/unit_tests/TestBitset.hpp b/packages/kokkos/containers/unit_tests/TestBitset.hpp index 6810ae101aff68f7137c28689cf4d98f13194a4f..6cb03d6c5648556e97ca7e794742948cb15f7530 100644 --- a/packages/kokkos/containers/unit_tests/TestBitset.hpp +++ b/packages/kokkos/containers/unit_tests/TestBitset.hpp @@ -75,9 +75,7 @@ struct TestBitset { void init(value_type& v) const { v = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, const volatile value_type& src) const { - dst += src; - } + void join(value_type& dst, const value_type& src) const { dst += src; } KOKKOS_INLINE_FUNCTION void operator()(uint32_t i, value_type& v) const { @@ -116,9 +114,7 @@ struct TestBitsetTest { void init(value_type& v) const { v = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, const volatile value_type& src) const { - dst += src; - } + void join(value_type& dst, const value_type& 
src) const { dst += src; } KOKKOS_INLINE_FUNCTION void operator()(uint32_t i, value_type& v) const { @@ -148,9 +144,7 @@ struct TestBitsetAny { void init(value_type& v) const { v = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, const volatile value_type& src) const { - dst += src; - } + void join(value_type& dst, const value_type& src) const { dst += src; } KOKKOS_INLINE_FUNCTION void operator()(uint32_t i, value_type& v) const { diff --git a/packages/kokkos/containers/unit_tests/TestDualView.hpp b/packages/kokkos/containers/unit_tests/TestDualView.hpp index 75829e0769312c2844c239bfe3ac81a6966048e2..3085f091cee81672f048903b3242d2fc49d24399 100644 --- a/packages/kokkos/containers/unit_tests/TestDualView.hpp +++ b/packages/kokkos/containers/unit_tests/TestDualView.hpp @@ -466,7 +466,8 @@ namespace { * that we keep the semantics of UVM DualViews intact. */ // modify if we have other UVM enabled backends -#ifdef KOKKOS_ENABLE_CUDA // OR other UVM builds +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_SYCL) || \ + defined(KOKKOS_ENABLE_HIP) // OR other UVM builds #define UVM_ENABLED_BUILD #endif @@ -482,6 +483,20 @@ struct UVMSpaceFor<Kokkos::Cuda> { }; #endif +#ifdef KOKKOS_ENABLE_SYCL // specific to SYCL +template <> +struct UVMSpaceFor<Kokkos::Experimental::SYCL> { + using type = Kokkos::Experimental::SYCLSharedUSMSpace; +}; +#endif + +#ifdef KOKKOS_ENABLE_HIP // specific to HIP +template <> +struct UVMSpaceFor<Kokkos::Experimental::HIP> { + using type = Kokkos::Experimental::HIPManagedSpace; +}; +#endif + #ifdef UVM_ENABLED_BUILD template <> struct UVMSpaceFor<Kokkos::DefaultHostExecutionSpace> { diff --git a/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp index 321f1228a6d4dcfeb296c307ee0fa99283023b58..5fbd32956053e097c62247cc0a366866c34d8983 100644 --- a/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp +++ 
b/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp @@ -45,7 +45,6 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <stdexcept> #include <sstream> #include <iostream> #include <Kokkos_DynRankView.hpp> @@ -108,8 +107,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 7> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& input) { + static void join(value_type& update, const value_type& input) { update |= input; } @@ -193,8 +191,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 6> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& input) { + static void join(value_type& update, const value_type& input) { update |= input; } @@ -275,8 +272,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 5> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& input) { + static void join(value_type& update, const value_type& input) { update |= input; } @@ -370,8 +366,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 4> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& input) { + static void join(value_type& update, const value_type& input) { update |= input; } @@ -445,8 +440,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 3> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& input) { + static void join(value_type& update, const value_type& input) { update |= input; } @@ -543,8 +537,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 2> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& input) { + static void join(value_type& update, const 
value_type& input) { update |= input; } @@ -623,8 +616,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& input) { + static void join(value_type& update, const value_type& input) { update |= input; } @@ -724,6 +716,7 @@ class TestDynViewAPI { run_test_subview_strided(); run_test_vector(); run_test_as_view_of_rank_n(); + run_test_layout(); } static void run_operator_test_rank12345() { @@ -1158,9 +1151,6 @@ class TestDynViewAPI { #endif // MDRangePolict Rank < 7 #endif // defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) - - // Error checking test - EXPECT_ANY_THROW({ auto v_copy = Kokkos::Impl::as_view_of_rank_n<2>(d); }); } static void run_test_scalar() { @@ -1898,6 +1888,28 @@ class TestDynViewAPI { typename smultivector_type::const_type cmvX(cmv); typename const_smultivector_type::const_type ccmvX(cmv); } + + static void run_test_layout() { + Kokkos::DynRankView<double> d("source", 1, 2, 3, 4); + Kokkos::DynRankView<double> e("dest"); + + auto props = Kokkos::view_alloc(Kokkos::WithoutInitializing, d.label()); + e = Kokkos::DynRankView<double>(props, d.layout()); + + ASSERT_EQ(d.rank(), 4u); + ASSERT_EQ(e.rank(), 4u); + ASSERT_EQ(e.label(), "source"); + + auto ulayout = e.layout(); + ASSERT_EQ(ulayout.dimension[0], 1u); + ASSERT_EQ(ulayout.dimension[1], 2u); + ASSERT_EQ(ulayout.dimension[2], 3u); + ASSERT_EQ(ulayout.dimension[3], 4u); + ASSERT_EQ(ulayout.dimension[4], KOKKOS_INVALID_INDEX); + ASSERT_EQ(ulayout.dimension[5], KOKKOS_INVALID_INDEX); + ASSERT_EQ(ulayout.dimension[6], KOKKOS_INVALID_INDEX); + ASSERT_EQ(ulayout.dimension[7], KOKKOS_INVALID_INDEX); + } }; } // namespace Test diff --git a/packages/kokkos/containers/unit_tests/TestOffsetView.hpp b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp index a127c250e1c87f6fb13d7a4f1029403bfee16c7b..9e9edc80b51236b6bbed60a59f6c66e501f8fe03 100644 --- 
a/packages/kokkos/containers/unit_tests/TestOffsetView.hpp +++ b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp @@ -52,7 +52,6 @@ #include <cstdio> #include <Kokkos_Timer.hpp> #include <Kokkos_OffsetView.hpp> -#include <KokkosExp_MDRangePolicy.hpp> using std::cout; using std::endl; @@ -64,14 +63,19 @@ void test_offsetview_construction() { using offset_view_type = Kokkos::Experimental::OffsetView<Scalar**, Device>; using view_type = Kokkos::View<Scalar**, Device>; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 Kokkos::Experimental::index_list_type range0 = {-1, 3}; Kokkos::Experimental::index_list_type range1 = {-2, 2}; +#else + std::pair<int64_t, int64_t> range0 = {-1, 3}; + std::pair<int64_t, int64_t> range1 = {-2, 2}; +#endif { offset_view_type o1; ASSERT_FALSE(o1.is_allocated()); - o1 = offset_view_type("o1", range0, range1); + o1 = offset_view_type("o1", {-1, 3}, {-2, 2}); offset_view_type o2(o1); offset_view_type o3("o3", range0, range1); diff --git a/packages/kokkos/containers/unit_tests/TestScatterView.hpp b/packages/kokkos/containers/unit_tests/TestScatterView.hpp index 9fddfdcca0e1cbc850e244abfb5d24f7991633d6..b2f5c5a91309d2514505b2c8817410a837091d14 100644 --- a/packages/kokkos/containers/unit_tests/TestScatterView.hpp +++ b/packages/kokkos/containers/unit_tests/TestScatterView.hpp @@ -90,7 +90,7 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, scatterSize = n; auto policy = Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n); - Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum"); + Kokkos::parallel_for("scatter_view_test: Sum", policy, *this); } KOKKOS_INLINE_FUNCTION @@ -235,7 +235,7 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, scatterSize = n; auto policy = Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n); - Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + Kokkos::parallel_for("scatter_view_test: Prod", policy, 
*this); } KOKKOS_INLINE_FUNCTION @@ -259,12 +259,10 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); - EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14) + EXPECT_NEAR(val0, 65536.0, 1e-14 * 65536.0) << "Data differs at index " << i; + EXPECT_NEAR(val1, 256.0, 1e-14 * 256.0) << "Data differs at index " << i; + EXPECT_NEAR(val2, 1.0, 1e-14 * 1.0) << "Data differs at index " << i; } } @@ -282,9 +280,9 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, auto val2 = host_view(i, 2); if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) { // is in subview - EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14); - EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14); - EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14); + EXPECT_NEAR(val0, 65536.0, 1e-14 * 65536.0); + EXPECT_NEAR(val1, 256.0, 1e-14 * 256.0); + EXPECT_NEAR(val2, 1.0, 1e-14 * 1.0); } else { // is outside of subview EXPECT_NEAR(val0, NumberType(1), 1e-14) @@ -338,7 +336,7 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, scatterSize = n; auto policy = Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n); - Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + Kokkos::parallel_for("scatter_view_test: Prod", policy, *this); } KOKKOS_INLINE_FUNCTION @@ -362,12 +360,9 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); - EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14) - << "Data 
differs at index " << i; - EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14) - << "Data differs at index " << i; + EXPECT_NEAR(val0, 4.0, 1e-14 * 4.0) << "Data differs at index " << i; + EXPECT_NEAR(val1, 2.0, 1e-14 * 2.0) << "Data differs at index " << i; + EXPECT_NEAR(val2, 1.0, 1e-14 * 1.0) << "Data differs at index " << i; } } @@ -385,12 +380,9 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, auto val2 = host_view(i, 2); if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) { // is in subview - EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14) - << "Data differs at index " << i; + EXPECT_NEAR(val0, 4.0, 1e-14 * 4.0) << "Data differs at index " << i; + EXPECT_NEAR(val1, 2.0, 1e-14 * 2.0) << "Data differs at index " << i; + EXPECT_NEAR(val2, 1.0, 1e-14 * 1.0) << "Data differs at index " << i; } else { // is outside of subview EXPECT_NEAR(val0, NumberType(999999), 1e-14) @@ -443,7 +435,7 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, void run_parallel(int n) { scatterSize = n; Kokkos::RangePolicy<typename DeviceType::execution_space, int> policy(0, n); - Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + Kokkos::parallel_for("scatter_view_test: Prod", policy, *this); } KOKKOS_INLINE_FUNCTION @@ -467,12 +459,9 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); - EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14) - << "Data differs at index " << i; + EXPECT_NEAR(val0, 16.0, 1e-14 * 16.0) << 
"Data differs at index " << i; + EXPECT_NEAR(val1, 8.0, 1e-14 * 8.0) << "Data differs at index " << i; + EXPECT_NEAR(val2, 4.0, 1e-14 * 4.0) << "Data differs at index " << i; } } @@ -490,12 +479,9 @@ struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution, auto val2 = host_view(i, 2); if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) { // is in subview - EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14) - << "Data differs at index " << i; - EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14) - << "Data differs at index " << i; + EXPECT_NEAR(val0, 16.0, 1e-14 * 16.0) << "Data differs at index " << i; + EXPECT_NEAR(val1, 8.0, 1e-14 * 8.0) << "Data differs at index " << i; + EXPECT_NEAR(val2, 4.0, 1e-14 * 4.0) << "Data differs at index " << i; } else { // is outside of subview EXPECT_NEAR(val0, NumberType(0), 1e-14) @@ -888,7 +874,7 @@ TEST(TEST_CATEGORY, scatterview_devicetype) { #else using device_execution_space = Kokkos::Experimental::HIP; using device_memory_space = Kokkos::Experimental::HIPSpace; - using host_accessible_space = Kokkos::Experimental::HIPHostPinnedSpace; + using host_accessible_space = Kokkos::Experimental::HIPManagedSpace; #endif if (std::is_same<TEST_EXECSPACE, device_execution_space>::value) { using device_device_type = diff --git a/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp index 1550ca7b5be4ac9514e7488381883eb6a98c37c2..3f5f97d6bbae887f7e6084cb6f5cdf5705ed3299 100644 --- a/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp +++ b/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp @@ -87,8 +87,7 @@ struct TestInsert { void init(value_type &failed_count) const { failed_count = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type &failed_count, - const volatile value_type &count) const { + void join(value_type 
&failed_count, const value_type &count) const { failed_count += count; } @@ -156,9 +155,7 @@ struct TestFind { static void init(value_type &dst) { dst = 0; } KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &dst, const volatile value_type &src) { - dst += src; - } + static void join(value_type &dst, const value_type &src) { dst += src; } KOKKOS_INLINE_FUNCTION void operator()(typename execution_space::size_type i, @@ -337,6 +334,9 @@ TEST(TEST_CATEGORY, UnorderedMap_clear_zero_size) { m.insert(5); m.insert(7); ASSERT_EQ(4u, m.size()); + m.rehash(0); + ASSERT_EQ(128u, m.capacity()); + ASSERT_EQ(4u, m.size()); m.clear(); ASSERT_EQ(0u, m.size()); diff --git a/packages/kokkos/containers/unit_tests/TestVector.hpp b/packages/kokkos/containers/unit_tests/TestVector.hpp index c093c7b0c9d8b897c2cec9d6f7e8330501f021f2..efb21fe131e4d90abc1551e9340b814659986115 100644 --- a/packages/kokkos/containers/unit_tests/TestVector.hpp +++ b/packages/kokkos/containers/unit_tests/TestVector.hpp @@ -298,6 +298,19 @@ TEST(TEST_CATEGORY, vector_insert) { Impl::test_vector_insert<int, TEST_EXECSPACE>(3057); } +// The particular scenario below triggered a bug where empty modified_flags +// would cause resize in push_back to be executed on the device overwriting the +// values that were stored on the host previously. 
+TEST(TEST_CATEGORY, vector_push_back_default_exec) { + Kokkos::vector<int, TEST_EXECSPACE> V; + V.clear(); + V.push_back(4); + ASSERT_EQ(V[0], 4); + V.push_back(3); + ASSERT_EQ(V[1], 3); + ASSERT_EQ(V[0], 4); +} + } // namespace Test #endif // KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/packages/kokkos/containers/unit_tests/TestWithoutInitializing.hpp b/packages/kokkos/containers/unit_tests/TestWithoutInitializing.hpp index feae32179b2f1dce72e0346146338658f638e2c7..174773f19886ad48779756859cce504ffb7b9221 100644 --- a/packages/kokkos/containers/unit_tests/TestWithoutInitializing.hpp +++ b/packages/kokkos/containers/unit_tests/TestWithoutInitializing.hpp @@ -45,7 +45,9 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> #include <Kokkos_DualView.hpp> +#include <Kokkos_DynamicView.hpp> #include <Kokkos_DynRankView.hpp> +#include <Kokkos_OffsetView.hpp> #include <Kokkos_ScatterView.hpp> #include <../../core/unit_test/tools/include/ToolTestingUtilities.hpp> @@ -59,7 +61,12 @@ TEST(TEST_CATEGORY, resize_realloc_no_init_dualview) { auto success = validate_absence( [&]() { Kokkos::resize(Kokkos::WithoutInitializing, bla, 5, 6, 7, 9); + EXPECT_EQ(bla.template view<TEST_EXECSPACE>().label(), "bla"); Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 8, 8, 8); + EXPECT_EQ(bla.template view<TEST_EXECSPACE>().label(), "bla"); + Kokkos::realloc(Kokkos::view_alloc(Kokkos::WithoutInitializing), bla, 5, + 6, 7, 8); + EXPECT_EQ(bla.template view<TEST_EXECSPACE>().label(), "bla"); }, [&](BeginParallelForEvent event) { if (event.descriptor().find("initialization") != std::string::npos) @@ -85,7 +92,9 @@ TEST(TEST_CATEGORY, resize_realloc_no_alloc_dualview) { auto success = validate_absence( [&]() { Kokkos::resize(bla, 8, 7, 6, 5); + EXPECT_EQ(bla.template view<TEST_EXECSPACE>().label(), "bla"); Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 7, 6, 5); + EXPECT_EQ(bla.template view<TEST_EXECSPACE>().label(), "bla"); }, [&](BeginParallelForEvent) { return 
MatchDiagnostic{true, {"Found begin event"}}; @@ -103,6 +112,74 @@ TEST(TEST_CATEGORY, resize_realloc_no_alloc_dualview) { listen_tool_events(Config::DisableAll()); } +TEST(TEST_CATEGORY, resize_exec_space_dualview) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences(), + Config::EnableKernels()); + Kokkos::DualView<int*** * [1][2][3][4], TEST_EXECSPACE> bla("bla", 8, 7, 6, + 5); + + auto success = validate_absence( + [&]() { + Kokkos::resize( + Kokkos::view_alloc(TEST_EXECSPACE{}, Kokkos::WithoutInitializing), + bla, 5, 6, 7, 8); + EXPECT_EQ(bla.template view<TEST_EXECSPACE>().label(), "bla"); + }, + [&](BeginFenceEvent event) { + if (event.descriptor().find("Kokkos::resize(View)") != + std::string::npos) + return MatchDiagnostic{true, {"Found begin event"}}; + return MatchDiagnostic{false}; + }, + [&](EndFenceEvent event) { + if (event.descriptor().find("Kokkos::resize(View)") != + std::string::npos) + return MatchDiagnostic{true, {"Found end event"}}; + return MatchDiagnostic{false}; + }, + [&](BeginParallelForEvent event) { + if (event.descriptor().find("initialization") != std::string::npos) + return MatchDiagnostic{true, {"Found begin event"}}; + return MatchDiagnostic{false}; + }, + [&](EndParallelForEvent event) { + if (event.descriptor().find("initialization") != std::string::npos) + return MatchDiagnostic{true, {"Found end event"}}; + return MatchDiagnostic{false}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, realloc_exec_space_dualview) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<typename TEST_EXECSPACE::memory_space, + Kokkos::CudaUVMSpace>::value) + GTEST_SKIP() << "skipping since CudaUVMSpace requires additional fences"; +#endif + + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences()); + using view_type = Kokkos::DualView<int*, TEST_EXECSPACE>; + view_type 
v(Kokkos::view_alloc(TEST_EXECSPACE{}, "bla"), 8); + + auto success = validate_absence( + [&]() { + Kokkos::realloc(Kokkos::view_alloc(TEST_EXECSPACE{}), v, 8); + EXPECT_EQ(v.template view<TEST_EXECSPACE>().label(), "bla"); + }, + [&](BeginFenceEvent event) { + if ((event.descriptor().find("Debug Only Check for Execution Error") != + std::string::npos) || + (event.descriptor().find("HostSpace fence") != std::string::npos)) + return MatchDiagnostic{false}; + return MatchDiagnostic{true, {"Found fence event!"}}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + TEST(TEST_CATEGORY, resize_realloc_no_init_dynrankview) { using namespace Kokkos::Test::Tools; listen_tool_events(Config::DisableAll(), Config::EnableKernels()); @@ -111,7 +188,51 @@ TEST(TEST_CATEGORY, resize_realloc_no_init_dynrankview) { auto success = validate_absence( [&]() { Kokkos::resize(Kokkos::WithoutInitializing, bla, 5, 6, 7, 9); + EXPECT_EQ(bla.label(), "bla"); Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 8, 8, 8); + EXPECT_EQ(bla.label(), "bla"); + Kokkos::realloc(Kokkos::view_alloc(Kokkos::WithoutInitializing), bla, 5, + 6, 7, 8); + EXPECT_EQ(bla.label(), "bla"); + }, + [&](BeginParallelForEvent event) { + if (event.descriptor().find("initialization") != std::string::npos) + return MatchDiagnostic{true, {"Found begin event"}}; + return MatchDiagnostic{false}; + }, + [&](EndParallelForEvent event) { + if (event.descriptor().find("initialization") != std::string::npos) + return MatchDiagnostic{true, {"Found end event"}}; + return MatchDiagnostic{false}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, resize_exec_space_dynrankview) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences(), + Config::EnableKernels()); + Kokkos::DynRankView<int, TEST_EXECSPACE> bla("bla", 8, 7, 6, 5); + + auto success = validate_absence( + [&]() { + Kokkos::resize( + 
Kokkos::view_alloc(TEST_EXECSPACE{}, Kokkos::WithoutInitializing), + bla, 5, 6, 7, 8); + EXPECT_EQ(bla.label(), "bla"); + }, + [&](BeginFenceEvent event) { + if (event.descriptor().find("Kokkos::resize(View)") != + std::string::npos) + return MatchDiagnostic{true, {"Found begin event"}}; + return MatchDiagnostic{false}; + }, + [&](EndFenceEvent event) { + if (event.descriptor().find("Kokkos::resize(View)") != + std::string::npos) + return MatchDiagnostic{true, {"Found end event"}}; + return MatchDiagnostic{false}; }, [&](BeginParallelForEvent event) { if (event.descriptor().find("initialization") != std::string::npos) @@ -127,6 +248,45 @@ TEST(TEST_CATEGORY, resize_realloc_no_init_dynrankview) { listen_tool_events(Config::DisableAll()); } +TEST(TEST_CATEGORY, realloc_exec_space_dynrankview) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<typename TEST_EXECSPACE::memory_space, + Kokkos::CudaUVMSpace>::value) + GTEST_SKIP() << "skipping since CudaUVMSpace requires additional fences"; +#endif +// FIXME_THREADS The Threads backend fences every parallel_for +#ifdef KOKKOS_ENABLE_THREADS + if (std::is_same<TEST_EXECSPACE, Kokkos::Threads>::value) + GTEST_SKIP() << "skipping since the Threads backend isn't asynchronous"; +#endif + + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences()); + using view_type = Kokkos::DynRankView<int, TEST_EXECSPACE>; + view_type outer_view, outer_view2; + + auto success = validate_absence( + [&]() { + view_type inner_view(Kokkos::view_alloc(TEST_EXECSPACE{}, "bla"), 8); + // Avoid testing the destructor + outer_view = inner_view; + Kokkos::realloc( + Kokkos::view_alloc(Kokkos::WithoutInitializing, TEST_EXECSPACE{}), + inner_view, 10); + EXPECT_EQ(inner_view.label(), "bla"); + outer_view2 = inner_view; + }, + [&](BeginFenceEvent event) { + if ((event.descriptor().find("Debug Only Check for Execution Error") != + std::string::npos) || + (event.descriptor().find("HostSpace fence") != 
std::string::npos)) + return MatchDiagnostic{false}; + return MatchDiagnostic{true, {"Found fence event!"}}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + TEST(TEST_CATEGORY, resize_realloc_no_init_scatterview) { using namespace Kokkos::Test::Tools; listen_tool_events(Config::DisableAll(), Config::EnableKernels()); @@ -137,7 +297,12 @@ TEST(TEST_CATEGORY, resize_realloc_no_init_scatterview) { auto success = validate_absence( [&]() { Kokkos::resize(Kokkos::WithoutInitializing, bla, 4, 5, 6, 8); + EXPECT_EQ(bla.subview().label(), "bla"); Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 8, 8, 8); + EXPECT_EQ(bla.subview().label(), "bla"); + Kokkos::realloc(Kokkos::view_alloc(Kokkos::WithoutInitializing), bla, 5, + 6, 7, 8); + EXPECT_EQ(bla.subview().label(), "bla"); }, [&](BeginParallelForEvent event) { if (event.descriptor().find("initialization") != std::string::npos) @@ -164,7 +329,9 @@ TEST(TEST_CATEGORY, resize_realloc_no_alloc_scatterview) { auto success = validate_absence( [&]() { Kokkos::resize(bla, 7, 6, 5, 4); + EXPECT_EQ(bla.subview().label(), "bla"); Kokkos::realloc(Kokkos::WithoutInitializing, bla, 7, 6, 5, 4); + EXPECT_EQ(bla.subview().label(), "bla"); }, [&](BeginParallelForEvent) { return MatchDiagnostic{true, {"Found begin event"}}; @@ -181,3 +348,388 @@ TEST(TEST_CATEGORY, resize_realloc_no_alloc_scatterview) { ASSERT_TRUE(success); listen_tool_events(Config::DisableAll()); } + +TEST(TEST_CATEGORY, resize_exec_space_scatterview) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences(), + Config::EnableKernels()); + Kokkos::Experimental::ScatterView< + int*** * [1][2][3], typename TEST_EXECSPACE::array_layout, TEST_EXECSPACE> + bla("bla", 7, 6, 5, 4); + + auto success = validate_absence( + [&]() { + Kokkos::resize( + Kokkos::view_alloc(TEST_EXECSPACE{}, Kokkos::WithoutInitializing), + bla, 5, 6, 7, 8); + EXPECT_EQ(bla.subview().label(), "bla"); + }, + 
[&](BeginFenceEvent event) { + if (event.descriptor().find("Kokkos::resize(View)") != + std::string::npos) + return MatchDiagnostic{true, {"Found begin event"}}; + return MatchDiagnostic{false}; + }, + [&](EndFenceEvent event) { + if (event.descriptor().find("Kokkos::resize(View)") != + std::string::npos) + return MatchDiagnostic{true, {"Found end event"}}; + return MatchDiagnostic{false}; + }, + [&](BeginParallelForEvent event) { + if (event.descriptor().find("initialization") != std::string::npos) + return MatchDiagnostic{true, {"Found begin event"}}; + return MatchDiagnostic{false}; + }, + [&](EndParallelForEvent event) { + if (event.descriptor().find("initialization") != std::string::npos) + return MatchDiagnostic{true, {"Found end event"}}; + return MatchDiagnostic{false}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, realloc_exec_space_scatterview) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<typename TEST_EXECSPACE::memory_space, + Kokkos::CudaUVMSpace>::value) + GTEST_SKIP() << "skipping since CudaUVMSpace requires additional fences"; +#endif +// FIXME_THREADS The Threads backend fences every parallel_for +#ifdef KOKKOS_ENABLE_THREADS + if (std::is_same<typename TEST_EXECSPACE, Kokkos::Threads>::value) + GTEST_SKIP() << "skipping since the Threads backend isn't asynchronous"; +#endif + + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences()); + using view_type = Kokkos::Experimental::ScatterView< + int*, typename TEST_EXECSPACE::array_layout, TEST_EXECSPACE>; + view_type outer_view, outer_view2; + + auto success = validate_absence( + [&]() { + view_type inner_view(Kokkos::view_alloc(TEST_EXECSPACE{}, "bla"), 8); + // Avoid testing the destructor + outer_view = inner_view; + Kokkos::realloc( + Kokkos::view_alloc(Kokkos::WithoutInitializing, TEST_EXECSPACE{}), + inner_view, 10); + EXPECT_EQ(inner_view.subview().label(), "bla"); + outer_view2 = inner_view; 
+ Kokkos::realloc(Kokkos::view_alloc(TEST_EXECSPACE{}), inner_view, 10); + EXPECT_EQ(inner_view.subview().label(), "bla"); + }, + [&](BeginFenceEvent event) { + if ((event.descriptor().find("Debug Only Check for Execution Error") != + std::string::npos) || + (event.descriptor().find("HostSpace fence") != std::string::npos)) + return MatchDiagnostic{false}; + return MatchDiagnostic{true, {"Found fence event!"}}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, create_mirror_no_init_dynrankview) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels()); + Kokkos::DynRankView<int, TEST_EXECSPACE> device_view("device view", 10); + Kokkos::DynRankView<int, Kokkos::HostSpace> host_view("host view", 10); + + auto success = validate_absence( + [&]() { + auto mirror_device = + Kokkos::create_mirror(Kokkos::WithoutInitializing, device_view); + auto mirror_host = Kokkos::create_mirror(Kokkos::WithoutInitializing, + TEST_EXECSPACE{}, host_view); + auto mirror_device_view = Kokkos::create_mirror_view( + Kokkos::WithoutInitializing, device_view); + auto mirror_host_view = Kokkos::create_mirror_view( + Kokkos::WithoutInitializing, TEST_EXECSPACE{}, host_view); + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found begin event"}}; + }, + [&](EndParallelForEvent) { + return MatchDiagnostic{true, {"Found end event"}}; + }); + ASSERT_TRUE(success); +} + +TEST(TEST_CATEGORY, create_mirror_no_init_dynrankview_viewctor) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels()); + Kokkos::DynRankView<int, Kokkos::DefaultExecutionSpace> device_view( + "device view", 10); + Kokkos::DynRankView<int, Kokkos::HostSpace> host_view("host view", 10); + + auto success = validate_absence( + [&]() { + auto mirror_device = Kokkos::create_mirror( + Kokkos::view_alloc(Kokkos::WithoutInitializing), device_view); + auto mirror_host = 
Kokkos::create_mirror( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + Kokkos::DefaultExecutionSpace{}), + host_view); + auto mirror_device_view = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::WithoutInitializing), device_view); + auto mirror_host_view = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + Kokkos::DefaultExecutionSpace{}), + host_view); + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found begin event"}}; + }, + [&](EndParallelForEvent) { + return MatchDiagnostic{true, {"Found end event"}}; + }); + ASSERT_TRUE(success); +} + +TEST(TEST_CATEGORY, create_mirror_view_and_copy_dynrankview) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<typename TEST_EXECSPACE::memory_space, + Kokkos::CudaUVMSpace>::value) + return; +#endif + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels(), + Config::EnableFences()); + + Kokkos::DynRankView<int, Kokkos::HostSpace> host_view("host view", 10); + decltype(Kokkos::create_mirror_view_and_copy(TEST_EXECSPACE{}, + host_view)) device_view; + + auto success = validate_absence( + [&]() { + auto mirror_device = Kokkos::create_mirror_view_and_copy( + Kokkos::view_alloc(TEST_EXECSPACE{}, + typename TEST_EXECSPACE::memory_space{}), + host_view); + // Avoid fences for deallocation when mirror_device goes out of scope. 
+ device_view = mirror_device; + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found parallel_for event"}}; + }, + [&](BeginFenceEvent) { + return MatchDiagnostic{true, {"Found fence event"}}; + }); + ASSERT_TRUE(success); +} + +TEST(TEST_CATEGORY, create_mirror_no_init_offsetview) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels()); + Kokkos::Experimental::OffsetView<int*, TEST_EXECSPACE> device_view( + "device view", {0, 10}); + Kokkos::Experimental::OffsetView<int*, Kokkos::HostSpace> host_view( + "host view", {0, 10}); + + auto success = validate_absence( + [&]() { + auto mirror_device = + Kokkos::create_mirror(Kokkos::WithoutInitializing, device_view); + auto mirror_host = Kokkos::create_mirror(Kokkos::WithoutInitializing, + TEST_EXECSPACE{}, host_view); + auto mirror_device_view = Kokkos::create_mirror_view( + Kokkos::WithoutInitializing, device_view); + auto mirror_host_view = Kokkos::create_mirror_view( + Kokkos::WithoutInitializing, TEST_EXECSPACE{}, host_view); + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found begin event"}}; + }, + [&](EndParallelForEvent) { + return MatchDiagnostic{true, {"Found end event"}}; + }); + ASSERT_TRUE(success); +} + +TEST(TEST_CATEGORY, create_mirror_no_init_offsetview_view_ctor) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels()); + Kokkos::Experimental::OffsetView<int*, Kokkos::DefaultExecutionSpace> + device_view("device view", {0, 10}); + Kokkos::Experimental::OffsetView<int*, Kokkos::HostSpace> host_view( + "host view", {0, 10}); + + auto success = validate_absence( + [&]() { + auto mirror_device = Kokkos::create_mirror( + Kokkos::view_alloc(Kokkos::WithoutInitializing), device_view); + auto mirror_host = Kokkos::create_mirror( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + Kokkos::DefaultExecutionSpace{}), + host_view); + auto mirror_device_view = 
Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::WithoutInitializing), device_view); + auto mirror_host_view = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + Kokkos::DefaultExecutionSpace{}), + host_view); + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found begin event"}}; + }, + [&](EndParallelForEvent) { + return MatchDiagnostic{true, {"Found end event"}}; + }); + ASSERT_TRUE(success); +} + +TEST(TEST_CATEGORY, create_mirror_view_and_copy_offsetview) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<typename TEST_EXECSPACE::memory_space, + Kokkos::CudaUVMSpace>::value) + return; +#endif + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels(), + Config::EnableFences()); + + Kokkos::Experimental::OffsetView<int*, Kokkos::HostSpace> host_view( + "host view", {0, 10}); + decltype(Kokkos::create_mirror_view_and_copy(TEST_EXECSPACE{}, + host_view)) device_view; + + auto success = validate_absence( + [&]() { + auto mirror_device = Kokkos::create_mirror_view_and_copy( + Kokkos::view_alloc(TEST_EXECSPACE{}, + typename TEST_EXECSPACE::memory_space{}), + host_view); + // Avoid fences for deallocation when mirror_device goes out of scope. 
+ device_view = mirror_device; + auto mirror_device_mirror = Kokkos::create_mirror_view_and_copy( + Kokkos::view_alloc(TEST_EXECSPACE{}, + typename TEST_EXECSPACE::memory_space{}), + mirror_device); + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found parallel_for event"}}; + }, + [&](BeginFenceEvent) { + return MatchDiagnostic{true, {"Found fence event"}}; + }); + ASSERT_TRUE(success); +} + +// FIXME OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET +TEST(TEST_CATEGORY, create_mirror_no_init_dynamicview) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels()); + Kokkos::Experimental::DynamicView<int*, TEST_EXECSPACE> device_view( + "device view", 2, 10); + Kokkos::Experimental::DynamicView<int*, Kokkos::HostSpace> host_view( + "host view", 2, 10); + + auto success = validate_absence( + [&]() { + auto mirror_device = + Kokkos::create_mirror(Kokkos::WithoutInitializing, device_view); + auto mirror_host = Kokkos::create_mirror(Kokkos::WithoutInitializing, + TEST_EXECSPACE{}, host_view); + auto mirror_device_view = Kokkos::create_mirror_view( + Kokkos::WithoutInitializing, device_view); + auto mirror_host_view = Kokkos::create_mirror_view( + Kokkos::WithoutInitializing, TEST_EXECSPACE{}, host_view); + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found begin event"}}; + }, + [&](EndParallelForEvent) { + return MatchDiagnostic{true, {"Found end event"}}; + }); + ASSERT_TRUE(success); +} + +TEST(TEST_CATEGORY, create_mirror_view_and_copy_dynamicview) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<typename TEST_EXECSPACE::memory_space, + Kokkos::CudaUVMSpace>::value) + return; +#endif + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels(), + Config::EnableFences()); + + Kokkos::Experimental::DynamicView<int*, Kokkos::HostSpace> host_view( + "host view", 2, 10); + decltype(Kokkos::create_mirror_view_and_copy(TEST_EXECSPACE{}, + 
host_view)) device_view; + + auto success = validate_absence( + [&]() { + auto mirror_device = Kokkos::create_mirror_view_and_copy( + Kokkos::view_alloc(TEST_EXECSPACE{}, + typename TEST_EXECSPACE::memory_space{}), + host_view); + // Avoid fences for deallocation when mirror_device goes out of scope. + device_view = mirror_device; + auto mirror_device_mirror = Kokkos::create_mirror_view_and_copy( + Kokkos::view_alloc(TEST_EXECSPACE{}, + typename TEST_EXECSPACE::memory_space{}), + mirror_device); + }, + [&](BeginFenceEvent event) { + if (event.descriptor().find("DynamicView::resize_serial: Fence after " + "copying chunks to the device") != + std::string::npos) + return MatchDiagnostic{false}; + return MatchDiagnostic{true, {"Found fence event"}}; + }, + [&](EndFenceEvent) { return MatchDiagnostic{false}; }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found parallel_for event"}}; + }); + ASSERT_TRUE(success); +} +#endif + +// FIXME OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET +TEST(TEST_CATEGORY, create_mirror_no_init_dynamicview_view_ctor) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels()); + Kokkos::Experimental::DynamicView<int*, Kokkos::DefaultExecutionSpace> + device_view("device view", 2, 10); + Kokkos::Experimental::DynamicView<int*, Kokkos::HostSpace> host_view( + "host view", 2, 10); + + auto success = validate_absence( + [&]() { + auto mirror_device = Kokkos::create_mirror( + Kokkos::view_alloc(Kokkos::WithoutInitializing), device_view); + auto mirror_host = Kokkos::create_mirror( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + Kokkos::DefaultExecutionSpace{}), + host_view); + auto mirror_device_view = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::WithoutInitializing), device_view); + auto mirror_host_view = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + Kokkos::DefaultExecutionSpace{}), + host_view); + }, + [&](BeginFenceEvent 
event) { + if (event.descriptor().find("DynamicView::resize_serial: Fence after " + "copying chunks to the device") != + std::string::npos) + return MatchDiagnostic{false}; + return MatchDiagnostic{true, {"Found fence event"}}; + }, + [&](EndFenceEvent) { return MatchDiagnostic{false}; }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found begin event"}}; + }, + [&](EndParallelForEvent) { + return MatchDiagnostic{true, {"Found end event"}}; + }); + ASSERT_TRUE(success); +} +#endif diff --git a/packages/kokkos/core/perf_test/CMakeLists.txt b/packages/kokkos/core/perf_test/CMakeLists.txt index a7c57a94346d74db97c8c320e0f3669bb2cc68cc..7ba97dbfbb9c2c20bf7457534cdcf468c91772d9 100644 --- a/packages/kokkos/core/perf_test/CMakeLists.txt +++ b/packages/kokkos/core/perf_test/CMakeLists.txt @@ -96,11 +96,14 @@ IF(NOT KOKKOS_ENABLE_CUDA OR KOKKOS_ENABLE_CUDA_LAMBDA) ) ENDIF() +# FIXME_NVHPC +IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_Mempool SOURCES test_mempool.cpp CATEGORIES PERFORMANCE ) +ENDIF() IF(NOT Kokkos_ENABLE_OPENMPTARGET) # FIXME OPENMPTARGET needs tasking diff --git a/packages/kokkos/core/perf_test/PerfTestBlasKernels.hpp b/packages/kokkos/core/perf_test/PerfTestBlasKernels.hpp index e133dafa368d562a148caf2e3b8adc4ff4a8b77d..5be29e65d724e28b5d4990ff905f10bfe25f2dc7 100644 --- a/packages/kokkos/core/perf_test/PerfTestBlasKernels.hpp +++ b/packages/kokkos/core/perf_test/PerfTestBlasKernels.hpp @@ -72,8 +72,7 @@ struct Dot { void operator()(int i, value_type& update) const { update += X[i] * Y[i]; } KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& source) { + static void join(value_type& update, const value_type& source) { update += source; } @@ -105,8 +104,7 @@ struct DotSingle { } KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& source) { + static void join(value_type& update, const value_type& 
source) { update += source; } diff --git a/packages/kokkos/core/perf_test/PerfTestGramSchmidt.cpp b/packages/kokkos/core/perf_test/PerfTestGramSchmidt.cpp index b534c32c52c691f4c65c5442d89a9381516391f1..31a01184c17cf167def6e614fcb68a7050d58f79 100644 --- a/packages/kokkos/core/perf_test/PerfTestGramSchmidt.cpp +++ b/packages/kokkos/core/perf_test/PerfTestGramSchmidt.cpp @@ -69,7 +69,7 @@ struct InvNorm2 : public Kokkos::DotSingle<VectorView> { KOKKOS_INLINE_FUNCTION void final(value_type& result) const { - result = Kokkos::Experimental::sqrt(result); + result = Kokkos::sqrt(result); Rjj() = result; inv() = (0 < result) ? 1.0 / result : 0; } diff --git a/packages/kokkos/core/src/CMakeLists.txt b/packages/kokkos/core/src/CMakeLists.txt index 793e07a84153f29334655c0284602b0677f7feb3..684ea353ae35f024211eef1c1d4642fbbbe6a2ff 100644 --- a/packages/kokkos/core/src/CMakeLists.txt +++ b/packages/kokkos/core/src/CMakeLists.txt @@ -1,17 +1,19 @@ -#I have to leave these here for tribits KOKKOS_INCLUDE_DIRECTORIES( ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${KOKKOS_TOP_BUILD_DIR} ) +IF (Kokkos_ENABLE_IMPL_DESUL_ATOMICS AND NOT desul_FOUND) + KOKKOS_INCLUDE_DIRECTORIES( + ${CMAKE_CURRENT_SOURCE_DIR}/../../tpls/desul/include + ) +ENDIF() + INSTALL (DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" DESTINATION ${KOKKOS_HEADER_DIR} FILES_MATCHING - PATTERN desul/src EXCLUDE - PATTERN "*.inc" - PATTERN "*.inc_*" PATTERN "*.hpp" PATTERN "*.h" ) @@ -37,6 +39,11 @@ IF (KOKKOS_ENABLE_OPENMPTARGET) APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/OpenMPTarget/*.hpp) ENDIF() +IF (KOKKOS_ENABLE_OPENACC) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/OpenACC/*.cpp) + APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/OpenACC/*.hpp) +ENDIF() + IF (KOKKOS_ENABLE_THREADS) APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Threads/*.cpp) APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/Threads/*.hpp) @@ -57,10 +64,8 @@ IF (NOT 
KOKKOS_ENABLE_MEMKIND) ENDIF() IF (KOKKOS_ENABLE_SERIAL) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Serial/*.cpp) APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/Serial/*.hpp) -ELSE() - LIST(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/Kokkos_Serial.cpp) - LIST(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/Kokkos_Serial_task.cpp) ENDIF() IF (KOKKOS_ENABLE_SYCL) @@ -68,12 +73,25 @@ IF (KOKKOS_ENABLE_SYCL) APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/SYCL/*.hpp) ENDIF() -IF (KOKKOS_ENABLE_IMPL_DESUL_ATOMICS) - APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/desul/src/*.cpp) - APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/desul/*.hpp) - APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/desul/*/*.hpp) - APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/desul/*/*/*.hpp) - APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/desul/*/*/*.inc) +IF (Kokkos_ENABLE_IMPL_DESUL_ATOMICS AND NOT desul_FOUND) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../../tpls/desul/src/*.cpp) + APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../../tpls/desul/include/desul/*.hpp) + APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../../tpls/desul/include/desul/*/*.hpp) + APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../../tpls/desul/include/desul/*/*/*.hpp) + APPEND_GLOB(KOKKOS_CORE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../../tpls/desul/include/*/*/*.inc*) + + INSTALL (DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}/../../tpls/desul/include/desul" + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING + PATTERN "*.inc" + PATTERN "*.inc_*" + PATTERN "*.hpp" + ) + MESSAGE(STATUS "Using internal desul_atomics copy") +ELSE() + MESSAGE(STATUS "Using external desul_atomics install found at:") + MESSAGE(STATUS " " ${desul_DIR}) ENDIF() @@ -89,6 +107,11 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscore ${CMAKE_CURRENT_BINARY_DIR} 
${CMAKE_CURRENT_SOURCE_DIR} ) +IF (Kokkos_ENABLE_IMPL_DESUL_ATOMICS AND NOT desul_FOUND) + KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscore + ${CMAKE_CURRENT_SOURCE_DIR}/../../tpls/desul/include + ) +ENDIF() KOKKOS_LINK_TPL(kokkoscore PUBLIC HWLOC) KOKKOS_LINK_TPL(kokkoscore PUBLIC MEMKIND) @@ -108,10 +131,13 @@ KOKKOS_LINK_TPL(kokkoscore PUBLIC ROCM) # libatomic # XL requires libatomic even for 64 bit CAS, most others only for 128 # I (CT) had removed 128bit CAS from desul to not need libatomic. -IF (KOKKOS_ENABLE_IMPL_DESUL_ATOMICS AND +IF (Kokkos_ENABLE_IMPL_DESUL_ATOMICS AND (KOKKOS_ENABLE_OPENMPTARGET OR (CMAKE_CXX_COMPILER_ID STREQUAL XLClang))) target_link_libraries(kokkoscore PUBLIC atomic) ENDIF() +IF (Kokkos_ENABLE_IMPL_DESUL_ATOMICS AND desul_FOUND) + target_link_libraries(kokkoscore PUBLIC desul_atomics) +ENDIF() KOKKOS_LINK_TPL(kokkoscore PUBLIC LIBQUADMATH) diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 31601944ba5a03979aecc9431919e5cbd819ba4b..b2161bc1fa50000ca99d85fcab28bca0947963cb 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #ifdef KOKKOS_ENABLE_CUDA @@ -52,7 +56,6 @@ #include <cstdlib> #include <iostream> #include <sstream> -#include <stdexcept> #include <algorithm> #include <atomic> @@ -199,14 +202,22 @@ void *CudaSpace::allocate(const size_t arg_alloc_size) const { return allocate("[unlabeled]", arg_alloc_size); } +void *CudaSpace::allocate(const Cuda &exec_space, const char *arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return impl_allocate(exec_space, arg_label, arg_alloc_size, arg_logical_size); +} void *CudaSpace::allocate(const char *arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { return 
impl_allocate(arg_label, arg_alloc_size, arg_logical_size); } -void *CudaSpace::impl_allocate( - const char *arg_label, const size_t arg_alloc_size, - const size_t arg_logical_size, - const Kokkos::Tools::SpaceHandle arg_handle) const { + +namespace { +void *impl_allocate_common(const Cuda &exec_space, const char *arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle, + bool exec_space_provided) { void *ptr = nullptr; #ifndef CUDART_VERSION @@ -214,12 +225,20 @@ void *CudaSpace::impl_allocate( #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) cudaError_t error_code; if (arg_alloc_size >= memory_threshold_g) { - error_code = cudaMallocAsync(&ptr, arg_alloc_size, 0); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaDeviceSynchronize()); + if (exec_space_provided) { + cudaStream_t stream = exec_space.cuda_stream(); + error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamSynchronize(stream)); + } else { + error_code = cudaMallocAsync(&ptr, arg_alloc_size, 0); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaDeviceSynchronize()); + } } else { error_code = cudaMalloc(&ptr, arg_alloc_size); } #else + (void)exec_space; + (void)exec_space_provided; auto error_code = cudaMalloc(&ptr, arg_alloc_size); #endif if (error_code != cudaSuccess) { // TODO tag as unlikely branch @@ -239,6 +258,23 @@ void *CudaSpace::impl_allocate( } return ptr; } +} // namespace + +void *CudaSpace::impl_allocate( + const char *arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + return impl_allocate_common(Kokkos::Cuda{}, arg_label, arg_alloc_size, + arg_logical_size, arg_handle, false); +} + +void *CudaSpace::impl_allocate( + const Cuda &exec_space, const char *arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + return 
impl_allocate_common(exec_space, arg_label, arg_alloc_size, + arg_logical_size, arg_handle, true); +} void *CudaUVMSpace::allocate(const size_t arg_alloc_size) const { return allocate("[unlabeled]", arg_alloc_size); @@ -493,6 +529,17 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::~SharedAllocationRecord() { alloc_size, (alloc_size - sizeof(SharedAllocationHeader))); } +void SharedAllocationRecord<Kokkos::CudaSpace, void>::deep_copy_header_no_exec( + void *ptr, const void *header) { + Kokkos::Cuda exec; + Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(exec, ptr, header, + sizeof(SharedAllocationHeader)); + exec.fence( + "SharedAllocationRecord<Kokkos::CudaSpace, " + "void>::SharedAllocationRecord(): fence after copying header from " + "HostSpace"); +} + SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::~SharedAllocationRecord() { m_space.deallocate(m_label.c_str(), SharedAllocationRecord<void, void>::m_alloc_ptr, @@ -547,6 +594,33 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord( "HostSpace"); } +SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord( + const Kokkos::Cuda &arg_exec_space, const Kokkos::CudaSpace &arg_space, + const std::string &arg_label, const size_t arg_alloc_size, + const SharedAllocationRecord<void, void>::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::CudaSpace, void>::s_root_record, +#endif + Impl::checked_allocation_with_header(arg_exec_space, arg_space, + arg_label, arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_tex_obj(0), + m_space(arg_space) { + + SharedAllocationHeader header; + + this->base_t::_fill_host_accessible_header_info(header, arg_label); + + // Copy to device memory + Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(arg_exec_space, + RecordBase::m_alloc_ptr, &header, + 
sizeof(SharedAllocationHeader)); +} + SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::SharedAllocationRecord( const Kokkos::CudaUVMSpace &arg_space, const std::string &arg_label, const size_t arg_alloc_size, diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp index 8a6c0433c8d848633457c98845b7758e63fae52d..8e8dff67763f495771377a2cff5b412fd233a4dc 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp @@ -464,20 +464,19 @@ inline __device__ int __stronger_order_simt_(int a, int b) { base */ -#define DO__atomic_load_simt_(bytes, bits) \ - template <class type, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ - void __device__ __atomic_load_simt_(const type *ptr, type *ret, \ - int memorder) { \ - int##bits##_t tmp = 0; \ - switch (memorder) { \ - case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ - case __ATOMIC_CONSUME: \ - case __ATOMIC_ACQUIRE: __simt_load_acquire_##bits(ptr, tmp); break; \ - case __ATOMIC_RELAXED: __simt_load_relaxed_##bits(ptr, tmp); break; \ - default: assert(0); \ - } \ - memcpy(ret, &tmp, bytes); \ +#define DO__atomic_load_simt_(bytes, bits) \ + template <class type, std::enable_if_t<sizeof(type) == bytes, int> = 0> \ + void __device__ __atomic_load_simt_(const type *ptr, type *ret, \ + int memorder) { \ + int##bits##_t tmp = 0; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_load_acquire_##bits(ptr, tmp); break; \ + case __ATOMIC_RELAXED: __simt_load_relaxed_##bits(ptr, tmp); break; \ + default: assert(0); \ + } \ + memcpy(ret, &tmp, bytes); \ } DO__atomic_load_simt_(1, 32) DO__atomic_load_simt_(2, 16) DO__atomic_load_simt_(4, 32) DO__atomic_load_simt_(8, 64) @@ -490,8 +489,7 @@ DO__atomic_load_simt_(1, 32) DO__atomic_load_simt_(2, 16) } #define 
DO__atomic_store_simt_(bytes, bits) \ - template <class type, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ + template <class type, std::enable_if_t<sizeof(type) == bytes, int> = 0> \ void __device__ __atomic_store_simt_(type *ptr, type *val, int memorder) { \ int##bits##_t tmp = 0; \ memcpy(&tmp, val, bytes); \ @@ -511,49 +509,47 @@ DO__atomic_store_simt_(1, 32) DO__atomic_store_simt_(2, 16) __atomic_store_simt_(ptr, &val, memorder); } -#define DO__atomic_compare_exchange_simt_(bytes, bits) \ - template <class type, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ - bool __device__ __atomic_compare_exchange_simt_( \ - type *ptr, type *expected, const type *desired, bool, \ - int success_memorder, int failure_memorder) { \ - int##bits##_t tmp = 0, old = 0, old_tmp; \ - memcpy(&tmp, desired, bytes); \ - memcpy(&old, expected, bytes); \ - old_tmp = old; \ - switch (__stronger_order_simt_(success_memorder, failure_memorder)) { \ - case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ - case __ATOMIC_CONSUME: \ - case __ATOMIC_ACQUIRE: \ - __simt_cas_acquire_##bits(ptr, old, old_tmp, tmp); \ - break; \ - case __ATOMIC_ACQ_REL: \ - __simt_cas_acq_rel_##bits(ptr, old, old_tmp, tmp); \ - break; \ - case __ATOMIC_RELEASE: \ - __simt_cas_release_##bits(ptr, old, old_tmp, tmp); \ - break; \ - case __ATOMIC_RELAXED: \ - __simt_cas_relaxed_##bits(ptr, old, old_tmp, tmp); \ - break; \ - default: assert(0); \ - } \ - bool const ret = old == old_tmp; \ - memcpy(expected, &old, bytes); \ - return ret; \ +#define DO__atomic_compare_exchange_simt_(bytes, bits) \ + template <class type, std::enable_if_t<sizeof(type) == bytes, int> = 0> \ + bool __device__ __atomic_compare_exchange_simt_( \ + type *ptr, type *expected, const type *desired, bool, \ + int success_memorder, int failure_memorder) { \ + int##bits##_t tmp = 0, old = 0, old_tmp; \ + memcpy(&tmp, desired, bytes); \ + memcpy(&old, expected, bytes); \ + old_tmp = old; \ + switch 
(__stronger_order_simt_(success_memorder, failure_memorder)) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: \ + __simt_cas_acquire_##bits(ptr, old, old_tmp, tmp); \ + break; \ + case __ATOMIC_ACQ_REL: \ + __simt_cas_acq_rel_##bits(ptr, old, old_tmp, tmp); \ + break; \ + case __ATOMIC_RELEASE: \ + __simt_cas_release_##bits(ptr, old, old_tmp, tmp); \ + break; \ + case __ATOMIC_RELAXED: \ + __simt_cas_relaxed_##bits(ptr, old, old_tmp, tmp); \ + break; \ + default: assert(0); \ + } \ + bool const ret = old == old_tmp; \ + memcpy(expected, &old, bytes); \ + return ret; \ } DO__atomic_compare_exchange_simt_(4, 32) DO__atomic_compare_exchange_simt_(8, 64) - template <class type, - typename std::enable_if<sizeof(type) <= 2, int>::type = 0> + template <class type, std::enable_if_t<sizeof(type) <= 2, int> = 0> bool __device__ __atomic_compare_exchange_simt_(type *ptr, type *expected, const type *desired, bool, int success_memorder, int failure_memorder) { - using R = typename std::conditional<std::is_volatile<type>::value, - volatile uint32_t, uint32_t>::type; + using R = std::conditional_t<std::is_volatile<type>::value, volatile uint32_t, + uint32_t>; auto const aligned = (R *)((intptr_t)ptr & ~(sizeof(uint32_t) - 1)); auto const offset = uint32_t((intptr_t)ptr & (sizeof(uint32_t) - 1)) * 8; auto const mask = ((1 << sizeof(type) * 8) - 1) << offset; @@ -581,8 +577,7 @@ bool __device__ __atomic_compare_exchange_n_simt_(type *ptr, type *expected, } #define DO__atomic_exchange_simt_(bytes, bits) \ - template <class type, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ + template <class type, std::enable_if_t<sizeof(type) == bytes, int> = 0> \ void __device__ __atomic_exchange_simt_(type *ptr, type *val, type *ret, \ int memorder) { \ int##bits##_t tmp = 0; \ @@ -600,8 +595,7 @@ bool __device__ __atomic_compare_exchange_n_simt_(type *ptr, type *expected, } DO__atomic_exchange_simt_(4, 32) 
DO__atomic_exchange_simt_(8, 64) - template <class type, - typename std::enable_if<sizeof(type) <= 2, int>::type = 0> + template <class type, std::enable_if_t<sizeof(type) <= 2, int> = 0> void __device__ __atomic_exchange_simt_(type *ptr, type *val, type *ret, int memorder) { type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); @@ -620,7 +614,7 @@ type __device__ __atomic_exchange_n_simt_(type *ptr, type val, int memorder) { #define DO__atomic_fetch_add_simt_(bytes, bits) \ template <class type, class delta, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ + std::enable_if_t<sizeof(type) == bytes, int> = 0> \ type __device__ __atomic_fetch_add_simt_(type *ptr, delta val, \ int memorder) { \ type ret; \ @@ -638,7 +632,7 @@ type __device__ __atomic_exchange_n_simt_(type *ptr, type val, int memorder) { DO__atomic_fetch_add_simt_(4, 32) DO__atomic_fetch_add_simt_(8, 64) template <class type, class delta, - typename std::enable_if<sizeof(type) <= 2, int>::type = 0> + std::enable_if_t<sizeof(type) <= 2, int> = 0> type __device__ __atomic_fetch_add_simt_(type *ptr, delta val, int memorder) { type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); @@ -651,7 +645,7 @@ DO__atomic_fetch_add_simt_(4, 32) DO__atomic_fetch_add_simt_(8, 64) #define DO__atomic_fetch_sub_simt_(bytes, bits) \ template <class type, class delta, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ + std::enable_if_t<sizeof(type) == bytes, int> = 0> \ type __device__ __atomic_fetch_sub_simt_(type *ptr, delta val, \ int memorder) { \ type ret; \ @@ -669,7 +663,7 @@ DO__atomic_fetch_add_simt_(4, 32) DO__atomic_fetch_add_simt_(8, 64) DO__atomic_fetch_sub_simt_(4, 32) DO__atomic_fetch_sub_simt_(8, 64) template <class type, class delta, - typename std::enable_if<sizeof(type) <= 2, int>::type = 0> + std::enable_if_t<sizeof(type) <= 2, int> = 0> type __device__ __atomic_fetch_sub_simt_(type *ptr, delta val, int memorder) { type expected = 
__atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); @@ -681,8 +675,7 @@ DO__atomic_fetch_sub_simt_(4, 32) DO__atomic_fetch_sub_simt_(8, 64) } #define DO__atomic_fetch_and_simt_(bytes, bits) \ - template <class type, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ + template <class type, std::enable_if_t<sizeof(type) == bytes, int> = 0> \ type __device__ __atomic_fetch_and_simt_(type *ptr, type val, \ int memorder) { \ type ret; \ @@ -700,7 +693,7 @@ DO__atomic_fetch_sub_simt_(4, 32) DO__atomic_fetch_sub_simt_(8, 64) DO__atomic_fetch_and_simt_(4, 32) DO__atomic_fetch_and_simt_(8, 64) template <class type, class delta, - typename std::enable_if<sizeof(type) <= 2, int>::type = 0> + std::enable_if_t<sizeof(type) <= 2, int> = 0> type __device__ __atomic_fetch_and_simt_(type *ptr, delta val, int memorder) { type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); @@ -712,8 +705,7 @@ DO__atomic_fetch_and_simt_(4, 32) DO__atomic_fetch_and_simt_(8, 64) } #define DO__atomic_fetch_xor_simt_(bytes, bits) \ - template <class type, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ + template <class type, std::enable_if_t<sizeof(type) == bytes, int> = 0> \ type __device__ __atomic_fetch_xor_simt_(type *ptr, type val, \ int memorder) { \ type ret; \ @@ -731,7 +723,7 @@ DO__atomic_fetch_and_simt_(4, 32) DO__atomic_fetch_and_simt_(8, 64) DO__atomic_fetch_xor_simt_(4, 32) DO__atomic_fetch_xor_simt_(8, 64) template <class type, class delta, - typename std::enable_if<sizeof(type) <= 2, int>::type = 0> + std::enable_if_t<sizeof(type) <= 2, int> = 0> type __device__ __atomic_fetch_xor_simt_(type *ptr, delta val, int memorder) { type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); @@ -743,8 +735,7 @@ DO__atomic_fetch_xor_simt_(4, 32) DO__atomic_fetch_xor_simt_(8, 64) } #define DO__atomic_fetch_or_simt_(bytes, bits) \ - template <class type, \ - typename std::enable_if<sizeof(type) == bytes, int>::type = 0> \ + template <class type, 
std::enable_if_t<sizeof(type) == bytes, int> = 0> \ type __device__ __atomic_fetch_or_simt_(type *ptr, type val, int memorder) { \ type ret; \ switch (memorder) { \ @@ -761,7 +752,7 @@ DO__atomic_fetch_xor_simt_(4, 32) DO__atomic_fetch_xor_simt_(8, 64) DO__atomic_fetch_or_simt_(4, 32) DO__atomic_fetch_or_simt_(8, 64) template <class type, class delta, - typename std::enable_if<sizeof(type) <= 2, int>::type = 0> + std::enable_if_t<sizeof(type) <= 2, int> = 0> type __device__ __atomic_fetch_or_simt_(type *ptr, delta val, int memorder) { type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp index 36df0d2564ae8ab86849837cf60cb6d93727aab2..e28e964d36aabfccf31e688050d286e2d25a0268 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp @@ -63,14 +63,37 @@ void cuda_device_synchronize(const std::string& name); void cuda_stream_synchronize(const cudaStream_t stream, const std::string& name); -void cuda_internal_error_throw(cudaError e, const char* name, - const char* file = nullptr, const int line = 0); +[[noreturn]] void cuda_internal_error_throw(cudaError e, const char* name, + const char* file = nullptr, + const int line = 0); + +#ifndef KOKKOS_COMPILER_NVHPC +[[noreturn]] +#endif + void cuda_internal_error_abort(cudaError e, const char* name, + const char* file = nullptr, + const int line = 0); inline void cuda_internal_safe_call(cudaError e, const char* name, const char* file = nullptr, const int line = 0) { - if (cudaSuccess != e) { - cuda_internal_error_throw(e, name, file, line); + // 1. Success -> normal continuation. + // 2. Error codes for which, to continue using CUDA, the process must be + // terminated and relaunched -> call abort on the host-side. + // 3. Any other error code -> throw a runtime error. 
+ switch (e) { + case cudaSuccess: break; + case cudaErrorIllegalAddress: + case cudaErrorAssert: + case cudaErrorHardwareStackError: + case cudaErrorIllegalInstruction: + case cudaErrorMisalignedAddress: + case cudaErrorInvalidAddressSpace: + case cudaErrorInvalidPc: + case cudaErrorLaunchFailure: + cuda_internal_error_abort(e, name, file, line); + break; + default: cuda_internal_error_throw(e, name, file, line); break; } } diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp index e8a7641347241a2443b8ca7e1a15d10ea00c74a2..40a263561f4cd1b1389d0f8ac4a9e5afa9d58fb8 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp @@ -80,56 +80,38 @@ half_t cast_to_half(double val) { KOKKOS_INLINE_FUNCTION half_t cast_to_half(short val) { -#ifdef __CUDA_ARCH__ - return half_t(__short2half_rn(val)); -#else - return half_t(__float2half(static_cast<float>(val))); -#endif + KOKKOS_IF_ON_DEVICE((return half_t(__short2half_rn(val));)) + KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast<float>(val)));)) } KOKKOS_INLINE_FUNCTION half_t cast_to_half(unsigned short val) { -#ifdef __CUDA_ARCH__ - return half_t(__ushort2half_rn(val)); -#else - return half_t(__float2half(static_cast<float>(val))); -#endif + KOKKOS_IF_ON_DEVICE((return half_t(__ushort2half_rn(val));)) + KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast<float>(val)));)) } KOKKOS_INLINE_FUNCTION half_t cast_to_half(int val) { -#ifdef __CUDA_ARCH__ - return half_t(__int2half_rn(val)); -#else - return half_t(__float2half(static_cast<float>(val))); -#endif + KOKKOS_IF_ON_DEVICE((return half_t(__int2half_rn(val));)) + KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast<float>(val)));)) } KOKKOS_INLINE_FUNCTION half_t cast_to_half(unsigned int val) { -#ifdef __CUDA_ARCH__ - return half_t(__uint2half_rn(val)); -#else - return 
half_t(__float2half(static_cast<float>(val))); -#endif + KOKKOS_IF_ON_DEVICE((return half_t(__uint2half_rn(val));)) + KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast<float>(val)));)) } KOKKOS_INLINE_FUNCTION half_t cast_to_half(long long val) { -#ifdef __CUDA_ARCH__ - return half_t(__ll2half_rn(val)); -#else - return half_t(__float2half(static_cast<float>(val))); -#endif + KOKKOS_IF_ON_DEVICE((return half_t(__ll2half_rn(val));)) + KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast<float>(val)));)) } KOKKOS_INLINE_FUNCTION half_t cast_to_half(unsigned long long val) { -#ifdef __CUDA_ARCH__ - return half_t(__ull2half_rn(val)); -#else - return half_t(__float2half(static_cast<float>(val))); -#endif + KOKKOS_IF_ON_DEVICE((return half_t(__ull2half_rn(val));)) + KOKKOS_IF_ON_HOST((return half_t(__float2half(static_cast<float>(val)));)) } KOKKOS_INLINE_FUNCTION @@ -163,62 +145,50 @@ cast_from_half(half_t val) { template <class T> KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, short>::value, T> cast_from_half(half_t val) { -#ifdef __CUDA_ARCH__ - return __half2short_rz(half_t::impl_type(val)); -#else - return static_cast<T>(__half2float(half_t::impl_type(val))); -#endif + KOKKOS_IF_ON_DEVICE((return __half2short_rz(half_t::impl_type(val));)) + KOKKOS_IF_ON_HOST( + (return static_cast<T>(__half2float(half_t::impl_type(val)));)) } template <class T> KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned short>::value, T> cast_from_half(half_t val) { -#ifdef __CUDA_ARCH__ - return __half2ushort_rz(half_t::impl_type(val)); -#else - return static_cast<T>(__half2float(half_t::impl_type(val))); -#endif + KOKKOS_IF_ON_DEVICE((return __half2ushort_rz(half_t::impl_type(val));)) + KOKKOS_IF_ON_HOST( + (return static_cast<T>(__half2float(half_t::impl_type(val)));)) } template <class T> KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, int>::value, T> cast_from_half(half_t val) { -#ifdef __CUDA_ARCH__ - return 
__half2int_rz(half_t::impl_type(val)); -#else - return static_cast<T>(__half2float(half_t::impl_type(val))); -#endif + KOKKOS_IF_ON_DEVICE((return __half2int_rz(half_t::impl_type(val));)) + KOKKOS_IF_ON_HOST( + (return static_cast<T>(__half2float(half_t::impl_type(val)));)) } template <class T> KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned>::value, T> cast_from_half(half_t val) { -#ifdef __CUDA_ARCH__ - return __half2uint_rz(half_t::impl_type(val)); -#else - return static_cast<T>(__half2float(half_t::impl_type(val))); -#endif + KOKKOS_IF_ON_DEVICE((return __half2uint_rz(half_t::impl_type(val));)) + KOKKOS_IF_ON_HOST( + (return static_cast<T>(__half2float(half_t::impl_type(val)));)) } template <class T> KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, long long>::value, T> cast_from_half(half_t val) { -#ifdef __CUDA_ARCH__ - return __half2ll_rz(half_t::impl_type(val)); -#else - return static_cast<T>(__half2float(half_t::impl_type(val))); -#endif + KOKKOS_IF_ON_DEVICE((return __half2ll_rz(half_t::impl_type(val));)) + KOKKOS_IF_ON_HOST( + (return static_cast<T>(__half2float(half_t::impl_type(val)));)) } template <class T> KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_same<T, unsigned long long>::value, T> cast_from_half(half_t val) { -#ifdef __CUDA_ARCH__ - return __half2ull_rz(half_t::impl_type(val)); -#else - return static_cast<T>(__half2float(half_t::impl_type(val))); -#endif + KOKKOS_IF_ON_DEVICE((return __half2ull_rz(half_t::impl_type(val));)) + KOKKOS_IF_ON_HOST( + (return static_cast<T>(__half2float(half_t::impl_type(val)));)) } template <class T> diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp index aaa9ea8ad4f86fad749e89b379dc4e52428e6518..5811498e010f4b34ededc6b763f3d6cbd7cf9490 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -45,6 +45,10 @@ 
/*--------------------------------------------------------------------------*/ /* Kokkos interfaces */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #ifdef KOKKOS_ENABLE_CUDA @@ -57,6 +61,8 @@ #include <Cuda/Kokkos_Cuda_UniqueToken.hpp> #include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_Tools.hpp> +#include <impl/Kokkos_DeviceManagement.hpp> +#include <impl/Kokkos_ExecSpaceManager.hpp> /*--------------------------------------------------------------------------*/ /* Standard 'C' libraries */ @@ -107,11 +113,15 @@ namespace Impl { namespace { __global__ void query_cuda_kernel_arch(int *d_arch) { +#ifdef _NVHPC_CUDA + *d_arch = __builtin_current_device_sm() * 10; +#else #if defined(__CUDA_ARCH__) *d_arch = __CUDA_ARCH__; #else *d_arch = 0; #endif +#endif } /** Query what compute capability is actually launched to the device: */ @@ -184,6 +194,17 @@ void cuda_internal_error_throw(cudaError e, const char *name, const char *file, throw_runtime_exception(out.str()); } +void cuda_internal_error_abort(cudaError e, const char *name, const char *file, + const int line) { + std::ostringstream out; + out << name << " error( " << cudaGetErrorName(e) + << "): " << cudaGetErrorString(e); + if (file) { + out << " " << file << ":" << line; + } + abort(out.str().c_str()); +} + //---------------------------------------------------------------------------- // Some significant cuda device properties: // @@ -331,8 +352,9 @@ CudaInternal::~CudaInternal() { int CudaInternal::verify_is_initialized(const char *const label) const { if (m_cudaDev < 0) { - std::cerr << "Kokkos::Cuda::" << label << " : ERROR device not initialized" - << std::endl; + Kokkos::abort((std::string("Kokkos::Cuda::") + label + + " : ERROR device not initialized\n") + .c_str()); } return 0 <= m_cudaDev; } @@ -716,13 +738,22 @@ void CudaInternal::finalize() { if (was_finalized) return; was_finalized = true; - if (nullptr != m_scratchSpace || nullptr != 
m_scratchFlags) { - // Only finalize this if we're the singleton - if (this == &singleton()) { - (void)Impl::cuda_global_unique_token_locks(true); - Impl::finalize_host_cuda_lock_arrays(); - } + // Only finalize this if we're the singleton + if (this == &singleton()) { + (void)Impl::cuda_global_unique_token_locks(true); + Impl::finalize_host_cuda_lock_arrays(); + + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeHost(constantMemHostStaging)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaEventDestroy(constantMemReusable)); + auto &deep_copy_space = + Kokkos::Impl::cuda_get_deep_copy_space(/*initialize*/ false); + if (deep_copy_space) + deep_copy_space->impl_internal_space_instance()->finalize(); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(cuda_get_deep_copy_stream())); + } + + if (nullptr != m_scratchSpace || nullptr != m_scratchFlags) { using RecordCuda = Kokkos::Impl::SharedAllocationRecord<CudaSpace>; using RecordHost = Kokkos::Impl::SharedAllocationRecord<CudaHostPinnedSpace>; @@ -732,47 +763,36 @@ void CudaInternal::finalize() { RecordHost::decrement(RecordHost::get_record(m_scratchUnified)); if (m_scratchFunctorSize > 0) RecordCuda::decrement(RecordCuda::get_record(m_scratchFunctor)); + } - for (int i = 0; i < m_n_team_scratch; ++i) { - if (m_team_scratch_current_size[i] > 0) - Kokkos::kokkos_free<Kokkos::CudaSpace>(m_team_scratch_ptr[i]); - } - - if (m_manage_stream && m_stream != nullptr) - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(m_stream)); - - m_cudaDev = -1; - m_multiProcCount = 0; - m_maxWarpCount = 0; - m_maxBlock = {0, 0, 0}; - m_maxSharedWords = 0; - m_scratchSpaceCount = 0; - m_scratchFlagsCount = 0; - m_scratchUnifiedCount = 0; - m_streamCount = 0; - m_scratchSpace = nullptr; - m_scratchFlags = nullptr; - m_scratchUnified = nullptr; - m_stream = nullptr; - for (int i = 0; i < m_n_team_scratch; ++i) { - m_team_scratch_current_size[i] = 0; - m_team_scratch_ptr[i] = nullptr; - } - - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(m_scratch_locks)); - m_scratch_locks = nullptr; + 
for (int i = 0; i < m_n_team_scratch; ++i) { + if (m_team_scratch_current_size[i] > 0) + Kokkos::kokkos_free<Kokkos::CudaSpace>(m_team_scratch_ptr[i]); } - // only destroy these if we're finalizing the singleton - if (this == &singleton()) { - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeHost(constantMemHostStaging)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaEventDestroy(constantMemReusable)); - auto &deep_copy_space = - Kokkos::Impl::cuda_get_deep_copy_space(/*initialize*/ false); - if (deep_copy_space) - deep_copy_space->impl_internal_space_instance()->finalize(); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(cuda_get_deep_copy_stream())); + if (m_manage_stream && m_stream != nullptr) + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(m_stream)); + + m_cudaDev = -1; + m_multiProcCount = 0; + m_maxWarpCount = 0; + m_maxBlock = {0, 0, 0}; + m_maxSharedWords = 0; + m_scratchSpaceCount = 0; + m_scratchFlagsCount = 0; + m_scratchUnifiedCount = 0; + m_streamCount = 0; + m_scratchSpace = nullptr; + m_scratchFlags = nullptr; + m_scratchUnified = nullptr; + m_stream = nullptr; + for (int i = 0; i < m_n_team_scratch; ++i) { + m_team_scratch_current_size[i] = 0; + m_team_scratch_ptr[i] = nullptr; } + + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(m_scratch_locks)); + m_scratch_locks = nullptr; } //---------------------------------------------------------------------------- @@ -839,9 +859,16 @@ int Cuda::impl_is_initialized() { return Impl::CudaInternal::singleton().is_initialized(); } -void Cuda::impl_initialize(const Cuda::SelectDevice config, - size_t /*num_instances*/) { - Impl::CudaInternal::singleton().initialize(config.cuda_device_id, nullptr); +void Cuda::impl_initialize(InitializationSettings const &settings) { + Impl::CudaInternal::singleton().initialize(Impl::get_gpu(settings)); + + // In order to support setting an atexit hook for Kokkos::finalize + // We need to ensure that the Cuda deep_copy instance is not destroyed + // before that atexit hook is getting called. 
+ // Thus we create the static instance here, so that it will be deallocated + // after the potential atexit call. + // This is neccessary since we will access that instance in Kokkos::finalize + (void)::Kokkos::Impl::cuda_get_deep_copy_space(true); } std::vector<unsigned> Cuda::detect_device_arch() { @@ -891,20 +918,59 @@ Cuda::Cuda(cudaStream_t stream, bool manage_stream) stream, manage_stream); } -void Cuda::print_configuration(std::ostream &s, const bool) { - Impl::CudaInternal::singleton().print_configuration(s); +void Cuda::print_configuration(std::ostream &os, bool /*verbose*/) const { + os << "Device Execution Space:\n"; + os << " KOKKOS_ENABLE_CUDA: yes\n"; + + os << "Cuda Atomics:\n"; + os << " KOKKOS_ENABLE_CUDA_ATOMICS: "; +#ifdef KOKKOS_ENABLE_CUDA_ATOMICS + os << "yes\n"; +#else + os << "no\n"; +#endif + + os << "Cuda Options:\n"; + os << " KOKKOS_ENABLE_CUDA_LAMBDA: "; +#ifdef KOKKOS_ENABLE_CUDA_LAMBDA + os << "yes\n"; +#else + os << "no\n"; +#endif + os << " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC: "; +#ifdef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC + os << "yes\n"; +#else + os << "no\n"; +#endif + os << " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE: "; +#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE + os << "yes\n"; +#else + os << "no\n"; +#endif + os << " KOKKOS_ENABLE_CUDA_UVM: "; +#ifdef KOKKOS_ENABLE_CUDA_UVM + os << "yes\n"; +#else + os << "no\n"; +#endif + os << " KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA: "; +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + os << "yes\n"; +#else + os << "no\n"; +#endif + + os << "\nCuda Runtime Configuration:\n"; + + m_space_instance->print_configuration(os); } void Cuda::impl_static_fence(const std::string &name) { Kokkos::Impl::cuda_device_synchronize(name); } -void Cuda::impl_static_fence() { - impl_static_fence("Kokkos::Cuda::impl_static_fence(): Unnamed Static Fence"); -} -void Cuda::fence() const { - fence("Kokkos::Cuda::fence(): Unnamed Instance Fence"); -} void Cuda::fence(const std::string &name) const { 
m_space_instance->fence(name); } @@ -922,89 +988,8 @@ const cudaDeviceProp &Cuda::cuda_device_prop() const { namespace Impl { -int get_gpu(const InitArguments &args); - int g_cuda_space_factory_initialized = - initialize_space_factory<CudaSpaceInitializer>("150_Cuda"); - -void CudaSpaceInitializer::initialize(const InitArguments &args) { - int use_gpu = get_gpu(args); - if (std::is_same<Kokkos::Cuda, Kokkos::DefaultExecutionSpace>::value || - 0 < use_gpu) { - if (use_gpu > -1) { - Kokkos::Cuda::impl_initialize(Kokkos::Cuda::SelectDevice(use_gpu)); - } else { - Kokkos::Cuda::impl_initialize(); - } - } -} - -void CudaSpaceInitializer::finalize(bool all_spaces) { - if ((std::is_same<Kokkos::Cuda, Kokkos::DefaultExecutionSpace>::value || - all_spaces) && - Kokkos::Cuda::impl_is_initialized()) { - Kokkos::Cuda::impl_finalize(); - } -} - -void CudaSpaceInitializer::fence() { - Kokkos::Cuda::impl_static_fence( - "Kokkos::CudaSpaceInitializer::fence: Initializer Fence"); -} -void CudaSpaceInitializer::fence(const std::string &name) { - // Kokkos::Cuda::impl_static_fence("Kokkos::CudaSpaceInitializer::fence: - // "+name); //TODO: or this - Kokkos::Cuda::impl_static_fence(name); -} - -void CudaSpaceInitializer::print_configuration(std::ostream &msg, - const bool detail) { - msg << "Device Execution Space:\n"; - msg << " KOKKOS_ENABLE_CUDA: yes\n"; - - msg << "Cuda Atomics:\n"; - msg << " KOKKOS_ENABLE_CUDA_ATOMICS: "; -#ifdef KOKKOS_ENABLE_CUDA_ATOMICS - msg << "yes\n"; -#else - msg << "no\n"; -#endif - - msg << "Cuda Options:\n"; - msg << " KOKKOS_ENABLE_CUDA_LAMBDA: "; -#ifdef KOKKOS_ENABLE_CUDA_LAMBDA - msg << "yes\n"; -#else - msg << "no\n"; -#endif - msg << " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC: "; -#ifdef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC - msg << "yes\n"; -#else - msg << "no\n"; -#endif - msg << " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE: "; -#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE - msg << "yes\n"; -#else - msg << "no\n"; -#endif - msg << " 
KOKKOS_ENABLE_CUDA_UVM: "; -#ifdef KOKKOS_ENABLE_CUDA_UVM - msg << "yes\n"; -#else - msg << "no\n"; -#endif - msg << " KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA: "; -#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - msg << "yes\n"; -#else - msg << "no\n"; -#endif - - msg << "\nCuda Runtime Configuration:" << std::endl; - Cuda::print_configuration(msg, detail); -} + initialize_space_factory<Cuda>("150_Cuda"); } // namespace Impl diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp index b7a80ad84ff22b00d9666956cf5896b259d38b6a..88810b6fc2bbcf5c6fef3bd4a9de0a72fb30c5e8 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp @@ -636,7 +636,7 @@ struct CudaParallelLaunchImpl< DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>>( base_t::get_kernel_func(), prefer_shmem); - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + ensure_cuda_lock_arrays_on_device(); // Invoke the driver function on the device base_t::invoke_kernel(driver, grid, block, shmem, cuda_instance); diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp index 1dcbdf0392fc3961e2c1e30e140c3edcaa61d820..3796534816a8bdb6bbeb1e517c6e54a04f2c82e1 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Core.hpp> #ifdef KOKKOS_ENABLE_CUDA #include <Cuda/Kokkos_Cuda_Locks.hpp> @@ -75,8 +79,7 @@ CudaLockArrays g_host_cuda_lock_arrays = {nullptr, 0}; void initialize_host_cuda_lock_arrays() { #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS desul::Impl::init_lock_arrays(); - - DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + desul::ensure_cuda_lock_arrays_on_device(); #endif if (g_host_cuda_lock_arrays.atomic 
!= nullptr) return; KOKKOS_IMPL_CUDA_SAFE_CALL( @@ -85,7 +88,7 @@ void initialize_host_cuda_lock_arrays() { Impl::cuda_device_synchronize( "Kokkos::Impl::initialize_host_cuda_lock_arrays: Pre Init Lock Arrays"); g_host_cuda_lock_arrays.n = Cuda::concurrency(); - KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE(); + copy_cuda_lock_arrays_to_device(); init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256, 256>>>(); Impl::cuda_device_synchronize( @@ -102,7 +105,7 @@ void finalize_host_cuda_lock_arrays() { g_host_cuda_lock_arrays.atomic = nullptr; g_host_cuda_lock_arrays.n = 0; #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE - KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE(); + copy_cuda_lock_arrays_to_device(); #endif } diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp index bdb7723985e5a3c6c0451ada3d0b6b7303204089..244f142f0d83550bf79d5cfb5288494e2629226f 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp @@ -67,7 +67,7 @@ struct CudaLockArrays { /// \brief This global variable in Host space is the central definition /// of these arrays. -extern Kokkos::Impl::CudaLockArrays g_host_cuda_lock_arrays; +extern CudaLockArrays g_host_cuda_lock_arrays; /// \brief After this call, the g_host_cuda_lock_arrays variable has /// valid, initialized arrays. @@ -105,12 +105,12 @@ namespace Impl { /// instances in other translation units, we must update this CUDA global /// variable based on the Host global variable prior to running any kernels /// that will use it. -/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro. +/// That is the purpose of the ensure_cuda_lock_arrays_on_device function. 
__device__ #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE __constant__ extern #endif - Kokkos::Impl::CudaLockArrays g_device_cuda_lock_arrays; + CudaLockArrays g_device_cuda_lock_arrays; #define CUDA_SPACE_ATOMIC_MASK 0x1FFFF @@ -123,9 +123,7 @@ __device__ inline bool lock_address_cuda_space(void* ptr) { size_t offset = size_t(ptr); offset = offset >> 2; offset = offset & CUDA_SPACE_ATOMIC_MASK; - return ( - 0 == - atomicCAS(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0, 1)); + return (0 == atomicCAS(&g_device_cuda_lock_arrays.atomic[offset], 0, 1)); } /// \brief Release lock for the address @@ -138,7 +136,7 @@ __device__ inline void unlock_address_cuda_space(void* ptr) { size_t offset = size_t(ptr); offset = offset >> 2; offset = offset & CUDA_SPACE_ATOMIC_MASK; - atomicExch(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0); + atomicExch(&g_device_cuda_lock_arrays.atomic[offset], 0); } } // namespace Impl @@ -151,45 +149,49 @@ namespace { static int lock_array_copied = 0; inline int eliminate_warning_for_lock_array() { return lock_array_copied; } } // namespace -} // namespace Impl -} // namespace Kokkos -/* Dan Ibanez: it is critical that this code be a macro, so that it will - capture the right address for Kokkos::Impl::g_device_cuda_lock_arrays! - putting this in an inline function will NOT do the right thing! 
*/ -#define KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \ - { \ - if (::Kokkos::Impl::lock_array_copied == 0) { \ - KOKKOS_IMPL_CUDA_SAFE_CALL( \ - cudaMemcpyToSymbol(Kokkos::Impl::g_device_cuda_lock_arrays, \ - &Kokkos::Impl::g_host_cuda_lock_arrays, \ - sizeof(Kokkos::Impl::CudaLockArrays))); \ - } \ - lock_array_copied = 1; \ +#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE +inline +#else +static +#endif + void + copy_cuda_lock_arrays_to_device() { + if (lock_array_copied == 0) { + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpyToSymbol(g_device_cuda_lock_arrays, + &g_host_cuda_lock_arrays, + sizeof(CudaLockArrays))); } + lock_array_copied = 1; +} #ifndef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE -#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() +inline void ensure_cuda_lock_arrays_on_device() {} #else -#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \ - KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() +inline static void ensure_cuda_lock_arrays_on_device() { + copy_cuda_lock_arrays_to_device(); +} #endif #else #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE -#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() +inline void ensure_cuda_lock_arrays_on_device() {} #else // Still Need COPY_CUDA_LOCK_ARRAYS for team scratch etc. 
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \ - KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \ - DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() +inline static void ensure_cuda_lock_arrays_on_device() { + copy_cuda_lock_arrays_to_device(); + desul::ensure_cuda_lock_arrays_on_device(); +} #endif #endif /* defined( KOKKOS_ENABLE_IMPL_DESUL_ATOMICS ) */ +} // namespace Impl +} // namespace Kokkos + #endif /* defined( KOKKOS_ENABLE_CUDA ) */ #endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP */ diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp deleted file mode 100644 index 5016f73e3c7d3fe3fd4f98cc37af9e1ed5ff3c2e..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ /dev/null @@ -1,2722 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDA_PARALLEL_HPP -#define KOKKOS_CUDA_PARALLEL_HPP - -#include <Kokkos_Macros.hpp> -#if defined(KOKKOS_ENABLE_CUDA) - -#include <algorithm> -#include <string> -#include <cstdio> -#include <cstdint> - -#include <utility> -#include <Kokkos_Parallel.hpp> - -#include <Cuda/Kokkos_Cuda_KernelLaunch.hpp> -#include <Cuda/Kokkos_Cuda_ReduceScan.hpp> -#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp> -#include <Cuda/Kokkos_Cuda_Locks.hpp> -#include <Cuda/Kokkos_Cuda_Team.hpp> -#include <Kokkos_MinMaxClamp.hpp> -#include <Kokkos_Vectorization.hpp> - -#include <impl/Kokkos_Tools.hpp> -#include <typeinfo> - -#include <KokkosExp_MDRangePolicy.hpp> -#include <impl/KokkosExp_IterateTileGPU.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -extern bool show_warnings() noexcept; - -namespace Impl { - -template <class... 
Properties> -class TeamPolicyInternal<Kokkos::Cuda, Properties...> - : public PolicyTraits<Properties...> { - public: - //! Tag this class as a kokkos execution policy - using execution_policy = TeamPolicyInternal; - - using traits = PolicyTraits<Properties...>; - - template <class ExecSpace, class... OtherProperties> - friend class TeamPolicyInternal; - - private: - enum { MAX_WARP = 8 }; - - typename traits::execution_space m_space; - int m_league_size; - int m_team_size; - int m_vector_length; - int m_team_scratch_size[2]; - int m_thread_scratch_size[2]; - int m_chunk_size; - bool m_tune_team; - bool m_tune_vector; - - public: - //! Execution space of this execution policy - using execution_space = Kokkos::Cuda; - - template <class... OtherProperties> - TeamPolicyInternal(const TeamPolicyInternal<OtherProperties...>& p) { - m_league_size = p.m_league_size; - m_team_size = p.m_team_size; - m_vector_length = p.m_vector_length; - m_team_scratch_size[0] = p.m_team_scratch_size[0]; - m_team_scratch_size[1] = p.m_team_scratch_size[1]; - m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; - m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; - m_chunk_size = p.m_chunk_size; - m_space = p.m_space; - m_tune_team = p.m_tune_team; - m_tune_vector = p.m_tune_vector; - } - - //---------------------------------------- - - template <class FunctorType> - int team_size_max(const FunctorType& f, const ParallelForTag&) const { - using closure_type = - Impl::ParallelFor<FunctorType, TeamPolicy<Properties...>>; - cudaFuncAttributes attr = - CudaParallelLaunch<closure_type, typename traits::launch_bounds>:: - get_cuda_func_attributes(); - int block_size = - Kokkos::Impl::cuda_get_max_block_size<FunctorType, - typename traits::launch_bounds>( - space().impl_internal_space_instance(), attr, f, - (size_t)impl_vector_length(), - (size_t)team_scratch_size(0) + 2 * sizeof(double), - (size_t)thread_scratch_size(0) + sizeof(double)); - return block_size / impl_vector_length(); - } - - 
template <class FunctorType> - inline int team_size_max(const FunctorType& f, - const ParallelReduceTag&) const { - using functor_analysis_type = - Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, - TeamPolicyInternal, FunctorType>; - using reducer_type = typename Impl::ParallelReduceReturnValue< - void, typename functor_analysis_type::value_type, - FunctorType>::reducer_type; - using closure_type = - Impl::ParallelReduce<FunctorType, TeamPolicy<Properties...>, - reducer_type>; - return internal_team_size_max<closure_type>(f); - } - - template <class FunctorType, class ReducerType> - inline int team_size_max(const FunctorType& f, const ReducerType& /*r*/, - const ParallelReduceTag&) const { - using closure_type = - Impl::ParallelReduce<FunctorType, TeamPolicy<Properties...>, - ReducerType>; - return internal_team_size_max<closure_type>(f); - } - - template <class FunctorType> - int team_size_recommended(const FunctorType& f, const ParallelForTag&) const { - using closure_type = - Impl::ParallelFor<FunctorType, TeamPolicy<Properties...>>; - cudaFuncAttributes attr = - CudaParallelLaunch<closure_type, typename traits::launch_bounds>:: - get_cuda_func_attributes(); - const int block_size = - Kokkos::Impl::cuda_get_opt_block_size<FunctorType, - typename traits::launch_bounds>( - space().impl_internal_space_instance(), attr, f, - (size_t)impl_vector_length(), - (size_t)team_scratch_size(0) + 2 * sizeof(double), - (size_t)thread_scratch_size(0) + sizeof(double)); - return block_size / impl_vector_length(); - } - - template <class FunctorType> - inline int team_size_recommended(const FunctorType& f, - const ParallelReduceTag&) const { - using functor_analysis_type = - Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, - TeamPolicyInternal, FunctorType>; - using reducer_type = typename Impl::ParallelReduceReturnValue< - void, typename functor_analysis_type::value_type, - FunctorType>::reducer_type; - using closure_type = - 
Impl::ParallelReduce<FunctorType, TeamPolicy<Properties...>, - reducer_type>; - return internal_team_size_recommended<closure_type>(f); - } - - template <class FunctorType, class ReducerType> - int team_size_recommended(const FunctorType& f, const ReducerType&, - const ParallelReduceTag&) const { - using closure_type = - Impl::ParallelReduce<FunctorType, TeamPolicy<Properties...>, - ReducerType>; - return internal_team_size_recommended<closure_type>(f); - } - - inline static int vector_length_max() { return Impl::CudaTraits::WarpSize; } - - inline static int verify_requested_vector_length( - int requested_vector_length) { - int test_vector_length = - std::min(requested_vector_length, vector_length_max()); - - // Allow only power-of-two vector_length - if (!(is_integral_power_of_two(test_vector_length))) { - int test_pow2 = 1; - for (int i = 0; i < 5; i++) { - test_pow2 = test_pow2 << 1; - if (test_pow2 > test_vector_length) { - break; - } - } - test_vector_length = test_pow2 >> 1; - } - - return test_vector_length; - } - - inline static int scratch_size_max(int level) { - return ( - level == 0 ? 1024 * 40 : // 48kB is the max for CUDA, but we need some - // for team_member.reduce etc. 
- 20 * 1024 * - 1024); // arbitrarily setting this to 20MB, for a Volta V100 - // that would give us about 3.2GB for 2 teams per SM - } - - //---------------------------------------- - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 - KOKKOS_DEPRECATED inline int vector_length() const { - return impl_vector_length(); - } -#endif - inline int impl_vector_length() const { return m_vector_length; } - inline int team_size() const { return m_team_size; } - inline int league_size() const { return m_league_size; } - inline bool impl_auto_team_size() const { return m_tune_team; } - inline bool impl_auto_vector_length() const { return m_tune_vector; } - inline void impl_set_team_size(size_t team_size) { m_team_size = team_size; } - inline void impl_set_vector_length(size_t vector_length) { - m_vector_length = vector_length; - } - inline int scratch_size(int level, int team_size_ = -1) const { - if (team_size_ < 0) team_size_ = m_team_size; - return m_team_scratch_size[level] + - team_size_ * m_thread_scratch_size[level]; - } - inline int team_scratch_size(int level) const { - return m_team_scratch_size[level]; - } - inline int thread_scratch_size(int level) const { - return m_thread_scratch_size[level]; - } - - const typename traits::execution_space& space() const { return m_space; } - - TeamPolicyInternal() - : m_space(typename traits::execution_space()), - m_league_size(0), - m_team_size(-1), - m_vector_length(0), - m_team_scratch_size{0, 0}, - m_thread_scratch_size{0, 0}, - m_chunk_size(Impl::CudaTraits::WarpSize), - m_tune_team(false), - m_tune_vector(false) {} - - /** \brief Specify league size, specify team size, specify vector length */ - TeamPolicyInternal(const execution_space space_, int league_size_, - int team_size_request, int vector_length_request = 1) - : m_space(space_), - m_league_size(league_size_), - m_team_size(team_size_request), - m_vector_length( - (vector_length_request > 0) - ? 
verify_requested_vector_length(vector_length_request) - : verify_requested_vector_length(1)), - m_team_scratch_size{0, 0}, - m_thread_scratch_size{0, 0}, - m_chunk_size(Impl::CudaTraits::WarpSize), - m_tune_team(bool(team_size_request <= 0)), - m_tune_vector(bool(vector_length_request <= 0)) { - // Make sure league size is permissible - if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count()[0])) - Impl::throw_runtime_exception( - "Requested too large league_size for TeamPolicy on Cuda execution " - "space."); - - // Make sure total block size is permissible - if (m_team_size * m_vector_length > - int(Impl::CudaTraits::MaxHierarchicalParallelism)) { - Impl::throw_runtime_exception( - std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. " - "Team size x vector length must be smaller than 1024.")); - } - } - - /** \brief Specify league size, request team size, specify vector length */ - TeamPolicyInternal(const execution_space space_, int league_size_, - const Kokkos::AUTO_t& /* team_size_request */ - , - int vector_length_request = 1) - : TeamPolicyInternal(space_, league_size_, -1, vector_length_request) {} - - /** \brief Specify league size, request team size and vector length */ - TeamPolicyInternal(const execution_space space_, int league_size_, - const Kokkos::AUTO_t& /* team_size_request */, - const Kokkos::AUTO_t& /* vector_length_request */ - ) - : TeamPolicyInternal(space_, league_size_, -1, -1) {} - - /** \brief Specify league size, specify team size, request vector length */ - TeamPolicyInternal(const execution_space space_, int league_size_, - int team_size_request, const Kokkos::AUTO_t&) - : TeamPolicyInternal(space_, league_size_, team_size_request, -1) {} - - TeamPolicyInternal(int league_size_, int team_size_request, - int vector_length_request = 1) - : TeamPolicyInternal(typename traits::execution_space(), league_size_, - team_size_request, vector_length_request) {} - - TeamPolicyInternal(int league_size_, const 
Kokkos::AUTO_t& team_size_request, - int vector_length_request = 1) - : TeamPolicyInternal(typename traits::execution_space(), league_size_, - team_size_request, vector_length_request) - - {} - - /** \brief Specify league size, request team size */ - TeamPolicyInternal(int league_size_, const Kokkos::AUTO_t& team_size_request, - const Kokkos::AUTO_t& vector_length_request) - : TeamPolicyInternal(typename traits::execution_space(), league_size_, - team_size_request, vector_length_request) {} - - /** \brief Specify league size, request team size */ - TeamPolicyInternal(int league_size_, int team_size_request, - const Kokkos::AUTO_t& vector_length_request) - : TeamPolicyInternal(typename traits::execution_space(), league_size_, - team_size_request, vector_length_request) {} - - inline int chunk_size() const { return m_chunk_size; } - - /** \brief set chunk_size to a discrete value*/ - inline TeamPolicyInternal& set_chunk_size( - typename traits::index_type chunk_size_) { - m_chunk_size = chunk_size_; - return *this; - } - - /** \brief set per team scratch size for a specific level of the scratch - * hierarchy */ - inline TeamPolicyInternal& set_scratch_size(const int& level, - const PerTeamValue& per_team) { - m_team_scratch_size[level] = per_team.value; - return *this; - } - - /** \brief set per thread scratch size for a specific level of the scratch - * hierarchy */ - inline TeamPolicyInternal& set_scratch_size( - const int& level, const PerThreadValue& per_thread) { - m_thread_scratch_size[level] = per_thread.value; - return *this; - } - - /** \brief set per thread and per team scratch size for a specific level of - * the scratch hierarchy */ - inline TeamPolicyInternal& set_scratch_size( - const int& level, const PerTeamValue& per_team, - const PerThreadValue& per_thread) { - m_team_scratch_size[level] = per_team.value; - m_thread_scratch_size[level] = per_thread.value; - return *this; - } - - using member_type = Kokkos::Impl::CudaTeamMember; - - protected: - 
template <class ClosureType, class FunctorType, class BlockSizeCallable> - int internal_team_size_common(const FunctorType& f, - BlockSizeCallable&& block_size_callable) const { - using closure_type = ClosureType; - using functor_value_traits = - Impl::FunctorValueTraits<FunctorType, typename traits::work_tag>; - - cudaFuncAttributes attr = - CudaParallelLaunch<closure_type, typename traits::launch_bounds>:: - get_cuda_func_attributes(); - const int block_size = std::forward<BlockSizeCallable>(block_size_callable)( - space().impl_internal_space_instance(), attr, f, - (size_t)impl_vector_length(), - (size_t)team_scratch_size(0) + 2 * sizeof(double), - (size_t)thread_scratch_size(0) + sizeof(double) + - ((functor_value_traits::StaticValueSize != 0) - ? 0 - : functor_value_traits::value_size(f))); - KOKKOS_ASSERT(block_size > 0); - - // Currently we require Power-of-2 team size for reductions. - int p2 = 1; - while (p2 <= block_size) p2 *= 2; - p2 /= 2; - return p2 / impl_vector_length(); - } - - template <class ClosureType, class FunctorType> - int internal_team_size_max(const FunctorType& f) const { - return internal_team_size_common<ClosureType>( - f, - Kokkos::Impl::cuda_get_max_block_size<FunctorType, - typename traits::launch_bounds>); - } - - template <class ClosureType, class FunctorType> - int internal_team_size_recommended(const FunctorType& f) const { - return internal_team_size_common<ClosureType>( - f, - Kokkos::Impl::cuda_get_opt_block_size<FunctorType, - typename traits::launch_bounds>); - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template <class FunctorType, class... 
Traits> -class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> { - public: - using Policy = Kokkos::RangePolicy<Traits...>; - - private: - using Member = typename Policy::member_type; - using WorkTag = typename Policy::work_tag; - using LaunchBounds = typename Policy::launch_bounds; - - const FunctorType m_functor; - const Policy m_policy; - - ParallelFor() = delete; - ParallelFor& operator=(const ParallelFor&) = delete; - - template <class TagType> - inline __device__ - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const Member i) const { - m_functor(i); - } - - template <class TagType> - inline __device__ - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const Member i) const { - m_functor(TagType(), i); - } - - public: - using functor_type = FunctorType; - - Policy const& get_policy() const { return m_policy; } - - inline __device__ void operator()() const { - const Member work_stride = blockDim.y * gridDim.x; - const Member work_end = m_policy.end(); - - for (Member iwork = - m_policy.begin() + threadIdx.y + blockDim.y * blockIdx.x; - iwork < work_end; - iwork = iwork < work_end - work_stride ? 
iwork + work_stride - : work_end) { - this->template exec_range<WorkTag>(iwork); - } - } - - inline void execute() const { - const typename Policy::index_type nwork = m_policy.end() - m_policy.begin(); - - cudaFuncAttributes attr = - CudaParallelLaunch<ParallelFor, - LaunchBounds>::get_cuda_func_attributes(); - const int block_size = - Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>( - m_policy.space().impl_internal_space_instance(), attr, m_functor, 1, - 0, 0); - KOKKOS_ASSERT(block_size > 0); - dim3 block(1, block_size, 1); - dim3 grid( - std::min( - typename Policy::index_type((nwork + block.y - 1) / block.y), - typename Policy::index_type(cuda_internal_maximum_grid_count()[0])), - 1, 1); -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - if (Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) { - block = dim3(1, 1, 1); - grid = dim3(1, 1, 1); - } -#endif - - CudaParallelLaunch<ParallelFor, LaunchBounds>( - *this, grid, block, 0, m_policy.space().impl_internal_space_instance(), - false); - } - - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} -}; - -// MDRangePolicy impl -template <class FunctorType, class... 
Traits> -class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> { - public: - using Policy = Kokkos::MDRangePolicy<Traits...>; - using functor_type = FunctorType; - - private: - using RP = Policy; - using array_index_type = typename Policy::array_index_type; - using index_type = typename Policy::index_type; - using LaunchBounds = typename Policy::launch_bounds; - - const FunctorType m_functor; - const Policy m_rp; - - public: - template <typename Policy, typename Functor> - static int max_tile_size_product(const Policy& pol, const Functor&) { - cudaFuncAttributes attr = - CudaParallelLaunch<ParallelFor, - LaunchBounds>::get_cuda_func_attributes(); - auto const& prop = pol.space().cuda_device_prop(); - // Limits due to registers/SM, MDRange doesn't have - // shared memory constraints - int const regs_per_sm = prop.regsPerMultiprocessor; - int const regs_per_thread = attr.numRegs; - int const max_threads_per_sm = regs_per_sm / regs_per_thread; - return std::min( - max_threads_per_sm, - static_cast<int>(Kokkos::Impl::CudaTraits::MaxHierarchicalParallelism)); - } - Policy const& get_policy() const { return m_rp; } - inline __device__ void operator()() const { - Kokkos::Impl::DeviceIterateTile<Policy::rank, Policy, FunctorType, - typename Policy::work_tag>(m_rp, m_functor) - .exec_range(); - } - - inline void execute() const { - using namespace std; - - if (m_rp.m_num_tiles == 0) return; - const auto maxblocks = cuda_internal_maximum_grid_count(); - if (RP::rank == 2) { - const dim3 block(m_rp.m_tile[0], m_rp.m_tile[1], 1); - KOKKOS_ASSERT(block.x > 0); - KOKKOS_ASSERT(block.y > 0); - const dim3 grid( - std::min<array_index_type>( - (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x, - maxblocks[0]), - std::min<array_index_type>( - (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y, - maxblocks[1]), - 1); - CudaParallelLaunch<ParallelFor, LaunchBounds>( - *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), - 
false); - } else if (RP::rank == 3) { - const dim3 block(m_rp.m_tile[0], m_rp.m_tile[1], m_rp.m_tile[2]); - KOKKOS_ASSERT(block.x > 0); - KOKKOS_ASSERT(block.y > 0); - KOKKOS_ASSERT(block.z > 0); - const dim3 grid( - std::min<array_index_type>( - (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x, - maxblocks[0]), - std::min<array_index_type>( - (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y, - maxblocks[1]), - std::min<array_index_type>( - (m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1) / block.z, - maxblocks[2])); - CudaParallelLaunch<ParallelFor, LaunchBounds>( - *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), - false); - } else if (RP::rank == 4) { - // id0,id1 encoded within threadIdx.x; id2 to threadIdx.y; id3 to - // threadIdx.z - const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1], m_rp.m_tile[2], - m_rp.m_tile[3]); - KOKKOS_ASSERT(block.y > 0); - KOKKOS_ASSERT(block.z > 0); - const dim3 grid( - std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1], - maxblocks[0]), - std::min<array_index_type>( - (m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1) / block.y, - maxblocks[1]), - std::min<array_index_type>( - (m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1) / block.z, - maxblocks[2])); - CudaParallelLaunch<ParallelFor, LaunchBounds>( - *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), - false); - } else if (RP::rank == 5) { - // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4 to - // threadIdx.z - const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1], - m_rp.m_tile[2] * m_rp.m_tile[3], m_rp.m_tile[4]); - KOKKOS_ASSERT(block.z > 0); - const dim3 grid( - std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1], - maxblocks[0]), - std::min<array_index_type>(m_rp.m_tile_end[2] * m_rp.m_tile_end[3], - maxblocks[1]), - std::min<array_index_type>( - (m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1) / block.z, - maxblocks[2])); - CudaParallelLaunch<ParallelFor, LaunchBounds>( 
- *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), - false); - } else if (RP::rank == 6) { - // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4,id5 to - // threadIdx.z - const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1], - m_rp.m_tile[2] * m_rp.m_tile[3], - m_rp.m_tile[4] * m_rp.m_tile[5]); - const dim3 grid( - std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1], - maxblocks[0]), - std::min<array_index_type>(m_rp.m_tile_end[2] * m_rp.m_tile_end[3], - maxblocks[1]), - std::min<array_index_type>(m_rp.m_tile_end[4] * m_rp.m_tile_end[5], - maxblocks[2])); - CudaParallelLaunch<ParallelFor, LaunchBounds>( - *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), - false); - } else { - Kokkos::abort("Kokkos::MDRange Error: Exceeded rank bounds with Cuda\n"); - } - - } // end execute - - // inline - ParallelFor(const FunctorType& arg_functor, Policy arg_policy) - : m_functor(arg_functor), m_rp(arg_policy) {} -}; - -__device__ inline int64_t cuda_get_scratch_index(Cuda::size_type league_size, - int32_t* scratch_locks) { - int64_t threadid = 0; - __shared__ int64_t base_thread_id; - if (threadIdx.x == 0 && threadIdx.y == 0) { - int64_t const wraparound_len = Kokkos::Experimental::min( - int64_t(league_size), - (int64_t(Kokkos::Impl::g_device_cuda_lock_arrays.n)) / - (blockDim.x * blockDim.y)); - threadid = (blockIdx.x * blockDim.z + threadIdx.z) % wraparound_len; - threadid *= blockDim.x * blockDim.y; - int done = 0; - while (!done) { - done = (0 == atomicCAS(&scratch_locks[threadid], 0, 1)); - if (!done) { - threadid += blockDim.x * blockDim.y; - if (int64_t(threadid + blockDim.x * blockDim.y) >= - wraparound_len * blockDim.x * blockDim.y) - threadid = 0; - } - } - base_thread_id = threadid; - } - __syncthreads(); - threadid = base_thread_id; - return threadid; -} - -__device__ inline void cuda_release_scratch_index(int32_t* scratch_locks, - int64_t threadid) { - __syncthreads(); - if (threadIdx.x == 0 && 
threadIdx.y == 0) { - scratch_locks[threadid] = 0; - } -} - -template <class FunctorType, class... Properties> -class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, - Kokkos::Cuda> { - public: - using Policy = TeamPolicy<Properties...>; - - private: - using Member = typename Policy::member_type; - using WorkTag = typename Policy::work_tag; - using LaunchBounds = typename Policy::launch_bounds; - - public: - using functor_type = FunctorType; - using size_type = Cuda::size_type; - - private: - // Algorithmic constraints: blockDim.y is a power of two AND blockDim.y == - // blockDim.z == 1 shared memory utilization: - // - // [ team reduce space ] - // [ team shared space ] - // - - const FunctorType m_functor; - const Policy m_policy; - const size_type m_league_size; - int m_team_size; - const size_type m_vector_size; - int m_shmem_begin; - int m_shmem_size; - void* m_scratch_ptr[2]; - int m_scratch_size[2]; - int m_scratch_pool_id = -1; - int32_t* m_scratch_locks; - - template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_team(const Member& member) const { - m_functor(member); - } - - template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_team(const Member& member) const { - m_functor(TagType(), member); - } - - public: - Policy const& get_policy() const { return m_policy; } - - __device__ inline void operator()() const { - // Iterate this block through the league - int64_t threadid = 0; - if (m_scratch_size[1] > 0) { - threadid = cuda_get_scratch_index(m_league_size, m_scratch_locks); - } - - const int int_league_size = (int)m_league_size; - for (int league_rank = blockIdx.x; league_rank < int_league_size; - league_rank += gridDim.x) { - this->template exec_team<WorkTag>(typename Policy::member_type( - kokkos_impl_cuda_shared_memory<void>(), m_shmem_begin, m_shmem_size, - (void*)(((char*)m_scratch_ptr[1]) + - 
ptrdiff_t(threadid / (blockDim.x * blockDim.y)) * - m_scratch_size[1]), - m_scratch_size[1], league_rank, m_league_size)); - } - if (m_scratch_size[1] > 0) { - cuda_release_scratch_index(m_scratch_locks, threadid); - } - } - - inline void execute() const { - const int64_t shmem_size_total = m_shmem_begin + m_shmem_size; - dim3 grid(int(m_league_size), 1, 1); - const dim3 block(int(m_vector_size), int(m_team_size), 1); - -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - if (Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) { - grid = dim3(1, 1, 1); - } -#endif - - CudaParallelLaunch<ParallelFor, LaunchBounds>( - *this, grid, block, shmem_size_total, - m_policy.space().impl_internal_space_instance(), - true); // copy to device and execute - } - - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), - m_policy(arg_policy), - m_league_size(arg_policy.league_size()), - m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()) { - cudaFuncAttributes attr = - CudaParallelLaunch<ParallelFor, - LaunchBounds>::get_cuda_func_attributes(); - m_team_size = - m_team_size >= 0 - ? m_team_size - : Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>( - m_policy.space().impl_internal_space_instance(), attr, - m_functor, m_vector_size, m_policy.team_scratch_size(0), - m_policy.thread_scratch_size(0)) / - m_vector_size; - - m_shmem_begin = (sizeof(double) * (m_team_size + 2)); - m_shmem_size = - (m_policy.scratch_size(0, m_team_size) + - FunctorTeamShmemSize<FunctorType>::value(m_functor, m_team_size)); - m_scratch_size[0] = m_policy.scratch_size(0, m_team_size); - m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); - m_scratch_locks = - m_policy.space().impl_internal_space_instance()->m_scratch_locks; - - // Functor's reduce memory, team scan memory, and team shared memory depend - // upon team size. 
- m_scratch_ptr[0] = nullptr; - if (m_team_size <= 0) { - m_scratch_ptr[1] = nullptr; - } else { - auto scratch_ptr_id = - m_policy.space() - .impl_internal_space_instance() - ->resize_team_scratch_space( - static_cast<std::int64_t>(m_scratch_size[1]) * - (std::min( - static_cast<std::int64_t>(Cuda::concurrency() / - (m_team_size * m_vector_size)), - static_cast<std::int64_t>(m_league_size)))); - m_scratch_ptr[1] = scratch_ptr_id.first; - m_scratch_pool_id = scratch_ptr_id.second; - } - - const int shmem_size_total = m_shmem_begin + m_shmem_size; - if (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < - shmem_size_total) { - printf( - "%i %i\n", - m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock, - shmem_size_total); - Kokkos::Impl::throw_runtime_exception(std::string( - "Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory")); - } - - if (int(m_team_size) > - int(Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>( - m_policy.space().impl_internal_space_instance(), attr, - arg_functor, arg_policy.impl_vector_length(), - arg_policy.team_scratch_size(0), - arg_policy.thread_scratch_size(0)) / - arg_policy.impl_vector_length())) { - Kokkos::Impl::throw_runtime_exception(std::string( - "Kokkos::Impl::ParallelFor< Cuda > requested too large team size.")); - } - } - - ~ParallelFor() { - if (m_scratch_pool_id >= 0) { - m_policy.space() - .impl_internal_space_instance() - ->m_team_scratch_pool[m_scratch_pool_id] = 0; - } - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template <class FunctorType, class ReducerType, class... 
Traits> -class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, - Kokkos::Cuda> { - public: - using Policy = Kokkos::RangePolicy<Traits...>; - - private: - using WorkRange = typename Policy::WorkRange; - using WorkTag = typename Policy::work_tag; - using Member = typename Policy::member_type; - using LaunchBounds = typename Policy::launch_bounds; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - - public: - using pointer_type = typename ValueTraits::pointer_type; - using value_type = typename ValueTraits::value_type; - using reference_type = typename ValueTraits::reference_type; - using functor_type = FunctorType; - // Conditionally set word_size_type to int16_t or int8_t if value_type is - // smaller than int32_t (Kokkos::Cuda::size_type) - // word_size_type is used to determine the word count, shared memory buffer - // size, and global memory buffer size before the reduction is performed. - // Within the reduction, the word count is recomputed based on word_size_type - // and when calculating indexes into the shared/global memory buffers for - // performing the reduction, word_size_type is used again. - // For scalars > 4 bytes in size, indexing into shared/global memory relies - // on the block and grid dimensions to ensure that we index at the correct - // offset rather than at every 4 byte word; such that, when the join is - // performed, we have the correct data that was copied over in chunks of 4 - // bytes. 
- using word_size_type = typename std::conditional< - sizeof(value_type) < sizeof(Kokkos::Cuda::size_type), - typename std::conditional<sizeof(value_type) == 2, int16_t, int8_t>::type, - Kokkos::Cuda::size_type>::type; - using index_type = typename Policy::index_type; - using reducer_type = ReducerType; - - // Algorithmic constraints: blockSize is a power of two AND blockDim.y == - // blockDim.z == 1 - - const FunctorType m_functor; - const Policy m_policy; - const ReducerType m_reducer; - const pointer_type m_result_ptr; - const bool m_result_ptr_device_accessible; - const bool m_result_ptr_host_accessible; - word_size_type* m_scratch_space; - // m_scratch_flags must be of type Cuda::size_type due to use of atomics - // for tracking metadata in Kokkos_Cuda_ReduceScan.hpp - Cuda::size_type* m_scratch_flags; - word_size_type* m_unified_space; - - // Shall we use the shfl based reduction or not (only use it for static sized - // types of more than 128bit) - enum { - UseShflReduction = false - }; //((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) - //}; - // Some crutch to do function overloading - private: - using DummyShflReductionType = double; - using DummySHMEMReductionType = int; - - public: - Policy const& get_policy() const { return m_policy; } - - // Make the exec_range calls call to Reduce::DeviceIterateTile - template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update) const { - m_functor(i, update); - } - - template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update) const { - m_functor(TagType(), i, update); - } - - __device__ inline void operator()() const { - /* run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, - DummySHMEMReductionType>::select(1,1.0) ); - } - - __device__ inline - void run(const 
DummySHMEMReductionType& ) const - {*/ - const integral_nonzero_constant< - word_size_type, ValueTraits::StaticValueSize / sizeof(word_size_type)> - word_count(ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)) / - sizeof(word_size_type)); - - { - reference_type value = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - kokkos_impl_cuda_shared_memory<word_size_type>() + - threadIdx.y * word_count.value); - - // Number of blocks is bounded so that the reduction can be limited to two - // passes. Each thread block is given an approximately equal amount of - // work to perform. Accumulate the values for this block. The accumulation - // ordering does not match the final pass, but is arithmatically - // equivalent. - - const WorkRange range(m_policy, blockIdx.x, gridDim.x); - - for (Member iwork = range.begin() + threadIdx.y, iwork_end = range.end(); - iwork < iwork_end; iwork += blockDim.y) { - this->template exec_range<WorkTag>(iwork, value); - } - } - - // Doing code duplication here to fix issue #3428 - // Suspect optimizer bug?? - // Reduce with final value at blockDim.y - 1 location. - // Shortcut for length zero reduction - if (m_policy.begin() == m_policy.end()) { - // This is the final block with the final result at the final threads' - // location - - word_size_type* const shared = - kokkos_impl_cuda_shared_memory<word_size_type>() + - (blockDim.y - 1) * word_count.value; - word_size_type* const global = - m_result_ptr_device_accessible - ? reinterpret_cast<word_size_type*>(m_result_ptr) - : (m_unified_space ? 
m_unified_space : m_scratch_space); - - if (threadIdx.y == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), shared); - } - - if (CudaTraits::WarpSize < word_count.value) { - __syncthreads(); - } - - for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) { - global[i] = shared[i]; - } - // return ; - } - - if (m_policy.begin() != m_policy.end()) { - { - if (cuda_single_inter_block_reduce_scan<false, ReducerTypeFwd, - WorkTagFwd>( - ReducerConditional::select(m_functor, m_reducer), blockIdx.x, - gridDim.x, kokkos_impl_cuda_shared_memory<word_size_type>(), - m_scratch_space, m_scratch_flags)) { - // This is the final block with the final result at the final threads' - // location - - word_size_type* const shared = - kokkos_impl_cuda_shared_memory<word_size_type>() + - (blockDim.y - 1) * word_count.value; - word_size_type* const global = - m_result_ptr_device_accessible - ? reinterpret_cast<word_size_type*>(m_result_ptr) - : (m_unified_space ? m_unified_space : m_scratch_space); - - if (threadIdx.y == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), shared); - } - - if (CudaTraits::WarpSize < word_count.value) { - __syncthreads(); - } - - for (unsigned i = threadIdx.y; i < word_count.value; - i += blockDim.y) { - global[i] = shared[i]; - } - } - } - } - } - /* __device__ inline - void run(const DummyShflReductionType&) const - { - value_type value; - ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , - &value); - // Number of blocks is bounded so that the reduction can be limited to - two passes. - // Each thread block is given an approximately equal amount of work to - perform. - // Accumulate the values for this block. - // The accumulation ordering does not match the final pass, but is - arithmatically equivalent. 
- - const WorkRange range( m_policy , blockIdx.x , gridDim.x ); - - for ( Member iwork = range.begin() + threadIdx.y , iwork_end = - range.end() ; iwork < iwork_end ; iwork += blockDim.y ) { this-> template - exec_range< WorkTag >( iwork , value ); - } - - pointer_type const result = (pointer_type) (m_unified_space ? - m_unified_space : m_scratch_space) ; - - int max_active_thread = range.end()-range.begin() < blockDim.y ? - range.end() - range.begin():blockDim.y; - - max_active_thread = (max_active_thread == - 0)?blockDim.y:max_active_thread; - - value_type init; - ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , - &init); - if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTagFwd> - (value,init,ValueJoin(ReducerConditional::select(m_functor , - m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) { - const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; - if(id==0) { - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( - ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); - *result = value; - } - } - }*/ - - // Determine block size constrained by shared memory: - inline unsigned local_block_size(const FunctorType& f) { - unsigned n = CudaTraits::WarpSize * 8; - int shmem_size = - cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, WorkTag>( - f, n); - using closure_type = Impl::ParallelReduce<FunctorType, Policy, ReducerType>; - cudaFuncAttributes attr = - CudaParallelLaunch<closure_type, - LaunchBounds>::get_cuda_func_attributes(); - while ( - (n && - (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < - shmem_size)) || - (n > - static_cast<unsigned>( - Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>( - m_policy.space().impl_internal_space_instance(), attr, f, 1, - shmem_size, 0)))) { - n >>= 1; - shmem_size = cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, - WorkTag>(f, n); - } - return n; - } - - inline void 
execute() { - const index_type nwork = m_policy.end() - m_policy.begin(); - const bool need_device_set = ReduceFunctorHasInit<FunctorType>::value || - ReduceFunctorHasFinal<FunctorType>::value || - !m_result_ptr_host_accessible || -#ifdef KOKKOS_CUDA_ENABLE_GRAPHS - Policy::is_graph_kernel::value || -#endif - !std::is_same<ReducerType, InvalidType>::value; - if ((nwork > 0) || need_device_set) { - const int block_size = local_block_size(m_functor); - - KOKKOS_ASSERT(block_size > 0); - - // TODO: down casting these uses more space than required? - m_scratch_space = (word_size_type*)cuda_internal_scratch_space( - m_policy.space(), ValueTraits::value_size(ReducerConditional::select( - m_functor, m_reducer)) * - block_size /* block_size == max block_count */); - - // Intentionally do not downcast to word_size_type since we use Cuda - // atomics in Kokkos_Cuda_ReduceScan.hpp - m_scratch_flags = cuda_internal_scratch_flags(m_policy.space(), - sizeof(Cuda::size_type)); - m_unified_space = - reinterpret_cast<word_size_type*>(cuda_internal_scratch_unified( - m_policy.space(), - ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)))); - - // REQUIRED ( 1 , N , 1 ) - dim3 block(1, block_size, 1); - // Required grid.x <= block.y - dim3 grid(std::min(int(block.y), int((nwork + block.y - 1) / block.y)), 1, - 1); - - // TODO @graph We need to effectively insert this in to the graph - const int shmem = - UseShflReduction - ? 
0 - : cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, - WorkTag>(m_functor, - block.y); - - if ((nwork == 0) -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - || Kokkos::Impl::CudaInternal::cuda_use_serial_execution() -#endif - ) { - block = dim3(1, 1, 1); - grid = dim3(1, 1, 1); - } - - CudaParallelLaunch<ParallelReduce, LaunchBounds>( - *this, grid, block, shmem, - m_policy.space().impl_internal_space_instance(), - false); // copy to device and execute - - if (!m_result_ptr_device_accessible) { - m_policy.space().fence( - "Kokkos::Impl::ParallelReduce<Cuda, RangePolicy>::execute: Result " - "Not Device Accessible"); - - if (m_result_ptr) { - if (m_unified_space) { - const int count = ValueTraits::value_count( - ReducerConditional::select(m_functor, m_reducer)); - for (int i = 0; i < count; ++i) { - m_result_ptr[i] = pointer_type(m_unified_space)[i]; - } - } else { - const int size = ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)); - DeepCopy<HostSpace, CudaSpace>(m_result_ptr, m_scratch_space, size); - } - } - } - } else { - if (m_result_ptr) { - // TODO @graph We need to effectively insert this in to the graph - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - } - } - } - - template <class ViewType> - ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, - const ViewType& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value, - void*>::type = nullptr) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(InvalidType()), - m_result_ptr(arg_result.data()), - m_result_ptr_device_accessible( - MemorySpaceAccess<Kokkos::CudaSpace, - typename ViewType::memory_space>::accessible), - m_result_ptr_host_accessible( - MemorySpaceAccess<Kokkos::HostSpace, - typename ViewType::memory_space>::accessible), - m_scratch_space(nullptr), - m_scratch_flags(nullptr), - m_unified_space(nullptr) { - check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor); - 
} - - ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, - const ReducerType& reducer) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(reducer), - m_result_ptr(reducer.view().data()), - m_result_ptr_device_accessible( - MemorySpaceAccess<Kokkos::CudaSpace, - typename ReducerType::result_view_type:: - memory_space>::accessible), - m_result_ptr_host_accessible( - MemorySpaceAccess<Kokkos::HostSpace, - typename ReducerType::result_view_type:: - memory_space>::accessible), - m_scratch_space(nullptr), - m_scratch_flags(nullptr), - m_unified_space(nullptr) { - check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor); - } -}; - -// MDRangePolicy impl -template <class FunctorType, class ReducerType, class... Traits> -class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, - Kokkos::Cuda> { - public: - using Policy = Kokkos::MDRangePolicy<Traits...>; - - private: - using array_index_type = typename Policy::array_index_type; - using index_type = typename Policy::index_type; - - using WorkTag = typename Policy::work_tag; - using Member = typename Policy::member_type; - using LaunchBounds = typename Policy::launch_bounds; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - - public: - using pointer_type = typename ValueTraits::pointer_type; - using value_type = typename ValueTraits::value_type; - using reference_type = typename ValueTraits::reference_type; - using functor_type = FunctorType; - using size_type = 
Cuda::size_type; - using reducer_type = ReducerType; - - // Algorithmic constraints: blockSize is a power of two AND blockDim.y == - // blockDim.z == 1 - - const FunctorType m_functor; - const Policy m_policy; // used for workrange and nwork - const ReducerType m_reducer; - const pointer_type m_result_ptr; - const bool m_result_ptr_device_accessible; - size_type* m_scratch_space; - size_type* m_scratch_flags; - size_type* m_unified_space; - - using DeviceIteratePattern = typename Kokkos::Impl::Reduce::DeviceIterateTile< - Policy::rank, Policy, FunctorType, typename Policy::work_tag, - reference_type>; - - // Shall we use the shfl based reduction or not (only use it for static sized - // types of more than 128bit - static constexpr bool UseShflReduction = false; - //((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) - // Some crutch to do function overloading - private: - using DummyShflReductionType = double; - using DummySHMEMReductionType = int; - - public: - template <typename Policy, typename Functor> - static int max_tile_size_product(const Policy& pol, const Functor&) { - cudaFuncAttributes attr = - CudaParallelLaunch<ParallelReduce, - LaunchBounds>::get_cuda_func_attributes(); - auto const& prop = pol.space().cuda_device_prop(); - // Limits due do registers/SM - int const regs_per_sm = prop.regsPerMultiprocessor; - int const regs_per_thread = attr.numRegs; - int const max_threads_per_sm = regs_per_sm / regs_per_thread; - return std::min( - max_threads_per_sm, - static_cast<int>(Kokkos::Impl::CudaTraits::MaxHierarchicalParallelism)); - } - Policy const& get_policy() const { return m_policy; } - inline __device__ void exec_range(reference_type update) const { - Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank, Policy, FunctorType, - typename Policy::work_tag, - reference_type>(m_policy, m_functor, - update) - .exec_range(); - } - - inline __device__ void operator()() const { - /* run(Kokkos::Impl::if_c<UseShflReduction, 
DummyShflReductionType, - DummySHMEMReductionType>::select(1,1.0) ); - } - - __device__ inline - void run(const DummySHMEMReductionType& ) const - {*/ - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / - sizeof(size_type)> - word_count(ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)) / - sizeof(size_type)); - - { - reference_type value = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - kokkos_impl_cuda_shared_memory<size_type>() + - threadIdx.y * word_count.value); - - // Number of blocks is bounded so that the reduction can be limited to two - // passes. Each thread block is given an approximately equal amount of - // work to perform. Accumulate the values for this block. The accumulation - // ordering does not match the final pass, but is arithmatically - // equivalent. - - this->exec_range(value); - } - - // Reduce with final value at blockDim.y - 1 location. - // Problem: non power-of-two blockDim - if (cuda_single_inter_block_reduce_scan<false, ReducerTypeFwd, WorkTagFwd>( - ReducerConditional::select(m_functor, m_reducer), blockIdx.x, - gridDim.x, kokkos_impl_cuda_shared_memory<size_type>(), - m_scratch_space, m_scratch_flags)) { - // This is the final block with the final result at the final threads' - // location - size_type* const shared = kokkos_impl_cuda_shared_memory<size_type>() + - (blockDim.y - 1) * word_count.value; - size_type* const global = - m_result_ptr_device_accessible - ? reinterpret_cast<size_type*>(m_result_ptr) - : (m_unified_space ? 
m_unified_space : m_scratch_space); - - if (threadIdx.y == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), shared); - } - - if (CudaTraits::WarpSize < word_count.value) { - __syncthreads(); - } - - for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) { - global[i] = shared[i]; - } - } - } - - /* __device__ inline - void run(const DummyShflReductionType&) const - { - - value_type value; - ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , - &value); - // Number of blocks is bounded so that the reduction can be limited to - two passes. - // Each thread block is given an approximately equal amount of work to - perform. - // Accumulate the values for this block. - // The accumulation ordering does not match the final pass, but is - arithmatically equivalent. - - const Member work_part = - ( ( m_policy.m_num_tiles + ( gridDim.x - 1 ) ) / gridDim.x ); //portion - of tiles handled by each block - - this-> exec_range( value ); - - pointer_type const result = (pointer_type) (m_unified_space ? - m_unified_space : m_scratch_space) ; - - int max_active_thread = work_part < blockDim.y ? 
work_part:blockDim.y; - max_active_thread = (max_active_thread == - 0)?blockDim.y:max_active_thread; - - value_type init; - ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , - &init); - if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTagFwd> - (value,init,ValueJoin(ReducerConditional::select(m_functor , - m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) { - const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; - if(id==0) { - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( - ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); - *result = value; - } - } - } - */ - // Determine block size constrained by shared memory: - inline unsigned local_block_size(const FunctorType& f) { - unsigned n = CudaTraits::WarpSize * 8; - int shmem_size = - cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, WorkTag>( - f, n); - using closure_type = Impl::ParallelReduce<FunctorType, Policy, ReducerType>; - cudaFuncAttributes attr = - CudaParallelLaunch<closure_type, - LaunchBounds>::get_cuda_func_attributes(); - while ( - (n && - (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < - shmem_size)) || - (n > - static_cast<unsigned>( - Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>( - m_policy.space().impl_internal_space_instance(), attr, f, 1, - shmem_size, 0)))) { - n >>= 1; - shmem_size = cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, - WorkTag>(f, n); - } - return n; - } - - inline void execute() { - const auto nwork = m_policy.m_num_tiles; - if (nwork) { - int block_size = m_policy.m_prod_tile_dims; - // CONSTRAINT: Algorithm requires block_size >= product of tile dimensions - // Nearest power of two - int exponent_pow_two = std::ceil(std::log2(block_size)); - block_size = std::pow(2, exponent_pow_two); - int suggested_blocksize = local_block_size(m_functor); - - block_size = (block_size > suggested_blocksize) - ? 
block_size - : suggested_blocksize; // Note: block_size must be less - // than or equal to 512 - - m_scratch_space = cuda_internal_scratch_space( - m_policy.space(), ValueTraits::value_size(ReducerConditional::select( - m_functor, m_reducer)) * - block_size /* block_size == max block_count */); - m_scratch_flags = - cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type)); - m_unified_space = cuda_internal_scratch_unified( - m_policy.space(), ValueTraits::value_size(ReducerConditional::select( - m_functor, m_reducer))); - - // REQUIRED ( 1 , N , 1 ) - const dim3 block(1, block_size, 1); - // Required grid.x <= block.y - const dim3 grid(std::min(int(block.y), int(nwork)), 1, 1); - - // TODO @graph We need to effectively insert this in to the graph - const int shmem = - UseShflReduction - ? 0 - : cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, - WorkTag>(m_functor, - block.y); - - CudaParallelLaunch<ParallelReduce, LaunchBounds>( - *this, grid, block, shmem, - m_policy.space().impl_internal_space_instance(), - false); // copy to device and execute - - if (!m_result_ptr_device_accessible) { - m_policy.space().fence( - "Kokkos::Impl::ParallelReduce<Cuda, MDRangePolicy>::execute: " - "Result Not Device Accessible"); - - if (m_result_ptr) { - if (m_unified_space) { - const int count = ValueTraits::value_count( - ReducerConditional::select(m_functor, m_reducer)); - for (int i = 0; i < count; ++i) { - m_result_ptr[i] = pointer_type(m_unified_space)[i]; - } - } else { - const int size = ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)); - DeepCopy<HostSpace, CudaSpace>(m_result_ptr, m_scratch_space, size); - } - } - } - } else { - if (m_result_ptr) { - // TODO @graph We need to effectively insert this in to the graph - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - } - } - } - - template <class ViewType> - ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, - const 
ViewType& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value, - void*>::type = nullptr) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(InvalidType()), - m_result_ptr(arg_result.data()), - m_result_ptr_device_accessible( - MemorySpaceAccess<Kokkos::CudaSpace, - typename ViewType::memory_space>::accessible), - m_scratch_space(nullptr), - m_scratch_flags(nullptr), - m_unified_space(nullptr) { - check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor); - } - - ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, - const ReducerType& reducer) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(reducer), - m_result_ptr(reducer.view().data()), - m_result_ptr_device_accessible( - MemorySpaceAccess<Kokkos::CudaSpace, - typename ReducerType::result_view_type:: - memory_space>::accessible), - m_scratch_space(nullptr), - m_scratch_flags(nullptr), - m_unified_space(nullptr) { - check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor); - } -}; - -//---------------------------------------------------------------------------- - -template <class FunctorType, class ReducerType, class... 
Properties> -class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, - ReducerType, Kokkos::Cuda> { - public: - using Policy = TeamPolicy<Properties...>; - - private: - using Member = typename Policy::member_type; - using WorkTag = typename Policy::work_tag; - using LaunchBounds = typename Policy::launch_bounds; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - using value_type = typename ValueTraits::value_type; - - public: - using functor_type = FunctorType; - using size_type = Cuda::size_type; - using reducer_type = ReducerType; - - enum : bool { - UseShflReduction = (true && (ValueTraits::StaticValueSize != 0)) - }; - - private: - using DummyShflReductionType = double; - using DummySHMEMReductionType = int; - - // Algorithmic constraints: blockDim.y is a power of two AND blockDim.y == - // blockDim.z == 1 shared memory utilization: - // - // [ global reduce space ] - // [ team reduce space ] - // [ team shared space ] - // - - const FunctorType m_functor; - const Policy m_policy; - const ReducerType m_reducer; - const pointer_type m_result_ptr; - const bool m_result_ptr_device_accessible; - const bool m_result_ptr_host_accessible; - size_type* m_scratch_space; - size_type* m_scratch_flags; - size_type* m_unified_space; - size_type m_team_begin; - size_type m_shmem_begin; - size_type m_shmem_size; - void* m_scratch_ptr[2]; - 
int m_scratch_size[2]; - int m_scratch_pool_id = -1; - int32_t* m_scratch_locks; - const size_type m_league_size; - int m_team_size; - const size_type m_vector_size; - - template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_team(const Member& member, reference_type update) const { - m_functor(member, update); - } - - template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_team(const Member& member, reference_type update) const { - m_functor(TagType(), member, update); - } - - public: - Policy const& get_policy() const { return m_policy; } - - __device__ inline void operator()() const { - int64_t threadid = 0; - if (m_scratch_size[1] > 0) { - threadid = cuda_get_scratch_index(m_league_size, m_scratch_locks); - } - - run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, - DummySHMEMReductionType>::select(1, 1.0), - threadid); - if (m_scratch_size[1] > 0) { - cuda_release_scratch_index(m_scratch_locks, threadid); - } - } - - __device__ inline void run(const DummySHMEMReductionType&, - const int& threadid) const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / - sizeof(size_type)> - word_count(ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)) / - sizeof(size_type)); - - reference_type value = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - kokkos_impl_cuda_shared_memory<size_type>() + - threadIdx.y * word_count.value); - - // Iterate this block through the league - const int int_league_size = (int)m_league_size; - for (int league_rank = blockIdx.x; league_rank < int_league_size; - league_rank += gridDim.x) { - this->template exec_team<WorkTag>( - Member(kokkos_impl_cuda_shared_memory<char>() + m_team_begin, - m_shmem_begin, m_shmem_size, - (void*)(((char*)m_scratch_ptr[1]) + - ptrdiff_t(threadid / (blockDim.x * blockDim.y)) * - m_scratch_size[1]), - 
m_scratch_size[1], league_rank, m_league_size), - value); - } - - // Reduce with final value at blockDim.y - 1 location. - // Doing code duplication here to fix issue #3428 - // Suspect optimizer bug?? - if (m_league_size == 0) { - // This is the final block with the final result at the final threads' - // location - - size_type* const shared = kokkos_impl_cuda_shared_memory<size_type>() + - (blockDim.y - 1) * word_count.value; - size_type* const global = - m_result_ptr_device_accessible - ? reinterpret_cast<size_type*>(m_result_ptr) - : (m_unified_space ? m_unified_space : m_scratch_space); - - if (threadIdx.y == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), shared); - } - - if (CudaTraits::WarpSize < word_count.value) { - __syncthreads(); - } - - for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) { - global[i] = shared[i]; - } - } - - if (m_league_size != 0) { - if (cuda_single_inter_block_reduce_scan<false, FunctorType, WorkTag>( - ReducerConditional::select(m_functor, m_reducer), blockIdx.x, - gridDim.x, kokkos_impl_cuda_shared_memory<size_type>(), - m_scratch_space, m_scratch_flags)) { - // This is the final block with the final result at the final threads' - // location - - size_type* const shared = kokkos_impl_cuda_shared_memory<size_type>() + - (blockDim.y - 1) * word_count.value; - size_type* const global = - m_result_ptr_device_accessible - ? reinterpret_cast<size_type*>(m_result_ptr) - : (m_unified_space ? 
m_unified_space : m_scratch_space); - - if (threadIdx.y == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), shared); - } - - if (CudaTraits::WarpSize < word_count.value) { - __syncthreads(); - } - - for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) { - global[i] = shared[i]; - } - } - } - } - - __device__ inline void run(const DummyShflReductionType&, - const int& threadid) const { - value_type value; - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), &value); - - // Iterate this block through the league - const int int_league_size = (int)m_league_size; - for (int league_rank = blockIdx.x; league_rank < int_league_size; - league_rank += gridDim.x) { - this->template exec_team<WorkTag>( - Member(kokkos_impl_cuda_shared_memory<char>() + m_team_begin, - m_shmem_begin, m_shmem_size, - (void*)(((char*)m_scratch_ptr[1]) + - ptrdiff_t(threadid / (blockDim.x * blockDim.y)) * - m_scratch_size[1]), - m_scratch_size[1], league_rank, m_league_size), - value); - } - - pointer_type const result = - m_result_ptr_device_accessible - ? m_result_ptr - : (pointer_type)(m_unified_space ? 
m_unified_space - : m_scratch_space); - - value_type init; - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), &init); - - if (int_league_size == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), (void*)&value); - *result = value; - } else if ( - Impl::cuda_inter_block_reduction<FunctorType, ValueJoin, WorkTag>( - value, init, - ValueJoin(ReducerConditional::select(m_functor, m_reducer)), - m_scratch_space, result, m_scratch_flags, blockDim.y) - // This breaks a test - // Kokkos::Impl::CudaReductionsFunctor<FunctorType,WorkTag,false,true>::scalar_inter_block_reduction(ReducerConditional::select(m_functor - // , m_reducer) , blockIdx.x , gridDim.x , - // kokkos_impl_cuda_shared_memory<size_type>() , - // m_scratch_space , m_scratch_flags) - ) { - const unsigned id = threadIdx.y * blockDim.x + threadIdx.x; - if (id == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), (void*)&value); - *result = value; - } - } - } - - inline void execute() { - const bool is_empty_range = m_league_size == 0 || m_team_size == 0; - const bool need_device_set = ReduceFunctorHasInit<FunctorType>::value || - ReduceFunctorHasFinal<FunctorType>::value || - !m_result_ptr_host_accessible || -#ifdef KOKKOS_CUDA_ENABLE_GRAPHS - Policy::is_graph_kernel::value || -#endif - !std::is_same<ReducerType, InvalidType>::value; - if (!is_empty_range || need_device_set) { - const int block_count = - UseShflReduction ? 
std::min(m_league_size, size_type(1024 * 32)) - : std::min(int(m_league_size), m_team_size); - - m_scratch_space = cuda_internal_scratch_space( - m_policy.space(), ValueTraits::value_size(ReducerConditional::select( - m_functor, m_reducer)) * - block_count); - m_scratch_flags = - cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type)); - m_unified_space = cuda_internal_scratch_unified( - m_policy.space(), ValueTraits::value_size(ReducerConditional::select( - m_functor, m_reducer))); - - dim3 block(m_vector_size, m_team_size, 1); - dim3 grid(block_count, 1, 1); - const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size; - - if (is_empty_range -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - || Kokkos::Impl::CudaInternal::cuda_use_serial_execution() -#endif - ) { - block = dim3(1, 1, 1); - grid = dim3(1, 1, 1); - } - - CudaParallelLaunch<ParallelReduce, LaunchBounds>( - *this, grid, block, shmem_size_total, - m_policy.space().impl_internal_space_instance(), - true); // copy to device and execute - - if (!m_result_ptr_device_accessible) { - m_policy.space().fence( - "Kokkos::Impl::ParallelReduce<Cuda, TeamPolicy>::execute: Result " - "Not Device Accessible"); - - if (m_result_ptr) { - if (m_unified_space) { - const int count = ValueTraits::value_count( - ReducerConditional::select(m_functor, m_reducer)); - for (int i = 0; i < count; ++i) { - m_result_ptr[i] = pointer_type(m_unified_space)[i]; - } - } else { - const int size = ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)); - DeepCopy<HostSpace, CudaSpace>(m_result_ptr, m_scratch_space, size); - } - } - } - } else { - if (m_result_ptr) { - // TODO @graph We need to effectively insert this in to the graph - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - } - } - } - - template <class ViewType> - ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, - const ViewType& arg_result, - typename 
std::enable_if<Kokkos::is_view<ViewType>::value, - void*>::type = nullptr) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(InvalidType()), - m_result_ptr(arg_result.data()), - m_result_ptr_device_accessible( - MemorySpaceAccess<Kokkos::CudaSpace, - typename ViewType::memory_space>::accessible), - m_result_ptr_host_accessible( - MemorySpaceAccess<Kokkos::HostSpace, - typename ViewType::memory_space>::accessible), - m_scratch_space(nullptr), - m_scratch_flags(nullptr), - m_unified_space(nullptr), - m_team_begin(0), - m_shmem_begin(0), - m_shmem_size(0), - m_scratch_ptr{nullptr, nullptr}, - m_league_size(arg_policy.league_size()), - m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()) { - cudaFuncAttributes attr = - CudaParallelLaunch<ParallelReduce, - LaunchBounds>::get_cuda_func_attributes(); - m_team_size = - m_team_size >= 0 - ? m_team_size - : Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>( - m_policy.space().impl_internal_space_instance(), attr, - m_functor, m_vector_size, m_policy.team_scratch_size(0), - m_policy.thread_scratch_size(0)) / - m_vector_size; - - m_team_begin = - UseShflReduction - ? 
0 - : cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, - WorkTag>(arg_functor, - m_team_size); - m_shmem_begin = sizeof(double) * (m_team_size + 2); - m_shmem_size = - m_policy.scratch_size(0, m_team_size) + - FunctorTeamShmemSize<FunctorType>::value(arg_functor, m_team_size); - m_scratch_size[0] = m_shmem_size; - m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); - m_scratch_locks = - m_policy.space().impl_internal_space_instance()->m_scratch_locks; - if (m_team_size <= 0) { - m_scratch_ptr[1] = nullptr; - } else { - auto scratch_ptr_id = - m_policy.space() - .impl_internal_space_instance() - ->resize_team_scratch_space( - static_cast<std::int64_t>(m_scratch_size[1]) * - (std::min( - static_cast<std::int64_t>(Cuda::concurrency() / - (m_team_size * m_vector_size)), - static_cast<std::int64_t>(m_league_size)))); - m_scratch_ptr[1] = scratch_ptr_id.first; - m_scratch_pool_id = scratch_ptr_id.second; - } - - // The global parallel_reduce does not support vector_length other than 1 at - // the moment - if ((arg_policy.impl_vector_length() > 1) && !UseShflReduction) - Impl::throw_runtime_exception( - "Kokkos::parallel_reduce with a TeamPolicy using a vector length of " - "greater than 1 is not currently supported for CUDA for dynamic " - "sized reduction types."); - - if ((m_team_size < 32) && !UseShflReduction) - Impl::throw_runtime_exception( - "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller " - "than 32 is not currently supported with CUDA for dynamic sized " - "reduction types."); - - // Functor's reduce memory, team scan memory, and team shared memory depend - // upon team size. 
- - const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size; - - if (!Kokkos::Impl::is_integral_power_of_two(m_team_size) && - !UseShflReduction) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); - } - - if (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < - shmem_size_total) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much " - "L0 scratch memory")); - } - - if (int(m_team_size) > - arg_policy.team_size_max(m_functor, m_reducer, ParallelReduceTag())) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too " - "large team size.")); - } - } - - ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, - const ReducerType& reducer) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(reducer), - m_result_ptr(reducer.view().data()), - m_result_ptr_device_accessible( - MemorySpaceAccess<Kokkos::CudaSpace, - typename ReducerType::result_view_type:: - memory_space>::accessible), - m_result_ptr_host_accessible( - MemorySpaceAccess<Kokkos::HostSpace, - typename ReducerType::result_view_type:: - memory_space>::accessible), - m_scratch_space(nullptr), - m_scratch_flags(nullptr), - m_unified_space(nullptr), - m_team_begin(0), - m_shmem_begin(0), - m_shmem_size(0), - m_scratch_ptr{nullptr, nullptr}, - m_league_size(arg_policy.league_size()), - m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()) { - cudaFuncAttributes attr = - CudaParallelLaunch<ParallelReduce, - LaunchBounds>::get_cuda_func_attributes(); - - // Valid team size not provided, deduce team size - m_team_size = - m_team_size >= 0 - ? 
m_team_size - : Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>( - m_policy.space().impl_internal_space_instance(), attr, - m_functor, m_vector_size, m_policy.team_scratch_size(0), - m_policy.thread_scratch_size(0)) / - m_vector_size; - - m_team_begin = - UseShflReduction - ? 0 - : cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, - WorkTag>(arg_functor, - m_team_size); - m_shmem_begin = sizeof(double) * (m_team_size + 2); - m_shmem_size = - m_policy.scratch_size(0, m_team_size) + - FunctorTeamShmemSize<FunctorType>::value(arg_functor, m_team_size); - m_scratch_size[0] = m_shmem_size; - m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); - m_scratch_locks = - m_policy.space().impl_internal_space_instance()->m_scratch_locks; - if (m_team_size <= 0) { - m_scratch_ptr[1] = nullptr; - } else { - auto scratch_ptr_id = - m_policy.space() - .impl_internal_space_instance() - ->resize_team_scratch_space( - static_cast<std::int64_t>(m_scratch_size[1]) * - (std::min( - static_cast<std::int64_t>(Cuda::concurrency() / - (m_team_size * m_vector_size)), - static_cast<std::int64_t>(m_league_size)))); - m_scratch_ptr[1] = scratch_ptr_id.first; - m_scratch_pool_id = scratch_ptr_id.second; - } - - // The global parallel_reduce does not support vector_length other than 1 at - // the moment - if ((arg_policy.impl_vector_length() > 1) && !UseShflReduction) - Impl::throw_runtime_exception( - "Kokkos::parallel_reduce with a TeamPolicy using a vector length of " - "greater than 1 is not currently supported for CUDA for dynamic " - "sized reduction types."); - - if ((m_team_size < 32) && !UseShflReduction) - Impl::throw_runtime_exception( - "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller " - "than 32 is not currently supported with CUDA for dynamic sized " - "reduction types."); - - // Functor's reduce memory, team scan memory, and team shared memory depend - // upon team size. 
- - const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size; - - if ((!Kokkos::Impl::is_integral_power_of_two(m_team_size) && - !UseShflReduction) || - m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < - shmem_size_total) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); - } - - size_type team_size_max = - Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>( - m_policy.space().impl_internal_space_instance(), attr, m_functor, - m_vector_size, m_policy.team_scratch_size(0), - m_policy.thread_scratch_size(0)) / - m_vector_size; - - if ((int)m_team_size > (int)team_size_max) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too " - "large team size.")); - } - } - - ~ParallelReduce() { - if (m_scratch_pool_id >= 0) { - m_policy.space() - .impl_internal_space_instance() - ->m_team_scratch_pool[m_scratch_pool_id] = 0; - } - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template <class FunctorType, class... 
Traits> -class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> { - public: - using Policy = Kokkos::RangePolicy<Traits...>; - - private: - using Member = typename Policy::member_type; - using WorkTag = typename Policy::work_tag; - using WorkRange = typename Policy::WorkRange; - using LaunchBounds = typename Policy::launch_bounds; - - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; - - public: - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - using functor_type = FunctorType; - using size_type = Cuda::size_type; - - private: - // Algorithmic constraints: - // (a) blockDim.y is a power of two - // (b) blockDim.y == blockDim.z == 1 - // (c) gridDim.x <= blockDim.y * blockDim.y - // (d) gridDim.y == gridDim.z == 1 - - const FunctorType m_functor; - const Policy m_policy; - size_type* m_scratch_space; - size_type* m_scratch_flags; - size_type m_final; -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - bool m_run_serial; -#endif - - template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update, - const bool final_result) const { - m_functor(i, update, final_result); - } - - template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update, - const bool final_result) const { - m_functor(TagType(), i, update, final_result); - } - - //---------------------------------------- - - __device__ inline void initial() const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / - sizeof(size_type)> - word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); - - size_type* const 
shared_value = - kokkos_impl_cuda_shared_memory<size_type>() + - word_count.value * threadIdx.y; - - ValueInit::init(m_functor, shared_value); - - // Number of blocks is bounded so that the reduction can be limited to two - // passes. Each thread block is given an approximately equal amount of work - // to perform. Accumulate the values for this block. The accumulation - // ordering does not match the final pass, but is arithmatically equivalent. - - const WorkRange range(m_policy, blockIdx.x, gridDim.x); - - for (Member iwork = range.begin() + threadIdx.y, iwork_end = range.end(); - iwork < iwork_end; iwork += blockDim.y) { - this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_value), false); - } - - // Reduce and scan, writing out scan of blocks' totals and block-groups' - // totals. Blocks' scan values are written to 'blockIdx.x' location. - // Block-groups' scan values are at: i = ( j * blockDim.y - 1 ) for i < - // gridDim.x - cuda_single_inter_block_reduce_scan<true, FunctorType, WorkTag>( - m_functor, blockIdx.x, gridDim.x, - kokkos_impl_cuda_shared_memory<size_type>(), m_scratch_space, - m_scratch_flags); - } - - //---------------------------------------- - - __device__ inline void final() const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / - sizeof(size_type)> - word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); - - // Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , - // value[2] , ... } - size_type* const shared_data = kokkos_impl_cuda_shared_memory<size_type>(); - size_type* const shared_prefix = - shared_data + word_count.value * threadIdx.y; - size_type* const shared_accum = - shared_data + word_count.value * (blockDim.y + 1); - - // Starting value for this thread block is the previous block's total. 
- if (blockIdx.x) { - size_type* const block_total = - m_scratch_space + word_count.value * (blockIdx.x - 1); - for (unsigned i = threadIdx.y; i < word_count.value; ++i) { - shared_accum[i] = block_total[i]; - } - } else if (0 == threadIdx.y) { - ValueInit::init(m_functor, shared_accum); - } - - const WorkRange range(m_policy, blockIdx.x, gridDim.x); - - for (typename Policy::member_type iwork_base = range.begin(); - iwork_base < range.end(); iwork_base += blockDim.y) { - unsigned MASK = __activemask(); - const typename Policy::member_type iwork = iwork_base + threadIdx.y; - - __syncthreads(); // Don't overwrite previous iteration values until they - // are used - - ValueInit::init(m_functor, shared_prefix + word_count.value); - - // Copy previous block's accumulation total into thread[0] prefix and - // inclusive scan value of this block - for (unsigned i = threadIdx.y; i < word_count.value; ++i) { - shared_data[i + word_count.value] = shared_data[i] = shared_accum[i]; - } - __syncwarp(MASK); - if (CudaTraits::WarpSize < word_count.value) { - __syncthreads(); - } // Protect against large scan values. 
- - // Call functor to accumulate inclusive scan value for this work item - if (iwork < range.end()) { - this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_prefix + word_count.value), - false); - } - - // Scan block values into locations shared_data[1..blockDim.y] - cuda_intra_block_reduce_scan<true, FunctorType, WorkTag>( - m_functor, - typename ValueTraits::pointer_type(shared_data + word_count.value)); - - { - size_type* const block_total = - shared_data + word_count.value * blockDim.y; - for (unsigned i = threadIdx.y; i < word_count.value; ++i) { - shared_accum[i] = block_total[i]; - } - } - - // Call functor with exclusive scan value - if (iwork < range.end()) { - this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_prefix), true); - } - } - } - - public: - Policy const& get_policy() const { return m_policy; } - - //---------------------------------------- - - __device__ inline void operator()() const { -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - if (m_run_serial) { - typename ValueTraits::value_type value; - ValueInit::init(m_functor, (void*)&value); - const WorkRange range(m_policy, blockIdx.x, gridDim.x); - - for (typename Policy::member_type iwork_base = range.begin(); - iwork_base < range.end(); iwork_base++) { - this->template exec_range<WorkTag>(iwork_base, value, true); - } - } else { -#endif - if (!m_final) { - initial(); - } else { - final(); - } -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - } -#endif - } - - // Determine block size constrained by shared memory: - inline unsigned local_block_size(const FunctorType& f) { - // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 - // (16 warps) gridDim.x <= blockDim.y * blockDim.y - // - // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit - // testing - - unsigned n = CudaTraits::WarpSize * 4; - while (n && - unsigned(m_policy.space() - .impl_internal_space_instance() - ->m_maxShmemPerBlock) < - 
cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, - WorkTag>(f, n)) { - n >>= 1; - } - return n; - } - - inline void execute() { - const auto nwork = m_policy.end() - m_policy.begin(); - if (nwork) { - enum { GridMaxComputeCapability_2x = 0x0ffff }; - - const int block_size = local_block_size(m_functor); - KOKKOS_ASSERT(block_size > 0); - - const int grid_max = - (block_size * block_size) < GridMaxComputeCapability_2x - ? (block_size * block_size) - : GridMaxComputeCapability_2x; - - // At most 'max_grid' blocks: - const int max_grid = - std::min(int(grid_max), int((nwork + block_size - 1) / block_size)); - - // How much work per block: - const int work_per_block = (nwork + max_grid - 1) / max_grid; - - // How many block are really needed for this much work: - const int grid_x = (nwork + work_per_block - 1) / work_per_block; - - m_scratch_space = cuda_internal_scratch_space( - m_policy.space(), ValueTraits::value_size(m_functor) * grid_x); - m_scratch_flags = - cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type) * 1); - - dim3 grid(grid_x, 1, 1); - dim3 block(1, block_size, 1); // REQUIRED DIMENSIONS ( 1 , N , 1 ) - const int shmem = ValueTraits::value_size(m_functor) * (block_size + 2); - -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - if (m_run_serial) { - block = dim3(1, 1, 1); - grid = dim3(1, 1, 1); - } else { -#endif - m_final = false; - CudaParallelLaunch<ParallelScan, LaunchBounds>( - *this, grid, block, shmem, - m_policy.space().impl_internal_space_instance(), - false); // copy to device and execute -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - } -#endif - m_final = true; - CudaParallelLaunch<ParallelScan, LaunchBounds>( - *this, grid, block, shmem, - m_policy.space().impl_internal_space_instance(), - false); // copy to device and execute - } - } - - ParallelScan(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), - m_policy(arg_policy), - m_scratch_space(nullptr), - m_scratch_flags(nullptr), 
- m_final(false) -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - , - m_run_serial(Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) -#endif - { - } -}; - -//---------------------------------------------------------------------------- -template <class FunctorType, class ReturnType, class... Traits> -class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, - ReturnType, Kokkos::Cuda> { - public: - using Policy = Kokkos::RangePolicy<Traits...>; - - private: - using Member = typename Policy::member_type; - using WorkTag = typename Policy::work_tag; - using WorkRange = typename Policy::WorkRange; - using LaunchBounds = typename Policy::launch_bounds; - - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; - - public: - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - using functor_type = FunctorType; - using size_type = Cuda::size_type; - - private: - // Algorithmic constraints: - // (a) blockDim.y is a power of two - // (b) blockDim.y == blockDim.z == 1 - // (c) gridDim.x <= blockDim.y * blockDim.y - // (d) gridDim.y == gridDim.z == 1 - - const FunctorType m_functor; - const Policy m_policy; - size_type* m_scratch_space; - size_type* m_scratch_flags; - size_type m_final; - ReturnType& m_returnvalue; -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - bool m_run_serial; -#endif - - template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update, - const bool final_result) const { - m_functor(i, update, final_result); - } - - template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update, - const bool 
final_result) const { - m_functor(TagType(), i, update, final_result); - } - - //---------------------------------------- - - __device__ inline void initial() const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / - sizeof(size_type)> - word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); - - size_type* const shared_value = - kokkos_impl_cuda_shared_memory<size_type>() + - word_count.value * threadIdx.y; - - ValueInit::init(m_functor, shared_value); - - // Number of blocks is bounded so that the reduction can be limited to two - // passes. Each thread block is given an approximately equal amount of work - // to perform. Accumulate the values for this block. The accumulation - // ordering does not match the final pass, but is arithmatically equivalent. - - const WorkRange range(m_policy, blockIdx.x, gridDim.x); - - for (Member iwork = range.begin() + threadIdx.y, iwork_end = range.end(); - iwork < iwork_end; iwork += blockDim.y) { - this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_value), false); - } - - // Reduce and scan, writing out scan of blocks' totals and block-groups' - // totals. Blocks' scan values are written to 'blockIdx.x' location. - // Block-groups' scan values are at: i = ( j * blockDim.y - 1 ) for i < - // gridDim.x - cuda_single_inter_block_reduce_scan<true, FunctorType, WorkTag>( - m_functor, blockIdx.x, gridDim.x, - kokkos_impl_cuda_shared_memory<size_type>(), m_scratch_space, - m_scratch_flags); - } - - //---------------------------------------- - - __device__ inline void final() const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / - sizeof(size_type)> - word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); - - // Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , - // value[2] , ... 
} - size_type* const shared_data = kokkos_impl_cuda_shared_memory<size_type>(); - size_type* const shared_prefix = - shared_data + word_count.value * threadIdx.y; - size_type* const shared_accum = - shared_data + word_count.value * (blockDim.y + 1); - - // Starting value for this thread block is the previous block's total. - if (blockIdx.x) { - size_type* const block_total = - m_scratch_space + word_count.value * (blockIdx.x - 1); - for (unsigned i = threadIdx.y; i < word_count.value; ++i) { - shared_accum[i] = block_total[i]; - } - } else if (0 == threadIdx.y) { - ValueInit::init(m_functor, shared_accum); - } - - const WorkRange range(m_policy, blockIdx.x, gridDim.x); - - for (typename Policy::member_type iwork_base = range.begin(); - iwork_base < range.end(); iwork_base += blockDim.y) { - unsigned MASK = __activemask(); - - const typename Policy::member_type iwork = iwork_base + threadIdx.y; - - __syncthreads(); // Don't overwrite previous iteration values until they - // are used - - ValueInit::init(m_functor, shared_prefix + word_count.value); - - // Copy previous block's accumulation total into thread[0] prefix and - // inclusive scan value of this block - for (unsigned i = threadIdx.y; i < word_count.value; ++i) { - shared_data[i + word_count.value] = shared_data[i] = shared_accum[i]; - } - - __syncwarp(MASK); - if (CudaTraits::WarpSize < word_count.value) { - __syncthreads(); - } // Protect against large scan values. 
- - // Call functor to accumulate inclusive scan value for this work item - if (iwork < range.end()) { - this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_prefix + word_count.value), - false); - } - - // Scan block values into locations shared_data[1..blockDim.y] - cuda_intra_block_reduce_scan<true, FunctorType, WorkTag>( - m_functor, - typename ValueTraits::pointer_type(shared_data + word_count.value)); - - { - size_type* const block_total = - shared_data + word_count.value * blockDim.y; - for (unsigned i = threadIdx.y; i < word_count.value; ++i) { - shared_accum[i] = block_total[i]; - } - } - - // Call functor with exclusive scan value - if (iwork < range.end()) { - this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_prefix), true); - } - } - } - - public: - Policy const& get_policy() const { return m_policy; } - - //---------------------------------------- - - __device__ inline void operator()() const { -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - if (m_run_serial) { - typename ValueTraits::value_type value; - ValueInit::init(m_functor, (void*)&value); - const WorkRange range(m_policy, blockIdx.x, gridDim.x); - - for (typename Policy::member_type iwork_base = range.begin(); - iwork_base < range.end(); iwork_base++) { - this->template exec_range<WorkTag>(iwork_base, value, true); - } - *((typename ValueTraits::value_type*)m_scratch_space) = value; - } else { -#endif - if (!m_final) { - initial(); - } else { - final(); - } -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - } -#endif - } - - // Determine block size constrained by shared memory: - inline unsigned local_block_size(const FunctorType& f) { - // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 - // (16 warps) gridDim.x <= blockDim.y * blockDim.y - // - // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit - // testing - - unsigned n = CudaTraits::WarpSize * 4; - while (n && - unsigned(m_policy.space() - 
.impl_internal_space_instance() - ->m_maxShmemPerBlock) < - cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, - WorkTag>(f, n)) { - n >>= 1; - } - return n; - } - - inline void execute() { - const auto nwork = m_policy.end() - m_policy.begin(); - if (nwork) { - enum { GridMaxComputeCapability_2x = 0x0ffff }; - - const int block_size = local_block_size(m_functor); - KOKKOS_ASSERT(block_size > 0); - - const int grid_max = - (block_size * block_size) < GridMaxComputeCapability_2x - ? (block_size * block_size) - : GridMaxComputeCapability_2x; - - // At most 'max_grid' blocks: - const int max_grid = - std::min(int(grid_max), int((nwork + block_size - 1) / block_size)); - - // How much work per block: - const int work_per_block = (nwork + max_grid - 1) / max_grid; - - // How many block are really needed for this much work: - const int grid_x = (nwork + work_per_block - 1) / work_per_block; - - m_scratch_space = cuda_internal_scratch_space( - m_policy.space(), ValueTraits::value_size(m_functor) * grid_x); - m_scratch_flags = - cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type) * 1); - - dim3 grid(grid_x, 1, 1); - dim3 block(1, block_size, 1); // REQUIRED DIMENSIONS ( 1 , N , 1 ) - const int shmem = ValueTraits::value_size(m_functor) * (block_size + 2); - -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - if (m_run_serial) { - block = dim3(1, 1, 1); - grid = dim3(1, 1, 1); - } else { -#endif - - m_final = false; - CudaParallelLaunch<ParallelScanWithTotal, LaunchBounds>( - *this, grid, block, shmem, - m_policy.space().impl_internal_space_instance(), - false); // copy to device and execute -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - } -#endif - m_final = true; - CudaParallelLaunch<ParallelScanWithTotal, LaunchBounds>( - *this, grid, block, shmem, - m_policy.space().impl_internal_space_instance(), - false); // copy to device and execute - - const int size = ValueTraits::value_size(m_functor); -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - if 
(m_run_serial) - DeepCopy<HostSpace, CudaSpace>(&m_returnvalue, m_scratch_space, size); - else -#endif - DeepCopy<HostSpace, CudaSpace>( - &m_returnvalue, m_scratch_space + (grid_x - 1) * size / sizeof(int), - size); - } - } - - ParallelScanWithTotal(const FunctorType& arg_functor, - const Policy& arg_policy, ReturnType& arg_returnvalue) - : m_functor(arg_functor), - m_policy(arg_policy), - m_scratch_space(nullptr), - m_scratch_flags(nullptr), - m_final(false), - m_returnvalue(arg_returnvalue) -#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION - , - m_run_serial(Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) -#endif - { - } -}; - -} // namespace Impl - -} // namespace Kokkos - -#endif /* defined(KOKKOS_ENABLE_CUDA) */ -#endif /* #ifndef KOKKOS_CUDA_PARALLEL_HPP */ diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e586bb4cc6c58e41ad3eff95ddf05f6e0a5cb8cf --- /dev/null +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp @@ -0,0 +1,477 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_PARALLEL_MD_RANGE_HPP +#define KOKKOS_CUDA_PARALLEL_MD_RANGE_HPP + +#include <Kokkos_Macros.hpp> +#if defined(KOKKOS_ENABLE_CUDA) + +#include <algorithm> +#include <string> + +#include <Kokkos_Parallel.hpp> + +#include <Cuda/Kokkos_Cuda_KernelLaunch.hpp> +#include <Cuda/Kokkos_Cuda_ReduceScan.hpp> +#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp> +#include <Kokkos_MinMaxClamp.hpp> + +#include <impl/Kokkos_Tools.hpp> +#include <typeinfo> + +#include <KokkosExp_MDRangePolicy.hpp> +#include <impl/KokkosExp_IterateTileGPU.hpp> + +namespace Kokkos { +namespace Impl { + +template <class FunctorType, class... 
Traits> +class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> { + public: + using Policy = Kokkos::MDRangePolicy<Traits...>; + using functor_type = FunctorType; + + private: + using RP = Policy; + using array_index_type = typename Policy::array_index_type; + using index_type = typename Policy::index_type; + using LaunchBounds = typename Policy::launch_bounds; + + const FunctorType m_functor; + const Policy m_rp; + + public: + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy& pol, const Functor&) { + cudaFuncAttributes attr = + CudaParallelLaunch<ParallelFor, + LaunchBounds>::get_cuda_func_attributes(); + auto const& prop = pol.space().cuda_device_prop(); + // Limits due to registers/SM, MDRange doesn't have + // shared memory constraints + int const regs_per_sm = prop.regsPerMultiprocessor; + int const regs_per_thread = attr.numRegs; + int const max_threads_per_sm = regs_per_sm / regs_per_thread; + return std::min( + max_threads_per_sm, + static_cast<int>(Kokkos::Impl::CudaTraits::MaxHierarchicalParallelism)); + } + Policy const& get_policy() const { return m_rp; } + inline __device__ void operator()() const { + Kokkos::Impl::DeviceIterateTile<Policy::rank, Policy, FunctorType, + typename Policy::work_tag>(m_rp, m_functor) + .exec_range(); + } + + inline void execute() const { + if (m_rp.m_num_tiles == 0) return; + const auto maxblocks = cuda_internal_maximum_grid_count(); + if (RP::rank == 2) { + const dim3 block(m_rp.m_tile[0], m_rp.m_tile[1], 1); + KOKKOS_ASSERT(block.x > 0); + KOKKOS_ASSERT(block.y > 0); + const dim3 grid( + std::min<array_index_type>( + (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x, + maxblocks[0]), + std::min<array_index_type>( + (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y, + maxblocks[1]), + 1); + CudaParallelLaunch<ParallelFor, LaunchBounds>( + *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), + false); + } else if (RP::rank 
== 3) { + const dim3 block(m_rp.m_tile[0], m_rp.m_tile[1], m_rp.m_tile[2]); + KOKKOS_ASSERT(block.x > 0); + KOKKOS_ASSERT(block.y > 0); + KOKKOS_ASSERT(block.z > 0); + const dim3 grid( + std::min<array_index_type>( + (m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1) / block.x, + maxblocks[0]), + std::min<array_index_type>( + (m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1) / block.y, + maxblocks[1]), + std::min<array_index_type>( + (m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1) / block.z, + maxblocks[2])); + CudaParallelLaunch<ParallelFor, LaunchBounds>( + *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), + false); + } else if (RP::rank == 4) { + // id0,id1 encoded within threadIdx.x; id2 to threadIdx.y; id3 to + // threadIdx.z + const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1], m_rp.m_tile[2], + m_rp.m_tile[3]); + KOKKOS_ASSERT(block.y > 0); + KOKKOS_ASSERT(block.z > 0); + const dim3 grid( + std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1], + maxblocks[0]), + std::min<array_index_type>( + (m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1) / block.y, + maxblocks[1]), + std::min<array_index_type>( + (m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1) / block.z, + maxblocks[2])); + CudaParallelLaunch<ParallelFor, LaunchBounds>( + *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), + false); + } else if (RP::rank == 5) { + // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4 to + // threadIdx.z + const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1], + m_rp.m_tile[2] * m_rp.m_tile[3], m_rp.m_tile[4]); + KOKKOS_ASSERT(block.z > 0); + const dim3 grid( + std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1], + maxblocks[0]), + std::min<array_index_type>(m_rp.m_tile_end[2] * m_rp.m_tile_end[3], + maxblocks[1]), + std::min<array_index_type>( + (m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1) / block.z, + maxblocks[2])); + CudaParallelLaunch<ParallelFor, LaunchBounds>( + *this, grid, block, 0, 
m_rp.space().impl_internal_space_instance(), + false); + } else if (RP::rank == 6) { + // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4,id5 to + // threadIdx.z + const dim3 block(m_rp.m_tile[0] * m_rp.m_tile[1], + m_rp.m_tile[2] * m_rp.m_tile[3], + m_rp.m_tile[4] * m_rp.m_tile[5]); + const dim3 grid( + std::min<array_index_type>(m_rp.m_tile_end[0] * m_rp.m_tile_end[1], + maxblocks[0]), + std::min<array_index_type>(m_rp.m_tile_end[2] * m_rp.m_tile_end[3], + maxblocks[1]), + std::min<array_index_type>(m_rp.m_tile_end[4] * m_rp.m_tile_end[5], + maxblocks[2])); + CudaParallelLaunch<ParallelFor, LaunchBounds>( + *this, grid, block, 0, m_rp.space().impl_internal_space_instance(), + false); + } else { + Kokkos::abort("Kokkos::MDRange Error: Exceeded rank bounds with Cuda\n"); + } + + } // end execute + + // inline + ParallelFor(const FunctorType& arg_functor, Policy arg_policy) + : m_functor(arg_functor), m_rp(arg_policy) {} +}; + +template <class FunctorType, class ReducerType, class... 
Traits> +class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, + Kokkos::Cuda> { + public: + using Policy = Kokkos::MDRangePolicy<Traits...>; + + private: + using array_index_type = typename Policy::array_index_type; + using index_type = typename Policy::index_type; + + using WorkTag = typename Policy::work_tag; + using Member = typename Policy::member_type; + using LaunchBounds = typename Policy::launch_bounds; + + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + WorkTag, void>::type; + + using Analysis = + Kokkos::Impl::FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, + ReducerTypeFwd>; + + public: + using pointer_type = typename Analysis::pointer_type; + using value_type = typename Analysis::value_type; + using reference_type = typename Analysis::reference_type; + using functor_type = FunctorType; + using size_type = Cuda::size_type; + using reducer_type = ReducerType; + + // Algorithmic constraints: blockSize is a power of two AND blockDim.y == + // blockDim.z == 1 + + const FunctorType m_functor; + const Policy m_policy; // used for workrange and nwork + const ReducerType m_reducer; + const pointer_type m_result_ptr; + const bool m_result_ptr_device_accessible; + size_type* m_scratch_space; + size_type* m_scratch_flags; + size_type* m_unified_space; + + using DeviceIteratePattern = typename Kokkos::Impl::Reduce::DeviceIterateTile< + Policy::rank, Policy, FunctorType, typename Policy::work_tag, + reference_type>; + + // Shall we use the shfl based reduction or not (only use it for static sized + // types of more than 128bit + static constexpr bool UseShflReduction = false; + //((sizeof(value_type)>2*sizeof(double)) && Analysis::StaticValueSize) + // Some crutch to do function overloading + + 
public: + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy& pol, const Functor&) { + cudaFuncAttributes attr = + CudaParallelLaunch<ParallelReduce, + LaunchBounds>::get_cuda_func_attributes(); + auto const& prop = pol.space().cuda_device_prop(); + // Limits due do registers/SM + int const regs_per_sm = prop.regsPerMultiprocessor; + int const regs_per_thread = attr.numRegs; + int const max_threads_per_sm = regs_per_sm / regs_per_thread; + return std::min( + max_threads_per_sm, + static_cast<int>(Kokkos::Impl::CudaTraits::MaxHierarchicalParallelism)); + } + Policy const& get_policy() const { return m_policy; } + inline __device__ void exec_range(reference_type update) const { + Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank, Policy, FunctorType, + typename Policy::work_tag, + reference_type>(m_policy, m_functor, + update) + .exec_range(); + } + + inline __device__ void operator()() const { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / + sizeof(size_type)> + word_count(Analysis::value_size( + ReducerConditional::select(m_functor, m_reducer)) / + sizeof(size_type)); + + { + reference_type value = final_reducer.init(reinterpret_cast<pointer_type>( + kokkos_impl_cuda_shared_memory<size_type>() + + threadIdx.y * word_count.value)); + + // Number of blocks is bounded so that the reduction can be limited to two + // passes. Each thread block is given an approximately equal amount of + // work to perform. Accumulate the values for this block. The accumulation + // ordering does not match the final pass, but is arithmatically + // equivalent. + + this->exec_range(value); + } + + // Reduce with final value at blockDim.y - 1 location. 
+ // Problem: non power-of-two blockDim + if (cuda_single_inter_block_reduce_scan<false>( + final_reducer, blockIdx.x, gridDim.x, + kokkos_impl_cuda_shared_memory<size_type>(), m_scratch_space, + m_scratch_flags)) { + // This is the final block with the final result at the final threads' + // location + size_type* const shared = kokkos_impl_cuda_shared_memory<size_type>() + + (blockDim.y - 1) * word_count.value; + size_type* const global = + m_result_ptr_device_accessible + ? reinterpret_cast<size_type*>(m_result_ptr) + : (m_unified_space ? m_unified_space : m_scratch_space); + + if (threadIdx.y == 0) { + final_reducer.final(reinterpret_cast<value_type*>(shared)); + } + + if (CudaTraits::WarpSize < word_count.value) { + __syncthreads(); + } + + for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) { + global[i] = shared[i]; + } + } + } + + // Determine block size constrained by shared memory: + inline unsigned local_block_size(const FunctorType& f) { + unsigned n = CudaTraits::WarpSize * 8; + int shmem_size = + cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, WorkTag>( + f, n); + using closure_type = Impl::ParallelReduce<FunctorType, Policy, ReducerType>; + cudaFuncAttributes attr = + CudaParallelLaunch<closure_type, + LaunchBounds>::get_cuda_func_attributes(); + while ( + (n && + (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < + shmem_size)) || + (n > + static_cast<unsigned>( + Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, f, 1, + shmem_size, 0)))) { + n >>= 1; + shmem_size = cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, + WorkTag>(f, n); + } + return n; + } + + inline void execute() { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + const auto nwork = m_policy.m_num_tiles; + if (nwork) { + int block_size = m_policy.m_prod_tile_dims; + // CONSTRAINT: Algorithm requires 
block_size >= product of tile dimensions + // Nearest power of two + int exponent_pow_two = std::ceil(std::log2(block_size)); + block_size = std::pow(2, exponent_pow_two); + int suggested_blocksize = local_block_size(m_functor); + + block_size = (block_size > suggested_blocksize) + ? block_size + : suggested_blocksize; // Note: block_size must be less + // than or equal to 512 + + m_scratch_space = cuda_internal_scratch_space( + m_policy.space(), Analysis::value_size(ReducerConditional::select( + m_functor, m_reducer)) * + block_size /* block_size == max block_count */); + m_scratch_flags = + cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type)); + m_unified_space = cuda_internal_scratch_unified( + m_policy.space(), Analysis::value_size(ReducerConditional::select( + m_functor, m_reducer))); + + // REQUIRED ( 1 , N , 1 ) + const dim3 block(1, block_size, 1); + // Required grid.x <= block.y + const dim3 grid(std::min(int(block.y), int(nwork)), 1, 1); + + // TODO @graph We need to effectively insert this in to the graph + const int shmem = + UseShflReduction + ? 
0 + : cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, + WorkTag>(m_functor, + block.y); + + CudaParallelLaunch<ParallelReduce, LaunchBounds>( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute + + if (!m_result_ptr_device_accessible) { + if (m_result_ptr) { + if (m_unified_space) { + m_policy.space().fence( + "Kokkos::Impl::ParallelReduce<Cuda, MDRangePolicy>::execute: " + "Result Not Device Accessible"); + + const int count = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + for (int i = 0; i < count; ++i) { + m_result_ptr[i] = pointer_type(m_unified_space)[i]; + } + } else { + const int size = Analysis::value_size( + ReducerConditional::select(m_functor, m_reducer)); + DeepCopy<HostSpace, CudaSpace, Cuda>(m_policy.space(), m_result_ptr, + m_scratch_space, size); + } + } + } + } else { + if (m_result_ptr) { + // TODO @graph We need to effectively insert this in to the graph + final_reducer.init(m_result_ptr); + } + } + } + + template <class ViewType> + ParallelReduce( + const FunctorType& arg_functor, const Policy& arg_policy, + const ViewType& arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value, void*> = nullptr) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(InvalidType()), + m_result_ptr(arg_result.data()), + m_result_ptr_device_accessible( + MemorySpaceAccess<Kokkos::CudaSpace, + typename ViewType::memory_space>::accessible), + m_scratch_space(nullptr), + m_scratch_flags(nullptr), + m_unified_space(nullptr) { + check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor); + } + + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const ReducerType& reducer) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_result_ptr_device_accessible( + MemorySpaceAccess<Kokkos::CudaSpace, + typename ReducerType::result_view_type:: + 
memory_space>::accessible), + m_scratch_space(nullptr), + m_scratch_flags(nullptr), + m_unified_space(nullptr) { + check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor); + } +}; +} // namespace Impl +} // namespace Kokkos +#endif + +#endif diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp new file mode 100644 index 0000000000000000000000000000000000000000..98733430063d98815ececb0ac9fcf83592a4e681 --- /dev/null +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp @@ -0,0 +1,1049 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_PARALLEL_RANGE_HPP +#define KOKKOS_CUDA_PARALLEL_RANGE_HPP + +#include <Kokkos_Macros.hpp> +#if defined(KOKKOS_ENABLE_CUDA) + +#include <algorithm> +#include <string> + +#include <Kokkos_Parallel.hpp> + +#include <Cuda/Kokkos_Cuda_KernelLaunch.hpp> +#include <Cuda/Kokkos_Cuda_ReduceScan.hpp> +#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp> +#include <Kokkos_MinMaxClamp.hpp> + +#include <impl/Kokkos_Tools.hpp> +#include <typeinfo> + +namespace Kokkos { +namespace Impl { + +template <class FunctorType, class... 
Traits> +class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> { + public: + using Policy = Kokkos::RangePolicy<Traits...>; + + private: + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using LaunchBounds = typename Policy::launch_bounds; + + const FunctorType m_functor; + const Policy m_policy; + + ParallelFor() = delete; + ParallelFor& operator=(const ParallelFor&) = delete; + + template <class TagType> + inline __device__ std::enable_if_t<std::is_void<TagType>::value> exec_range( + const Member i) const { + m_functor(i); + } + + template <class TagType> + inline __device__ std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const Member i) const { + m_functor(TagType(), i); + } + + public: + using functor_type = FunctorType; + + Policy const& get_policy() const { return m_policy; } + + inline __device__ void operator()() const { + const Member work_stride = blockDim.y * gridDim.x; + const Member work_end = m_policy.end(); + + for (Member iwork = + m_policy.begin() + threadIdx.y + blockDim.y * blockIdx.x; + iwork < work_end; + iwork = iwork < work_end - work_stride ? 
iwork + work_stride + : work_end) { + this->template exec_range<WorkTag>(iwork); + } + } + + inline void execute() const { + const typename Policy::index_type nwork = m_policy.end() - m_policy.begin(); + + cudaFuncAttributes attr = + CudaParallelLaunch<ParallelFor, + LaunchBounds>::get_cuda_func_attributes(); + const int block_size = + Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, m_functor, 1, + 0, 0); + KOKKOS_ASSERT(block_size > 0); + dim3 block(1, block_size, 1); + dim3 grid( + std::min( + typename Policy::index_type((nwork + block.y - 1) / block.y), + typename Policy::index_type(cuda_internal_maximum_grid_count()[0])), + 1, 1); +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + if (Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) { + block = dim3(1, 1, 1); + grid = dim3(1, 1, 1); + } +#endif + + CudaParallelLaunch<ParallelFor, LaunchBounds>( + *this, grid, block, 0, m_policy.space().impl_internal_space_instance(), + false); + } + + ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +template <class FunctorType, class ReducerType, class... 
Traits> +class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, + Kokkos::Cuda> { + public: + using Policy = Kokkos::RangePolicy<Traits...>; + + private: + using WorkRange = typename Policy::WorkRange; + using WorkTag = typename Policy::work_tag; + using Member = typename Policy::member_type; + using LaunchBounds = typename Policy::launch_bounds; + + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + WorkTag, void>::type; + + using Analysis = + Kokkos::Impl::FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, + ReducerTypeFwd>; + + public: + using pointer_type = typename Analysis::pointer_type; + using value_type = typename Analysis::value_type; + using reference_type = typename Analysis::reference_type; + using functor_type = FunctorType; + // Conditionally set word_size_type to int16_t or int8_t if value_type is + // smaller than int32_t (Kokkos::Cuda::size_type) + // word_size_type is used to determine the word count, shared memory buffer + // size, and global memory buffer size before the reduction is performed. + // Within the reduction, the word count is recomputed based on word_size_type + // and when calculating indexes into the shared/global memory buffers for + // performing the reduction, word_size_type is used again. + // For scalars > 4 bytes in size, indexing into shared/global memory relies + // on the block and grid dimensions to ensure that we index at the correct + // offset rather than at every 4 byte word; such that, when the join is + // performed, we have the correct data that was copied over in chunks of 4 + // bytes. 
+ using word_size_type = std::conditional_t< + sizeof(value_type) < sizeof(Kokkos::Cuda::size_type), + std::conditional_t<sizeof(value_type) == 2, int16_t, int8_t>, + Kokkos::Cuda::size_type>; + using index_type = typename Policy::index_type; + using reducer_type = ReducerType; + + // Algorithmic constraints: blockSize is a power of two AND blockDim.y == + // blockDim.z == 1 + + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + const bool m_result_ptr_device_accessible; + const bool m_result_ptr_host_accessible; + word_size_type* m_scratch_space; + // m_scratch_flags must be of type Cuda::size_type due to use of atomics + // for tracking metadata in Kokkos_Cuda_ReduceScan.hpp + Cuda::size_type* m_scratch_flags; + word_size_type* m_unified_space; + + // FIXME_CUDA Shall we use the shfl based reduction or not (only use it for + // static sized types of more than 128bit: + // sizeof(value_type)>2*sizeof(double)) && Analysis::StaticValueSize) + static constexpr bool UseShflReduction = false; + + public: + Policy const& get_policy() const { return m_policy; } + + // Make the exec_range calls call to Reduce::DeviceIterateTile + template <class TagType> + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update) const { + m_functor(i, update); + } + + template <class TagType> + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update) const { + m_functor(TagType(), i, update); + } + + __device__ inline void operator()() const { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + const integral_nonzero_constant<word_size_type, Analysis::StaticValueSize / + sizeof(word_size_type)> + word_count(Analysis::value_size( + ReducerConditional::select(m_functor, m_reducer)) / + sizeof(word_size_type)); + + { + reference_type value = 
final_reducer.init(reinterpret_cast<pointer_type>( + kokkos_impl_cuda_shared_memory<word_size_type>() + + threadIdx.y * word_count.value)); + + // Number of blocks is bounded so that the reduction can be limited to two + // passes. Each thread block is given an approximately equal amount of + // work to perform. Accumulate the values for this block. The accumulation + // ordering does not match the final pass, but is arithmatically + // equivalent. + + const WorkRange range(m_policy, blockIdx.x, gridDim.x); + + for (Member iwork = range.begin() + threadIdx.y, iwork_end = range.end(); + iwork < iwork_end; iwork += blockDim.y) { + this->template exec_range<WorkTag>(iwork, value); + } + } + + // Reduce with final value at blockDim.y - 1 location. + // Shortcut for length zero reduction + bool zero_length = m_policy.begin() == m_policy.end(); + bool do_final_reduction = true; + if (!zero_length) + do_final_reduction = cuda_single_inter_block_reduce_scan<false>( + final_reducer, blockIdx.x, gridDim.x, + kokkos_impl_cuda_shared_memory<word_size_type>(), m_scratch_space, + m_scratch_flags); + + if (do_final_reduction) { + // This is the final block with the final result at the final threads' + // location + + word_size_type* const shared = + kokkos_impl_cuda_shared_memory<word_size_type>() + + (blockDim.y - 1) * word_count.value; + word_size_type* const global = + m_result_ptr_device_accessible + ? reinterpret_cast<word_size_type*>(m_result_ptr) + : (m_unified_space ? 
m_unified_space : m_scratch_space); + + if (threadIdx.y == 0) { + final_reducer.final(reinterpret_cast<value_type*>(shared)); + } + + if (CudaTraits::WarpSize < word_count.value) { + __syncthreads(); + } + + for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) { + global[i] = shared[i]; + } + } + } + + // Determine block size constrained by shared memory: + inline unsigned local_block_size(const FunctorType& f) { + unsigned n = CudaTraits::WarpSize * 8; + int shmem_size = + cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, WorkTag>( + f, n); + using closure_type = Impl::ParallelReduce<FunctorType, Policy, ReducerType>; + cudaFuncAttributes attr = + CudaParallelLaunch<closure_type, + LaunchBounds>::get_cuda_func_attributes(); + while ( + (n && + (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < + shmem_size)) || + (n > + static_cast<unsigned>( + Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, f, 1, + shmem_size, 0)))) { + n >>= 1; + shmem_size = cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, + WorkTag>(f, n); + } + return n; + } + + inline void execute() { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + const index_type nwork = m_policy.end() - m_policy.begin(); + const bool need_device_set = Analysis::has_init_member_function || + Analysis::has_final_member_function || + !m_result_ptr_host_accessible || +#ifdef KOKKOS_CUDA_ENABLE_GRAPHS + Policy::is_graph_kernel::value || +#endif + !std::is_same<ReducerType, InvalidType>::value; + if ((nwork > 0) || need_device_set) { + const int block_size = local_block_size(m_functor); + + KOKKOS_ASSERT(block_size > 0); + + // TODO: down casting these uses more space than required? 
+ m_scratch_space = (word_size_type*)cuda_internal_scratch_space( + m_policy.space(), Analysis::value_size(ReducerConditional::select( + m_functor, m_reducer)) * + block_size /* block_size == max block_count */); + + // Intentionally do not downcast to word_size_type since we use Cuda + // atomics in Kokkos_Cuda_ReduceScan.hpp + m_scratch_flags = cuda_internal_scratch_flags(m_policy.space(), + sizeof(Cuda::size_type)); + m_unified_space = + reinterpret_cast<word_size_type*>(cuda_internal_scratch_unified( + m_policy.space(), Analysis::value_size(ReducerConditional::select( + m_functor, m_reducer)))); + + // REQUIRED ( 1 , N , 1 ) + dim3 block(1, block_size, 1); + // Required grid.x <= block.y + dim3 grid(std::min(int(block.y), int((nwork + block.y - 1) / block.y)), 1, + 1); + + // TODO @graph We need to effectively insert this in to the graph + const int shmem = + UseShflReduction + ? 0 + : cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, + WorkTag>(m_functor, + block.y); + + if ((nwork == 0) +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + || Kokkos::Impl::CudaInternal::cuda_use_serial_execution() +#endif + ) { + block = dim3(1, 1, 1); + grid = dim3(1, 1, 1); + } + + CudaParallelLaunch<ParallelReduce, LaunchBounds>( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute + + if (!m_result_ptr_device_accessible) { + if (m_result_ptr) { + if (m_unified_space) { + m_policy.space().fence( + "Kokkos::Impl::ParallelReduce<Cuda, RangePolicy>::execute: " + "Result " + "Not Device Accessible"); + const int count = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + for (int i = 0; i < count; ++i) { + m_result_ptr[i] = pointer_type(m_unified_space)[i]; + } + } else { + const int size = Analysis::value_size( + ReducerConditional::select(m_functor, m_reducer)); + DeepCopy<HostSpace, CudaSpace, Cuda>(m_policy.space(), m_result_ptr, + m_scratch_space, size); + } + } + } + } 
else { + if (m_result_ptr) { + // TODO @graph We need to effectively insert this in to the graph + final_reducer.init(m_result_ptr); + } + } + } + + template <class ViewType> + ParallelReduce( + const FunctorType& arg_functor, const Policy& arg_policy, + const ViewType& arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value, void*> = nullptr) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(InvalidType()), + m_result_ptr(arg_result.data()), + m_result_ptr_device_accessible( + MemorySpaceAccess<Kokkos::CudaSpace, + typename ViewType::memory_space>::accessible), + m_result_ptr_host_accessible( + MemorySpaceAccess<Kokkos::HostSpace, + typename ViewType::memory_space>::accessible), + m_scratch_space(nullptr), + m_scratch_flags(nullptr), + m_unified_space(nullptr) { + check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor); + } + + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const ReducerType& reducer) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_result_ptr_device_accessible( + MemorySpaceAccess<Kokkos::CudaSpace, + typename ReducerType::result_view_type:: + memory_space>::accessible), + m_result_ptr_host_accessible( + MemorySpaceAccess<Kokkos::HostSpace, + typename ReducerType::result_view_type:: + memory_space>::accessible), + m_scratch_space(nullptr), + m_scratch_flags(nullptr), + m_unified_space(nullptr) { + check_reduced_view_shmem_size<WorkTag>(m_policy, m_functor); + } +}; + +template <class FunctorType, class... 
Traits> +class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> { + public: + using Policy = Kokkos::RangePolicy<Traits...>; + + private: + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using LaunchBounds = typename Policy::launch_bounds; + + using Analysis = Kokkos::Impl::FunctorAnalysis<FunctorPatternInterface::SCAN, + Policy, FunctorType>; + + public: + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using functor_type = FunctorType; + using size_type = Cuda::size_type; + + private: + // Algorithmic constraints: + // (a) blockDim.y is a power of two + // (b) blockDim.y == blockDim.z == 1 + // (c) gridDim.x <= blockDim.y * blockDim.y + // (d) gridDim.y == gridDim.z == 1 + + const FunctorType m_functor; + const Policy m_policy; + size_type* m_scratch_space; + size_type* m_scratch_flags; + size_type m_final; +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + bool m_run_serial; +#endif + + template <class TagType> + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update, const bool final_result) const { + m_functor(i, update, final_result); + } + + template <class TagType> + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update, const bool final_result) const { + m_functor(TagType(), i, update, final_result); + } + + //---------------------------------------- + + __device__ inline void initial() const { + typename Analysis::Reducer final_reducer(&m_functor); + + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / + sizeof(size_type)> + word_count(Analysis::value_size(m_functor) / sizeof(size_type)); + + size_type* const shared_value = + kokkos_impl_cuda_shared_memory<size_type>() + + word_count.value * threadIdx.y; + + 
final_reducer.init(reinterpret_cast<pointer_type>(shared_value)); + + // Number of blocks is bounded so that the reduction can be limited to two + // passes. Each thread block is given an approximately equal amount of work + // to perform. Accumulate the values for this block. The accumulation + // ordering does not match the final pass, but is arithmatically equivalent. + + const WorkRange range(m_policy, blockIdx.x, gridDim.x); + + for (Member iwork = range.begin() + threadIdx.y, iwork_end = range.end(); + iwork < iwork_end; iwork += blockDim.y) { + this->template exec_range<WorkTag>( + iwork, + final_reducer.reference(reinterpret_cast<pointer_type>(shared_value)), + false); + } + + // Reduce and scan, writing out scan of blocks' totals and block-groups' + // totals. Blocks' scan values are written to 'blockIdx.x' location. + // Block-groups' scan values are at: i = ( j * blockDim.y - 1 ) for i < + // gridDim.x + cuda_single_inter_block_reduce_scan<true>( + final_reducer, blockIdx.x, gridDim.x, + kokkos_impl_cuda_shared_memory<size_type>(), m_scratch_space, + m_scratch_flags); + } + + //---------------------------------------- + + __device__ inline void final() const { + typename Analysis::Reducer final_reducer(&m_functor); + + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / + sizeof(size_type)> + word_count(Analysis::value_size(m_functor) / sizeof(size_type)); + + // Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , + // value[2] , ... } + size_type* const shared_data = kokkos_impl_cuda_shared_memory<size_type>(); + size_type* const shared_prefix = + shared_data + word_count.value * threadIdx.y; + size_type* const shared_accum = + shared_data + word_count.value * (blockDim.y + 1); + + // Starting value for this thread block is the previous block's total. 
+ if (blockIdx.x) { + size_type* const block_total = + m_scratch_space + word_count.value * (blockIdx.x - 1); + for (unsigned i = threadIdx.y; i < word_count.value; ++i) { + shared_accum[i] = block_total[i]; + } + } else if (0 == threadIdx.y) { + final_reducer.init(reinterpret_cast<pointer_type>(shared_accum)); + } + + const WorkRange range(m_policy, blockIdx.x, gridDim.x); + + for (typename Policy::member_type iwork_base = range.begin(); + iwork_base < range.end(); iwork_base += blockDim.y) { + unsigned MASK = __activemask(); + const typename Policy::member_type iwork = iwork_base + threadIdx.y; + + __syncthreads(); // Don't overwrite previous iteration values until they + // are used + + final_reducer.init( + reinterpret_cast<pointer_type>(shared_prefix + word_count.value)); + + // Copy previous block's accumulation total into thread[0] prefix and + // inclusive scan value of this block + for (unsigned i = threadIdx.y; i < word_count.value; ++i) { + shared_data[i + word_count.value] = shared_data[i] = shared_accum[i]; + } + __syncwarp(MASK); + if (CudaTraits::WarpSize < word_count.value) { + __syncthreads(); + } // Protect against large scan values. 
+ + // Call functor to accumulate inclusive scan value for this work item + if (iwork < range.end()) { + this->template exec_range<WorkTag>( + iwork, + final_reducer.reference(reinterpret_cast<pointer_type>( + shared_prefix + word_count.value)), + false); + } + + // Scan block values into locations shared_data[1..blockDim.y] + cuda_intra_block_reduce_scan<true>( + final_reducer, + typename Analysis::pointer_type(shared_data + word_count.value)); + + { + size_type* const block_total = + shared_data + word_count.value * blockDim.y; + for (unsigned i = threadIdx.y; i < word_count.value; ++i) { + shared_accum[i] = block_total[i]; + } + } + + // Call functor with exclusive scan value + if (iwork < range.end()) { + this->template exec_range<WorkTag>( + iwork, + final_reducer.reference( + reinterpret_cast<pointer_type>(shared_prefix)), + true); + } + } + } + + public: + Policy const& get_policy() const { return m_policy; } + + //---------------------------------------- + + __device__ inline void operator()() const { +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + if (m_run_serial) { + typename Analysis::value_type value; + ValueInit::init(m_functor, (void*)&value); + const WorkRange range(m_policy, blockIdx.x, gridDim.x); + + for (typename Policy::member_type iwork_base = range.begin(); + iwork_base < range.end(); iwork_base++) { + this->template exec_range<WorkTag>(iwork_base, value, true); + } + } else { +#endif + if (!m_final) { + initial(); + } else { + final(); + } +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + } +#endif + } + + // Determine block size constrained by shared memory: + inline unsigned local_block_size(const FunctorType& f) { + // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 + // (16 warps) gridDim.x <= blockDim.y * blockDim.y + // + // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit + // testing + + unsigned n = CudaTraits::WarpSize * 4; + while (n && unsigned(m_policy.space() + 
.impl_internal_space_instance() + ->m_maxShmemPerBlock) < + cuda_single_inter_block_reduce_scan_shmem<true, FunctorType, + WorkTag>(f, n)) { + n >>= 1; + } + return n; + } + + inline void execute() { + const auto nwork = m_policy.end() - m_policy.begin(); + if (nwork) { + constexpr int GridMaxComputeCapability_2x = 0x0ffff; + + const int block_size = local_block_size(m_functor); + KOKKOS_ASSERT(block_size > 0); + + const int grid_max = + (block_size * block_size) < GridMaxComputeCapability_2x + ? (block_size * block_size) + : GridMaxComputeCapability_2x; + + // At most 'max_grid' blocks: + const int max_grid = + std::min(int(grid_max), int((nwork + block_size - 1) / block_size)); + + // How much work per block: + const int work_per_block = (nwork + max_grid - 1) / max_grid; + + // How many block are really needed for this much work: + const int grid_x = (nwork + work_per_block - 1) / work_per_block; + + m_scratch_space = cuda_internal_scratch_space( + m_policy.space(), Analysis::value_size(m_functor) * grid_x); + m_scratch_flags = + cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type) * 1); + + dim3 grid(grid_x, 1, 1); + dim3 block(1, block_size, 1); // REQUIRED DIMENSIONS ( 1 , N , 1 ) + const int shmem = Analysis::value_size(m_functor) * (block_size + 2); + +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + if (m_run_serial) { + block = dim3(1, 1, 1); + grid = dim3(1, 1, 1); + } else { +#endif + m_final = false; + CudaParallelLaunch<ParallelScan, LaunchBounds>( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + } +#endif + m_final = true; + CudaParallelLaunch<ParallelScan, LaunchBounds>( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute + } + } + + ParallelScan(const FunctorType& arg_functor, const Policy& arg_policy) + : m_functor(arg_functor), + 
m_policy(arg_policy), + m_scratch_space(nullptr), + m_scratch_flags(nullptr), + m_final(false) +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + , + m_run_serial(Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) +#endif + { + } +}; + +//---------------------------------------------------------------------------- +template <class FunctorType, class ReturnType, class... Traits> +class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, + ReturnType, Kokkos::Cuda> { + public: + using Policy = Kokkos::RangePolicy<Traits...>; + + private: + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using LaunchBounds = typename Policy::launch_bounds; + + using Analysis = Kokkos::Impl::FunctorAnalysis<FunctorPatternInterface::SCAN, + Policy, FunctorType>; + + public: + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using functor_type = FunctorType; + using size_type = Cuda::size_type; + + private: + // Algorithmic constraints: + // (a) blockDim.y is a power of two + // (b) blockDim.y == blockDim.z == 1 + // (c) gridDim.x <= blockDim.y * blockDim.y + // (d) gridDim.y == gridDim.z == 1 + + const FunctorType m_functor; + const Policy m_policy; + size_type* m_scratch_space; + size_type* m_scratch_flags; + size_type m_final; + ReturnType& m_returnvalue; +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + bool m_run_serial; +#endif + + template <class TagType> + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update, const bool final_result) const { + m_functor(i, update, final_result); + } + + template <class TagType> + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update, const bool final_result) const { + m_functor(TagType(), i, update, final_result); + } + + 
//---------------------------------------- + + __device__ inline void initial() const { + typename Analysis::Reducer final_reducer(&m_functor); + + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / + sizeof(size_type)> + word_count(Analysis::value_size(m_functor) / sizeof(size_type)); + + size_type* const shared_value = + kokkos_impl_cuda_shared_memory<size_type>() + + word_count.value * threadIdx.y; + + final_reducer.init(reinterpret_cast<pointer_type>(shared_value)); + + // Number of blocks is bounded so that the reduction can be limited to two + // passes. Each thread block is given an approximately equal amount of work + // to perform. Accumulate the values for this block. The accumulation + // ordering does not match the final pass, but is arithmatically equivalent. + + const WorkRange range(m_policy, blockIdx.x, gridDim.x); + + for (Member iwork = range.begin() + threadIdx.y, iwork_end = range.end(); + iwork < iwork_end; iwork += blockDim.y) { + this->template exec_range<WorkTag>( + iwork, + final_reducer.reference(reinterpret_cast<pointer_type>(shared_value)), + false); + } + + // Reduce and scan, writing out scan of blocks' totals and block-groups' + // totals. Blocks' scan values are written to 'blockIdx.x' location. + // Block-groups' scan values are at: i = ( j * blockDim.y - 1 ) for i < + // gridDim.x + cuda_single_inter_block_reduce_scan<true>( + final_reducer, blockIdx.x, gridDim.x, + kokkos_impl_cuda_shared_memory<size_type>(), m_scratch_space, + m_scratch_flags); + } + + //---------------------------------------- + + __device__ inline void final() const { + typename Analysis::Reducer final_reducer(&m_functor); + + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / + sizeof(size_type)> + word_count(Analysis::value_size(m_functor) / sizeof(size_type)); + + // Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , + // value[2] , ... 
} + size_type* const shared_data = kokkos_impl_cuda_shared_memory<size_type>(); + size_type* const shared_prefix = + shared_data + word_count.value * threadIdx.y; + size_type* const shared_accum = + shared_data + word_count.value * (blockDim.y + 1); + + // Starting value for this thread block is the previous block's total. + if (blockIdx.x) { + size_type* const block_total = + m_scratch_space + word_count.value * (blockIdx.x - 1); + for (unsigned i = threadIdx.y; i < word_count.value; ++i) { + shared_accum[i] = block_total[i]; + } + } else if (0 == threadIdx.y) { + final_reducer.init(reinterpret_cast<pointer_type>(shared_accum)); + } + + const WorkRange range(m_policy, blockIdx.x, gridDim.x); + + for (typename Policy::member_type iwork_base = range.begin(); + iwork_base < range.end(); iwork_base += blockDim.y) { + unsigned MASK = __activemask(); + + const typename Policy::member_type iwork = iwork_base + threadIdx.y; + + __syncthreads(); // Don't overwrite previous iteration values until they + // are used + + final_reducer.init( + reinterpret_cast<pointer_type>(shared_prefix + word_count.value)); + + // Copy previous block's accumulation total into thread[0] prefix and + // inclusive scan value of this block + for (unsigned i = threadIdx.y; i < word_count.value; ++i) { + shared_data[i + word_count.value] = shared_data[i] = shared_accum[i]; + } + + __syncwarp(MASK); + if (CudaTraits::WarpSize < word_count.value) { + __syncthreads(); + } // Protect against large scan values. 
+ + // Call functor to accumulate inclusive scan value for this work item + if (iwork < range.end()) { + this->template exec_range<WorkTag>( + iwork, + final_reducer.reference(reinterpret_cast<pointer_type>( + shared_prefix + word_count.value)), + false); + } + + // Scan block values into locations shared_data[1..blockDim.y] + cuda_intra_block_reduce_scan<true>( + final_reducer, + typename Analysis::pointer_type(shared_data + word_count.value)); + + { + size_type* const block_total = + shared_data + word_count.value * blockDim.y; + for (unsigned i = threadIdx.y; i < word_count.value; ++i) { + shared_accum[i] = block_total[i]; + } + } + + // Call functor with exclusive scan value + if (iwork < range.end()) { + this->template exec_range<WorkTag>( + iwork, + final_reducer.reference( + reinterpret_cast<pointer_type>(shared_prefix)), + true); + } + } + } + + public: + Policy const& get_policy() const { return m_policy; } + + //---------------------------------------- + + __device__ inline void operator()() const { +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + if (m_run_serial) { + typename Analysis::value_type value; + ValueInit::init(m_functor, (void*)&value); + const WorkRange range(m_policy, blockIdx.x, gridDim.x); + + for (typename Policy::member_type iwork_base = range.begin(); + iwork_base < range.end(); iwork_base++) { + this->template exec_range<WorkTag>(iwork_base, value, true); + } + *((typename Analysis::value_type*)m_scratch_space) = value; + } else { +#endif + if (!m_final) { + initial(); + } else { + final(); + } +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + } +#endif + } + + // Determine block size constrained by shared memory: + inline unsigned local_block_size(const FunctorType& f) { + // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 + // (16 warps) gridDim.x <= blockDim.y * blockDim.y + // + // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit + // testing + + unsigned n = CudaTraits::WarpSize * 
4; + while (n && unsigned(m_policy.space() + .impl_internal_space_instance() + ->m_maxShmemPerBlock) < + cuda_single_inter_block_reduce_scan_shmem<true, FunctorType, + WorkTag>(f, n)) { + n >>= 1; + } + return n; + } + + inline void execute() { + const auto nwork = m_policy.end() - m_policy.begin(); + if (nwork) { + enum { GridMaxComputeCapability_2x = 0x0ffff }; + + const int block_size = local_block_size(m_functor); + KOKKOS_ASSERT(block_size > 0); + + const int grid_max = + (block_size * block_size) < GridMaxComputeCapability_2x + ? (block_size * block_size) + : GridMaxComputeCapability_2x; + + // At most 'max_grid' blocks: + const int max_grid = + std::min(int(grid_max), int((nwork + block_size - 1) / block_size)); + + // How much work per block: + const int work_per_block = (nwork + max_grid - 1) / max_grid; + + // How many block are really needed for this much work: + const int grid_x = (nwork + work_per_block - 1) / work_per_block; + + m_scratch_space = cuda_internal_scratch_space( + m_policy.space(), Analysis::value_size(m_functor) * grid_x); + m_scratch_flags = + cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type) * 1); + + dim3 grid(grid_x, 1, 1); + dim3 block(1, block_size, 1); // REQUIRED DIMENSIONS ( 1 , N , 1 ) + const int shmem = Analysis::value_size(m_functor) * (block_size + 2); + +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + if (m_run_serial) { + block = dim3(1, 1, 1); + grid = dim3(1, 1, 1); + } else { +#endif + + m_final = false; + CudaParallelLaunch<ParallelScanWithTotal, LaunchBounds>( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + } +#endif + m_final = true; + CudaParallelLaunch<ParallelScanWithTotal, LaunchBounds>( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute + + const int size = Analysis::value_size(m_functor); +#ifdef 
KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + if (m_run_serial) + DeepCopy<HostSpace, CudaSpace, Cuda>(m_policy.space(), &m_returnvalue, + m_scratch_space, size); + else +#endif + DeepCopy<HostSpace, CudaSpace, Cuda>( + m_policy.space(), &m_returnvalue, + m_scratch_space + (grid_x - 1) * size / sizeof(int), size); + } + } + + ParallelScanWithTotal(const FunctorType& arg_functor, + const Policy& arg_policy, ReturnType& arg_returnvalue) + : m_functor(arg_functor), + m_policy(arg_policy), + m_scratch_space(nullptr), + m_scratch_flags(nullptr), + m_final(false), + m_returnvalue(arg_returnvalue) +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + , + m_run_serial(Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) +#endif + { + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif +#endif diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cdd16085b352fbbe8177e0d249d85007e58ca945 --- /dev/null +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp @@ -0,0 +1,1139 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_PARALLEL_TEAM_HPP +#define KOKKOS_CUDA_PARALLEL_TEAM_HPP + +#include <Kokkos_Macros.hpp> +#if defined(KOKKOS_ENABLE_CUDA) + +#include <algorithm> +#include <string> +#include <cstdio> +#include <cstdint> + +#include <utility> +#include <Kokkos_Parallel.hpp> + +#include <Cuda/Kokkos_Cuda_KernelLaunch.hpp> +#include <Cuda/Kokkos_Cuda_ReduceScan.hpp> +#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp> +#include <Cuda/Kokkos_Cuda_Locks.hpp> +#include <Cuda/Kokkos_Cuda_Team.hpp> +#include <Kokkos_MinMaxClamp.hpp> +#include <Kokkos_Vectorization.hpp> + +#include <impl/Kokkos_Tools.hpp> +#include <typeinfo> + +#include <impl/KokkosExp_IterateTileGPU.hpp> + +namespace Kokkos { + +extern bool show_warnings() noexcept; + +namespace Impl { + +template <class... 
Properties> +class TeamPolicyInternal<Kokkos::Cuda, Properties...> + : public PolicyTraits<Properties...> { + public: + //! Tag this class as a kokkos execution policy + using execution_policy = TeamPolicyInternal; + + using traits = PolicyTraits<Properties...>; + + template <class ExecSpace, class... OtherProperties> + friend class TeamPolicyInternal; + + private: + static constexpr int MAX_WARP = 8; + + typename traits::execution_space m_space; + int m_league_size; + int m_team_size; + int m_vector_length; + size_t m_team_scratch_size[2]; + size_t m_thread_scratch_size[2]; + int m_chunk_size; + bool m_tune_team; + bool m_tune_vector; + + public: + //! Execution space of this execution policy + using execution_space = Kokkos::Cuda; + + template <class... OtherProperties> + TeamPolicyInternal(const TeamPolicyInternal<OtherProperties...>& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + m_space = p.m_space; + m_tune_team = p.m_tune_team; + m_tune_vector = p.m_tune_vector; + } + + //---------------------------------------- + + template <class FunctorType> + int team_size_max(const FunctorType& f, const ParallelForTag&) const { + using closure_type = + Impl::ParallelFor<FunctorType, TeamPolicy<Properties...>>; + cudaFuncAttributes attr = + CudaParallelLaunch<closure_type, typename traits::launch_bounds>:: + get_cuda_func_attributes(); + int block_size = + Kokkos::Impl::cuda_get_max_block_size<FunctorType, + typename traits::launch_bounds>( + space().impl_internal_space_instance(), attr, f, + (size_t)impl_vector_length(), + (size_t)team_scratch_size(0) + 2 * sizeof(double), + (size_t)thread_scratch_size(0) + sizeof(double)); + return block_size / 
impl_vector_length(); + } + + template <class FunctorType> + inline int team_size_max(const FunctorType& f, + const ParallelReduceTag&) const { + using functor_analysis_type = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + TeamPolicyInternal, FunctorType>; + using reducer_type = typename Impl::ParallelReduceReturnValue< + void, typename functor_analysis_type::value_type, + FunctorType>::reducer_type; + using closure_type = + Impl::ParallelReduce<FunctorType, TeamPolicy<Properties...>, + reducer_type>; + return internal_team_size_max<closure_type>(f); + } + + template <class FunctorType, class ReducerType> + inline int team_size_max(const FunctorType& f, const ReducerType& /*r*/, + const ParallelReduceTag&) const { + using closure_type = + Impl::ParallelReduce<FunctorType, TeamPolicy<Properties...>, + ReducerType>; + return internal_team_size_max<closure_type>(f); + } + + template <class FunctorType> + int team_size_recommended(const FunctorType& f, const ParallelForTag&) const { + using closure_type = + Impl::ParallelFor<FunctorType, TeamPolicy<Properties...>>; + cudaFuncAttributes attr = + CudaParallelLaunch<closure_type, typename traits::launch_bounds>:: + get_cuda_func_attributes(); + const int block_size = + Kokkos::Impl::cuda_get_opt_block_size<FunctorType, + typename traits::launch_bounds>( + space().impl_internal_space_instance(), attr, f, + (size_t)impl_vector_length(), + (size_t)team_scratch_size(0) + 2 * sizeof(double), + (size_t)thread_scratch_size(0) + sizeof(double)); + return block_size / impl_vector_length(); + } + + template <class FunctorType> + inline int team_size_recommended(const FunctorType& f, + const ParallelReduceTag&) const { + using functor_analysis_type = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + TeamPolicyInternal, FunctorType>; + using reducer_type = typename Impl::ParallelReduceReturnValue< + void, typename functor_analysis_type::value_type, + FunctorType>::reducer_type; + using closure_type = 
+ Impl::ParallelReduce<FunctorType, TeamPolicy<Properties...>, + reducer_type>; + return internal_team_size_recommended<closure_type>(f); + } + + template <class FunctorType, class ReducerType> + int team_size_recommended(const FunctorType& f, const ReducerType&, + const ParallelReduceTag&) const { + using closure_type = + Impl::ParallelReduce<FunctorType, TeamPolicy<Properties...>, + ReducerType>; + return internal_team_size_recommended<closure_type>(f); + } + + inline static int vector_length_max() { return Impl::CudaTraits::WarpSize; } + + inline static int verify_requested_vector_length( + int requested_vector_length) { + int test_vector_length = + std::min(requested_vector_length, vector_length_max()); + + // Allow only power-of-two vector_length + if (!(is_integral_power_of_two(test_vector_length))) { + int test_pow2 = 1; + for (int i = 0; i < 5; i++) { + test_pow2 = test_pow2 << 1; + if (test_pow2 > test_vector_length) { + break; + } + } + test_vector_length = test_pow2 >> 1; + } + + return test_vector_length; + } + + inline static int scratch_size_max(int level) { + return ( + level == 0 ? 1024 * 40 : // 48kB is the max for CUDA, but we need some + // for team_member.reduce etc. 
+ 20 * 1024 * + 1024); // arbitrarily setting this to 20MB, for a Volta V100 + // that would give us about 3.2GB for 2 teams per SM + } + + //---------------------------------------- + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 + KOKKOS_DEPRECATED inline int vector_length() const { + return impl_vector_length(); + } +#endif + inline int impl_vector_length() const { return m_vector_length; } + inline int team_size() const { return m_team_size; } + inline int league_size() const { return m_league_size; } + inline bool impl_auto_team_size() const { return m_tune_team; } + inline bool impl_auto_vector_length() const { return m_tune_vector; } + inline void impl_set_team_size(size_t team_size) { m_team_size = team_size; } + inline void impl_set_vector_length(size_t vector_length) { + m_vector_length = vector_length; + } + size_t scratch_size(int level, int team_size_ = -1) const { + if (team_size_ < 0) team_size_ = m_team_size; + return m_team_scratch_size[level] + + team_size_ * m_thread_scratch_size[level]; + } + size_t team_scratch_size(int level) const { + return m_team_scratch_size[level]; + } + size_t thread_scratch_size(int level) const { + return m_thread_scratch_size[level]; + } + + const typename traits::execution_space& space() const { return m_space; } + + TeamPolicyInternal() + : m_space(typename traits::execution_space()), + m_league_size(0), + m_team_size(-1), + m_vector_length(0), + m_team_scratch_size{0, 0}, + m_thread_scratch_size{0, 0}, + m_chunk_size(Impl::CudaTraits::WarpSize), + m_tune_team(false), + m_tune_vector(false) {} + + /** \brief Specify league size, specify team size, specify vector length */ + TeamPolicyInternal(const execution_space space_, int league_size_, + int team_size_request, int vector_length_request = 1) + : m_space(space_), + m_league_size(league_size_), + m_team_size(team_size_request), + m_vector_length( + (vector_length_request > 0) + ? 
verify_requested_vector_length(vector_length_request) + : verify_requested_vector_length(1)), + m_team_scratch_size{0, 0}, + m_thread_scratch_size{0, 0}, + m_chunk_size(Impl::CudaTraits::WarpSize), + m_tune_team(bool(team_size_request <= 0)), + m_tune_vector(bool(vector_length_request <= 0)) { + // Make sure league size is permissible + if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count()[0])) + Impl::throw_runtime_exception( + "Requested too large league_size for TeamPolicy on Cuda execution " + "space."); + + // Make sure total block size is permissible + if (m_team_size * m_vector_length > + int(Impl::CudaTraits::MaxHierarchicalParallelism)) { + Impl::throw_runtime_exception( + std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. " + "Team size x vector length must be smaller than 1024.")); + } + } + + /** \brief Specify league size, request team size, specify vector length */ + TeamPolicyInternal(const execution_space space_, int league_size_, + const Kokkos::AUTO_t& /* team_size_request */ + , + int vector_length_request = 1) + : TeamPolicyInternal(space_, league_size_, -1, vector_length_request) {} + + /** \brief Specify league size, request team size and vector length */ + TeamPolicyInternal(const execution_space space_, int league_size_, + const Kokkos::AUTO_t& /* team_size_request */, + const Kokkos::AUTO_t& /* vector_length_request */ + ) + : TeamPolicyInternal(space_, league_size_, -1, -1) {} + + /** \brief Specify league size, specify team size, request vector length */ + TeamPolicyInternal(const execution_space space_, int league_size_, + int team_size_request, const Kokkos::AUTO_t&) + : TeamPolicyInternal(space_, league_size_, team_size_request, -1) {} + + TeamPolicyInternal(int league_size_, int team_size_request, + int vector_length_request = 1) + : TeamPolicyInternal(typename traits::execution_space(), league_size_, + team_size_request, vector_length_request) {} + + TeamPolicyInternal(int league_size_, const 
Kokkos::AUTO_t& team_size_request, + int vector_length_request = 1) + : TeamPolicyInternal(typename traits::execution_space(), league_size_, + team_size_request, vector_length_request) + + {} + + /** \brief Specify league size, request team size */ + TeamPolicyInternal(int league_size_, const Kokkos::AUTO_t& team_size_request, + const Kokkos::AUTO_t& vector_length_request) + : TeamPolicyInternal(typename traits::execution_space(), league_size_, + team_size_request, vector_length_request) {} + + /** \brief Specify league size, request team size */ + TeamPolicyInternal(int league_size_, int team_size_request, + const Kokkos::AUTO_t& vector_length_request) + : TeamPolicyInternal(typename traits::execution_space(), league_size_, + team_size_request, vector_length_request) {} + + inline int chunk_size() const { return m_chunk_size; } + + /** \brief set chunk_size to a discrete value*/ + inline TeamPolicyInternal& set_chunk_size( + typename traits::index_type chunk_size_) { + m_chunk_size = chunk_size_; + return *this; + } + + /** \brief set per team scratch size for a specific level of the scratch + * hierarchy */ + inline TeamPolicyInternal& set_scratch_size(const int& level, + const PerTeamValue& per_team) { + m_team_scratch_size[level] = per_team.value; + return *this; + } + + /** \brief set per thread scratch size for a specific level of the scratch + * hierarchy */ + inline TeamPolicyInternal& set_scratch_size( + const int& level, const PerThreadValue& per_thread) { + m_thread_scratch_size[level] = per_thread.value; + return *this; + } + + /** \brief set per thread and per team scratch size for a specific level of + * the scratch hierarchy */ + inline TeamPolicyInternal& set_scratch_size( + const int& level, const PerTeamValue& per_team, + const PerThreadValue& per_thread) { + m_team_scratch_size[level] = per_team.value; + m_thread_scratch_size[level] = per_thread.value; + return *this; + } + + using member_type = Kokkos::Impl::CudaTeamMember; + + protected: + 
template <class ClosureType, class FunctorType, class BlockSizeCallable> + int internal_team_size_common(const FunctorType& f, + BlockSizeCallable&& block_size_callable) const { + using closure_type = ClosureType; + using Interface = + typename Impl::DeduceFunctorPatternInterface<ClosureType>::type; + using Analysis = + Impl::FunctorAnalysis<Interface, typename ClosureType::Policy, + FunctorType>; + + cudaFuncAttributes attr = + CudaParallelLaunch<closure_type, typename traits::launch_bounds>:: + get_cuda_func_attributes(); + const int block_size = std::forward<BlockSizeCallable>(block_size_callable)( + space().impl_internal_space_instance(), attr, f, + (size_t)impl_vector_length(), + (size_t)team_scratch_size(0) + 2 * sizeof(double), + (size_t)thread_scratch_size(0) + sizeof(double) + + ((Analysis::StaticValueSize != 0) ? 0 : Analysis::value_size(f))); + KOKKOS_ASSERT(block_size > 0); + + // Currently we require Power-of-2 team size for reductions. + int p2 = 1; + while (p2 <= block_size) p2 *= 2; + p2 /= 2; + return p2 / impl_vector_length(); + } + + template <class ClosureType, class FunctorType> + int internal_team_size_max(const FunctorType& f) const { + return internal_team_size_common<ClosureType>( + f, + Kokkos::Impl::cuda_get_max_block_size<FunctorType, + typename traits::launch_bounds>); + } + + template <class ClosureType, class FunctorType> + int internal_team_size_recommended(const FunctorType& f) const { + return internal_team_size_common<ClosureType>( + f, + Kokkos::Impl::cuda_get_opt_block_size<FunctorType, + typename traits::launch_bounds>); + } +}; + +__device__ inline int64_t cuda_get_scratch_index(Cuda::size_type league_size, + int32_t* scratch_locks) { + int64_t threadid = 0; + __shared__ int64_t base_thread_id; + if (threadIdx.x == 0 && threadIdx.y == 0) { + int64_t const wraparound_len = Kokkos::max( + int64_t(1), Kokkos::min(int64_t(league_size), + (int64_t(g_device_cuda_lock_arrays.n)) / + (blockDim.x * blockDim.y))); + threadid = 
(blockIdx.x * blockDim.z + threadIdx.z) % wraparound_len; + threadid *= blockDim.x * blockDim.y; + int done = 0; + while (!done) { + done = (0 == atomicCAS(&scratch_locks[threadid], 0, 1)); + if (!done) { + threadid += blockDim.x * blockDim.y; + if (int64_t(threadid + blockDim.x * blockDim.y) >= + wraparound_len * blockDim.x * blockDim.y) + threadid = 0; + } + } + base_thread_id = threadid; + } + __syncthreads(); + threadid = base_thread_id; + return threadid; +} + +__device__ inline void cuda_release_scratch_index(int32_t* scratch_locks, + int64_t threadid) { + __syncthreads(); + if (threadIdx.x == 0 && threadIdx.y == 0) { + scratch_locks[threadid] = 0; + } +} + +template <class FunctorType, class... Properties> +class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, + Kokkos::Cuda> { + public: + using Policy = TeamPolicy<Properties...>; + + private: + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using LaunchBounds = typename Policy::launch_bounds; + + public: + using functor_type = FunctorType; + using size_type = Cuda::size_type; + + private: + // Algorithmic constraints: blockDim.y is a power of two AND blockDim.y == + // blockDim.z == 1 shared memory utilization: + // + // [ team reduce space ] + // [ team shared space ] + // + + const FunctorType m_functor; + const Policy m_policy; + const size_type m_league_size; + int m_team_size; + const size_type m_vector_size; + int m_shmem_begin; + int m_shmem_size; + void* m_scratch_ptr[2]; + size_t m_scratch_size[2]; + int m_scratch_pool_id = -1; + int32_t* m_scratch_locks; + + template <class TagType> + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_team( + const Member& member) const { + m_functor(member); + } + + template <class TagType> + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_team( + const Member& member) const { + m_functor(TagType(), member); + } + + public: + Policy const& get_policy() const { 
return m_policy; } + + __device__ inline void operator()() const { + // Iterate this block through the league + int64_t threadid = 0; + if (m_scratch_size[1] > 0) { + threadid = cuda_get_scratch_index(m_league_size, m_scratch_locks); + } + + const int int_league_size = (int)m_league_size; + for (int league_rank = blockIdx.x; league_rank < int_league_size; + league_rank += gridDim.x) { + this->template exec_team<WorkTag>(typename Policy::member_type( + kokkos_impl_cuda_shared_memory<void>(), m_shmem_begin, m_shmem_size, + (void*)(((char*)m_scratch_ptr[1]) + + ptrdiff_t(threadid / (blockDim.x * blockDim.y)) * + m_scratch_size[1]), + m_scratch_size[1], league_rank, m_league_size)); + } + if (m_scratch_size[1] > 0) { + cuda_release_scratch_index(m_scratch_locks, threadid); + } + } + + inline void execute() const { + const int64_t shmem_size_total = m_shmem_begin + m_shmem_size; + dim3 grid(int(m_league_size), 1, 1); + const dim3 block(int(m_vector_size), int(m_team_size), 1); + +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + if (Kokkos::Impl::CudaInternal::cuda_use_serial_execution()) { + grid = dim3(1, 1, 1); + } +#endif + + CudaParallelLaunch<ParallelFor, LaunchBounds>( + *this, grid, block, shmem_size_total, + m_policy.space().impl_internal_space_instance(), + true); // copy to device and execute + } + + ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) + : m_functor(arg_functor), + m_policy(arg_policy), + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.impl_vector_length()) { + cudaFuncAttributes attr = + CudaParallelLaunch<ParallelFor, + LaunchBounds>::get_cuda_func_attributes(); + m_team_size = + m_team_size >= 0 + ? 
m_team_size + : Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, + m_functor, m_vector_size, m_policy.team_scratch_size(0), + m_policy.thread_scratch_size(0)) / + m_vector_size; + + m_shmem_begin = (sizeof(double) * (m_team_size + 2)); + m_shmem_size = + (m_policy.scratch_size(0, m_team_size) + + FunctorTeamShmemSize<FunctorType>::value(m_functor, m_team_size)); + m_scratch_size[0] = m_policy.scratch_size(0, m_team_size); + m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); + m_scratch_locks = + m_policy.space().impl_internal_space_instance()->m_scratch_locks; + + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. + m_scratch_ptr[0] = nullptr; + if (m_team_size <= 0) { + m_scratch_ptr[1] = nullptr; + } else { + auto scratch_ptr_id = + m_policy.space() + .impl_internal_space_instance() + ->resize_team_scratch_space( + static_cast<std::int64_t>(m_scratch_size[1]) * + (std::min( + static_cast<std::int64_t>(Cuda::concurrency() / + (m_team_size * m_vector_size)), + static_cast<std::int64_t>(m_league_size)))); + m_scratch_ptr[1] = scratch_ptr_id.first; + m_scratch_pool_id = scratch_ptr_id.second; + } + + const int shmem_size_total = m_shmem_begin + m_shmem_size; + if (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < + shmem_size_total) { + printf( + "%i %i\n", + m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock, + shmem_size_total); + Kokkos::Impl::throw_runtime_exception(std::string( + "Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory")); + } + + if (int(m_team_size) > + int(Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, + arg_functor, arg_policy.impl_vector_length(), + arg_policy.team_scratch_size(0), + arg_policy.thread_scratch_size(0)) / + arg_policy.impl_vector_length())) { + 
Kokkos::Impl::throw_runtime_exception(std::string( + "Kokkos::Impl::ParallelFor< Cuda > requested too large team size.")); + } + } + + ~ParallelFor() { + if (m_scratch_pool_id >= 0) { + m_policy.space() + .impl_internal_space_instance() + ->m_team_scratch_pool[m_scratch_pool_id] = 0; + } + } +}; + +template <class FunctorType, class ReducerType, class... Properties> +class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, + ReducerType, Kokkos::Cuda> { + public: + using Policy = TeamPolicy<Properties...>; + + private: + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using LaunchBounds = typename Policy::launch_bounds; + + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + WorkTag, void>::type; + + using Analysis = + Kokkos::Impl::FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, + ReducerTypeFwd>; + + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; + + public: + using functor_type = FunctorType; + using size_type = Cuda::size_type; + using reducer_type = ReducerType; + + static constexpr bool UseShflReduction = + (true && (Analysis::StaticValueSize != 0)); + + private: + struct ShflReductionTag {}; + struct SHMEMReductionTag {}; + + // Algorithmic constraints: blockDim.y is a power of two AND blockDim.y == + // blockDim.z == 1 shared memory utilization: + // + // [ global reduce space ] + // [ team reduce space ] + // [ team shared space ] + // + + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + const bool m_result_ptr_device_accessible; + const bool m_result_ptr_host_accessible; + 
size_type* m_scratch_space; + size_type* m_scratch_flags; + size_type* m_unified_space; + size_type m_team_begin; + size_type m_shmem_begin; + size_type m_shmem_size; + void* m_scratch_ptr[2]; + size_t m_scratch_size[2]; + int m_scratch_pool_id = -1; + int32_t* m_scratch_locks; + const size_type m_league_size; + int m_team_size; + const size_type m_vector_size; + + template <class TagType> + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_team( + const Member& member, reference_type update) const { + m_functor(member, update); + } + + template <class TagType> + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_team( + const Member& member, reference_type update) const { + m_functor(TagType(), member, update); + } + + public: + Policy const& get_policy() const { return m_policy; } + + __device__ inline void operator()() const { + int64_t threadid = 0; + if (m_scratch_size[1] > 0) { + threadid = cuda_get_scratch_index(m_league_size, m_scratch_locks); + } + + using ReductionTag = std::conditional_t<UseShflReduction, ShflReductionTag, + SHMEMReductionTag>; + run(ReductionTag{}, threadid); + if (m_scratch_size[1] > 0) { + cuda_release_scratch_index(m_scratch_locks, threadid); + } + } + + __device__ inline void run(SHMEMReductionTag&, const int& threadid) const { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / + sizeof(size_type)> + word_count(Analysis::value_size( + ReducerConditional::select(m_functor, m_reducer)) / + sizeof(size_type)); + + reference_type value = + final_reducer.init(kokkos_impl_cuda_shared_memory<size_type>() + + threadIdx.y * word_count.value); + + // Iterate this block through the league + const int int_league_size = (int)m_league_size; + for (int league_rank = blockIdx.x; league_rank < int_league_size; + league_rank += gridDim.x) { + this->template exec_team<WorkTag>( + 
Member(kokkos_impl_cuda_shared_memory<char>() + m_team_begin, + m_shmem_begin, m_shmem_size, + (void*)(((char*)m_scratch_ptr[1]) + + ptrdiff_t(threadid / (blockDim.x * blockDim.y)) * + m_scratch_size[1]), + m_scratch_size[1], league_rank, m_league_size), + value); + } + + // Reduce with final value at blockDim.y - 1 location. + bool zero_length = m_league_size == 0; + bool do_final_reduction = true; + if (!zero_length) + do_final_reduction = cuda_single_inter_block_reduce_scan<false>( + final_reducer, blockIdx.x, gridDim.x, + kokkos_impl_cuda_shared_memory<size_type>(), m_scratch_space, + m_scratch_flags); + + if (do_final_reduction) { + // This is the final block with the final result at the final threads' + // location + + size_type* const shared = kokkos_impl_cuda_shared_memory<size_type>() + + (blockDim.y - 1) * word_count.value; + size_type* const global = + m_result_ptr_device_accessible + ? reinterpret_cast<size_type*>(m_result_ptr) + : (m_unified_space ? m_unified_space : m_scratch_space); + + if (threadIdx.y == 0) { + final_reducer.final(reinterpret_cast<value_type*>(shared)); + } + + if (CudaTraits::WarpSize < word_count.value) { + __syncthreads(); + } + + for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) { + global[i] = shared[i]; + } + } + } + + __device__ inline void run(ShflReductionTag, const int& threadid) const { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + value_type value; + final_reducer.init(&value); + + // Iterate this block through the league + const int int_league_size = (int)m_league_size; + for (int league_rank = blockIdx.x; league_rank < int_league_size; + league_rank += gridDim.x) { + this->template exec_team<WorkTag>( + Member(kokkos_impl_cuda_shared_memory<char>() + m_team_begin, + m_shmem_begin, m_shmem_size, + (void*)(((char*)m_scratch_ptr[1]) + + ptrdiff_t(threadid / (blockDim.x * blockDim.y)) * + m_scratch_size[1]), + m_scratch_size[1], league_rank, 
m_league_size), + value); + } + + pointer_type const result = + m_result_ptr_device_accessible + ? m_result_ptr + : (pointer_type)(m_unified_space ? m_unified_space + : m_scratch_space); + + value_type init; + final_reducer.init(&init); + + if (int_league_size == 0) { + final_reducer.final(&value); + *result = value; + } else if (Impl::cuda_inter_block_reduction(value, init, final_reducer, + m_scratch_space, result, + m_scratch_flags, blockDim.y)) { + const unsigned id = threadIdx.y * blockDim.x + threadIdx.x; + if (id == 0) { + final_reducer.final(&value); + *result = value; + } + } + } + + inline void execute() { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + const bool is_empty_range = m_league_size == 0 || m_team_size == 0; + const bool need_device_set = Analysis::has_init_member_function || + Analysis::has_final_member_function || + !m_result_ptr_host_accessible || +#ifdef KOKKOS_CUDA_ENABLE_GRAPHS + Policy::is_graph_kernel::value || +#endif + !std::is_same<ReducerType, InvalidType>::value; + if (!is_empty_range || need_device_set) { + const int block_count = std::max( + 1u, UseShflReduction ? 
std::min(m_league_size, size_type(1024 * 32)) + : std::min(int(m_league_size), m_team_size)); + + m_scratch_space = cuda_internal_scratch_space( + m_policy.space(), Analysis::value_size(ReducerConditional::select( + m_functor, m_reducer)) * + block_count); + m_scratch_flags = + cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type)); + m_unified_space = cuda_internal_scratch_unified( + m_policy.space(), Analysis::value_size(ReducerConditional::select( + m_functor, m_reducer))); + + dim3 block(m_vector_size, m_team_size, 1); + dim3 grid(block_count, 1, 1); + const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size; + + if (is_empty_range +#ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION + || Kokkos::Impl::CudaInternal::cuda_use_serial_execution() +#endif + ) { + block = dim3(1, 1, 1); + grid = dim3(1, 1, 1); + } + + CudaParallelLaunch<ParallelReduce, LaunchBounds>( + *this, grid, block, shmem_size_total, + m_policy.space().impl_internal_space_instance(), + true); // copy to device and execute + + if (!m_result_ptr_device_accessible) { + m_policy.space().fence( + "Kokkos::Impl::ParallelReduce<Cuda, TeamPolicy>::execute: Result " + "Not Device Accessible"); + + if (m_result_ptr) { + if (m_unified_space) { + const int count = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + for (int i = 0; i < count; ++i) { + m_result_ptr[i] = pointer_type(m_unified_space)[i]; + } + } else { + const int size = Analysis::value_size( + ReducerConditional::select(m_functor, m_reducer)); + DeepCopy<HostSpace, CudaSpace>(m_result_ptr, m_scratch_space, size); + } + } + } + } else { + if (m_result_ptr) { + // TODO @graph We need to effectively insert this in to the graph + final_reducer.init(m_result_ptr); + } + } + } + + template <class ViewType> + ParallelReduce( + const FunctorType& arg_functor, const Policy& arg_policy, + const ViewType& arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value, void*> = nullptr) + : 
m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(InvalidType()), + m_result_ptr(arg_result.data()), + m_result_ptr_device_accessible( + MemorySpaceAccess<Kokkos::CudaSpace, + typename ViewType::memory_space>::accessible), + m_result_ptr_host_accessible( + MemorySpaceAccess<Kokkos::HostSpace, + typename ViewType::memory_space>::accessible), + m_scratch_space(nullptr), + m_scratch_flags(nullptr), + m_unified_space(nullptr), + m_team_begin(0), + m_shmem_begin(0), + m_shmem_size(0), + m_scratch_ptr{nullptr, nullptr}, + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.impl_vector_length()) { + cudaFuncAttributes attr = + CudaParallelLaunch<ParallelReduce, + LaunchBounds>::get_cuda_func_attributes(); + m_team_size = + m_team_size >= 0 + ? m_team_size + : Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, + m_functor, m_vector_size, m_policy.team_scratch_size(0), + m_policy.thread_scratch_size(0)) / + m_vector_size; + + m_team_begin = + UseShflReduction + ? 
0 + : cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, + WorkTag>(arg_functor, + m_team_size); + m_shmem_begin = sizeof(double) * (m_team_size + 2); + m_shmem_size = + m_policy.scratch_size(0, m_team_size) + + FunctorTeamShmemSize<FunctorType>::value(arg_functor, m_team_size); + m_scratch_size[0] = m_shmem_size; + m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); + m_scratch_locks = + m_policy.space().impl_internal_space_instance()->m_scratch_locks; + if (m_team_size <= 0) { + m_scratch_ptr[1] = nullptr; + } else { + auto scratch_ptr_id = + m_policy.space() + .impl_internal_space_instance() + ->resize_team_scratch_space( + static_cast<std::int64_t>(m_scratch_size[1]) * + (std::min( + static_cast<std::int64_t>(Cuda::concurrency() / + (m_team_size * m_vector_size)), + static_cast<std::int64_t>(m_league_size)))); + m_scratch_ptr[1] = scratch_ptr_id.first; + m_scratch_pool_id = scratch_ptr_id.second; + } + + // The global parallel_reduce does not support vector_length other than 1 at + // the moment + if ((arg_policy.impl_vector_length() > 1) && !UseShflReduction) + Impl::throw_runtime_exception( + "Kokkos::parallel_reduce with a TeamPolicy using a vector length of " + "greater than 1 is not currently supported for CUDA for dynamic " + "sized reduction types."); + + if ((m_team_size < 32) && !UseShflReduction) + Impl::throw_runtime_exception( + "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller " + "than 32 is not currently supported with CUDA for dynamic sized " + "reduction types."); + + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. 
+ + const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size; + + if (!Kokkos::Impl::is_integral_power_of_two(m_team_size) && + !UseShflReduction) { + Kokkos::Impl::throw_runtime_exception( + std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); + } + + if (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < + shmem_size_total) { + Kokkos::Impl::throw_runtime_exception( + std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much " + "L0 scratch memory")); + } + + if (int(m_team_size) > + arg_policy.team_size_max(m_functor, m_reducer, ParallelReduceTag())) { + Kokkos::Impl::throw_runtime_exception( + std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too " + "large team size.")); + } + } + + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const ReducerType& reducer) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_result_ptr_device_accessible( + MemorySpaceAccess<Kokkos::CudaSpace, + typename ReducerType::result_view_type:: + memory_space>::accessible), + m_result_ptr_host_accessible( + MemorySpaceAccess<Kokkos::HostSpace, + typename ReducerType::result_view_type:: + memory_space>::accessible), + m_scratch_space(nullptr), + m_scratch_flags(nullptr), + m_unified_space(nullptr), + m_team_begin(0), + m_shmem_begin(0), + m_shmem_size(0), + m_scratch_ptr{nullptr, nullptr}, + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.impl_vector_length()) { + cudaFuncAttributes attr = + CudaParallelLaunch<ParallelReduce, + LaunchBounds>::get_cuda_func_attributes(); + + // Valid team size not provided, deduce team size + m_team_size = + m_team_size >= 0 + ? 
m_team_size + : Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, + m_functor, m_vector_size, m_policy.team_scratch_size(0), + m_policy.thread_scratch_size(0)) / + m_vector_size; + + m_team_begin = + UseShflReduction + ? 0 + : cuda_single_inter_block_reduce_scan_shmem<false, FunctorType, + WorkTag>(arg_functor, + m_team_size); + m_shmem_begin = sizeof(double) * (m_team_size + 2); + m_shmem_size = + m_policy.scratch_size(0, m_team_size) + + FunctorTeamShmemSize<FunctorType>::value(arg_functor, m_team_size); + m_scratch_size[0] = m_shmem_size; + m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); + m_scratch_locks = + m_policy.space().impl_internal_space_instance()->m_scratch_locks; + if (m_team_size <= 0) { + m_scratch_ptr[1] = nullptr; + } else { + auto scratch_ptr_id = + m_policy.space() + .impl_internal_space_instance() + ->resize_team_scratch_space( + static_cast<std::int64_t>(m_scratch_size[1]) * + (std::min( + static_cast<std::int64_t>(Cuda::concurrency() / + (m_team_size * m_vector_size)), + static_cast<std::int64_t>(m_league_size)))); + m_scratch_ptr[1] = scratch_ptr_id.first; + m_scratch_pool_id = scratch_ptr_id.second; + } + + // The global parallel_reduce does not support vector_length other than 1 at + // the moment + if ((arg_policy.impl_vector_length() > 1) && !UseShflReduction) + Impl::throw_runtime_exception( + "Kokkos::parallel_reduce with a TeamPolicy using a vector length of " + "greater than 1 is not currently supported for CUDA for dynamic " + "sized reduction types."); + + if ((m_team_size < 32) && !UseShflReduction) + Impl::throw_runtime_exception( + "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller " + "than 32 is not currently supported with CUDA for dynamic sized " + "reduction types."); + + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. 
+ + const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size; + + if ((!Kokkos::Impl::is_integral_power_of_two(m_team_size) && + !UseShflReduction) || + m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < + shmem_size_total) { + Kokkos::Impl::throw_runtime_exception( + std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); + } + + size_type team_size_max = + Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, m_functor, + m_vector_size, m_policy.team_scratch_size(0), + m_policy.thread_scratch_size(0)) / + m_vector_size; + + if ((int)m_team_size > (int)team_size_max) { + Kokkos::Impl::throw_runtime_exception( + std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too " + "large team size.")); + } + } + + ~ParallelReduce() { + if (m_scratch_pool_id >= 0) { + m_policy.space() + .impl_internal_space_instance() + ->m_team_scratch_pool[m_scratch_pool_id] = 0; + } + } +}; + +} // namespace Impl +} // namespace Kokkos +#endif /* defined(KOKKOS_ENABLE_CUDA) */ +#endif /* #ifndef KOKKOS_CUDA_PARALLEL_HPP */ diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index 30f5221da4c40175a84352e3afbf0cf29ac79e21..078315b65dd20d37adc6973f5ffff3a94836236b 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -51,7 +51,6 @@ #include <utility> #include <Kokkos_Parallel.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_Error.hpp> #include <Cuda/Kokkos_Cuda_Vectorization.hpp> @@ -69,11 +68,10 @@ namespace Impl { * (c) blockDim.z == 1 */ -template <class ValueType, class JoinOp> -__device__ inline - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - cuda_intra_warp_reduction(ValueType& result, const JoinOp& join, - const uint32_t max_active_thread = blockDim.y) { 
+template <class ValueType, class ReducerType> +__device__ inline void cuda_intra_warp_reduction( + ValueType& result, const ReducerType& reducer, + const uint32_t max_active_thread = blockDim.y) { unsigned int shift = 1; // Reduce over values from threads with different threadIdx.y @@ -81,18 +79,17 @@ __device__ inline const ValueType tmp = shfl_down(result, blockDim.x * shift, 32u); // Only join if upper thread is active (this allows non power of two for // blockDim.y - if (threadIdx.y + shift < max_active_thread) join(result, tmp); + if (threadIdx.y + shift < max_active_thread) reducer.join(&result, &tmp); shift *= 2; } result = shfl(result, 0, 32); } -template <class ValueType, class JoinOp> -__device__ inline - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - cuda_inter_warp_reduction(ValueType& value, const JoinOp& join, - const int max_active_thread = blockDim.y) { +template <class ValueType, class ReducerType> +__device__ inline void cuda_inter_warp_reduction( + ValueType& value, const ReducerType& reducer, + const int max_active_thread = blockDim.y) { #define STEP_WIDTH 4 // Depending on the ValueType _shared__ memory must be aligned up to 8byte // boundaries The reason not to use ValueType directly is that for types with @@ -110,7 +107,7 @@ __device__ inline __syncthreads(); while (shift <= max_active_thread / step) { if (shift <= id && shift + STEP_WIDTH > id && threadIdx.x == 0) { - join(result[id % STEP_WIDTH], value); + reducer.join(&result[id % STEP_WIDTH], &value); } __syncthreads(); shift += STEP_WIDTH; @@ -118,35 +115,30 @@ __device__ inline value = result[0]; for (int i = 1; (i * step < max_active_thread) && i < STEP_WIDTH; i++) - join(value, result[i]); + reducer.join(&value, &result[i]); } -template <class ValueType, class JoinOp> -__device__ inline - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - cuda_intra_block_reduction(ValueType& value, const JoinOp& join, - const int max_active_thread = 
blockDim.y) { - cuda_intra_warp_reduction(value, join, max_active_thread); - cuda_inter_warp_reduction(value, join, max_active_thread); +template <class ValueType, class ReducerType> +__device__ inline void cuda_intra_block_reduction( + ValueType& value, const ReducerType& reducer, + const int max_active_thread = blockDim.y) { + cuda_intra_warp_reduction(value, reducer, max_active_thread); + cuda_inter_warp_reduction(value, reducer, max_active_thread); } -template <class FunctorType, class JoinOp, class ArgTag = void> +template <class FunctorType> __device__ bool cuda_inter_block_reduction( - typename FunctorValueTraits<FunctorType, ArgTag>::reference_type value, - typename FunctorValueTraits<FunctorType, ArgTag>::reference_type neutral, - const JoinOp& join, Cuda::size_type* const m_scratch_space, - typename FunctorValueTraits<FunctorType, - ArgTag>::pointer_type const /*result*/, + typename FunctorType::reference_type value, + typename FunctorType::reference_type neutral, const FunctorType& reducer, + Cuda::size_type* const m_scratch_space, + typename FunctorType::pointer_type const /*result*/, Cuda::size_type* const m_scratch_flags, const int max_active_thread = blockDim.y) { -#ifdef __CUDA_ARCH__ - using pointer_type = - typename FunctorValueTraits<FunctorType, ArgTag>::pointer_type; - using value_type = - typename FunctorValueTraits<FunctorType, ArgTag>::value_type; + using pointer_type = typename FunctorType::pointer_type; + using value_type = typename FunctorType::value_type; // Do the intra-block reduction with shfl operations and static shared memory - cuda_intra_block_reduction(value, join, max_active_thread); + cuda_intra_block_reduction(value, reducer, max_active_thread); const int id = threadIdx.y * blockDim.x + threadIdx.x; @@ -182,240 +174,51 @@ __device__ bool cuda_inter_block_reduction( blockDim.x * blockDim.y < 32 ? 
blockDim.x * blockDim.y : 32; for (int i = id; i < (int)gridDim.x; i += step_size) { value_type tmp = global[i]; - join(value, tmp); - } - - // Perform shfl reductions within the warp only join if contribution is - // valid (allows gridDim.x non power of two and <32) - if (int(blockDim.x * blockDim.y) > 1) { - value_type tmp = Kokkos::shfl_down(value, 1, 32); - if (id + 1 < int(gridDim.x)) join(value, tmp); - } - unsigned int mask = __activemask(); - __syncwarp(mask); - if (int(blockDim.x * blockDim.y) > 2) { - value_type tmp = Kokkos::shfl_down(value, 2, 32); - if (id + 2 < int(gridDim.x)) join(value, tmp); - } - __syncwarp(mask); - if (int(blockDim.x * blockDim.y) > 4) { - value_type tmp = Kokkos::shfl_down(value, 4, 32); - if (id + 4 < int(gridDim.x)) join(value, tmp); - } - __syncwarp(mask); - if (int(blockDim.x * blockDim.y) > 8) { - value_type tmp = Kokkos::shfl_down(value, 8, 32); - if (id + 8 < int(gridDim.x)) join(value, tmp); - } - __syncwarp(mask); - if (int(blockDim.x * blockDim.y) > 16) { - value_type tmp = Kokkos::shfl_down(value, 16, 32); - if (id + 16 < int(gridDim.x)) join(value, tmp); - } - __syncwarp(mask); - } - } - // The last block has in its thread=0 the global reduction value through - // "value" - return last_block; -#else - (void)value; - (void)neutral; - (void)join; - (void)m_scratch_space; - (void)m_scratch_flags; - (void)max_active_thread; - return true; -#endif -} - -template <class ReducerType> -__device__ inline - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - cuda_intra_warp_reduction(const ReducerType& reducer, - typename ReducerType::value_type& result, - const uint32_t max_active_thread = blockDim.y) { - using ValueType = typename ReducerType::value_type; - - unsigned int shift = 1; - - // Reduce over values from threads with different threadIdx.y - while (blockDim.x * shift < 32) { - const ValueType tmp = shfl_down(result, blockDim.x * shift, 32u); - // Only join if upper thread is active (this allows 
non power of two for - // blockDim.y - if (threadIdx.y + shift < max_active_thread) reducer.join(result, tmp); - shift *= 2; - } - - result = shfl(result, 0, 32); - reducer.reference() = result; -} - -template <class ReducerType> -__device__ inline - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - cuda_inter_warp_reduction(const ReducerType& reducer, - typename ReducerType::value_type value, - const int max_active_thread = blockDim.y) { - using ValueType = typename ReducerType::value_type; - -#define STEP_WIDTH 4 - // Depending on the ValueType _shared__ memory must be aligned up to 8byte - // boundaries The reason not to use ValueType directly is that for types with - // constructors it could lead to race conditions - alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType) - : alignof(double)) - __shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH]; - ValueType* result = (ValueType*)&sh_result; - const int step = 32 / blockDim.x; - int shift = STEP_WIDTH; - const int id = threadIdx.y % step == 0 ? 
threadIdx.y / step : 65000; - if (id < STEP_WIDTH) { - result[id] = value; - } - __syncthreads(); - while (shift <= max_active_thread / step) { - if (shift <= id && shift + STEP_WIDTH > id && threadIdx.x == 0) { - reducer.join(result[id % STEP_WIDTH], value); - } - __syncthreads(); - shift += STEP_WIDTH; - } - - value = result[0]; - for (int i = 1; (i * step < max_active_thread) && i < STEP_WIDTH; i++) - reducer.join(value, result[i]); - - reducer.reference() = value; -} - -template <class ReducerType> -__device__ inline - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - cuda_intra_block_reduction(const ReducerType& reducer, - typename ReducerType::value_type value, - const int max_active_thread = blockDim.y) { - cuda_intra_warp_reduction(reducer, value, max_active_thread); - cuda_inter_warp_reduction(reducer, value, max_active_thread); -} - -template <class ReducerType> -__device__ inline - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - cuda_intra_block_reduction(const ReducerType& reducer, - const int max_active_thread = blockDim.y) { - cuda_intra_block_reduction(reducer, reducer.reference(), max_active_thread); -} - -template <class ReducerType> -__device__ inline - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value, bool>::type - cuda_inter_block_reduction(const ReducerType& reducer, - Cuda::size_type* const m_scratch_space, - Cuda::size_type* const m_scratch_flags, - const int max_active_thread = blockDim.y) { -#ifdef __CUDA_ARCH__ - using pointer_type = typename ReducerType::value_type*; - using value_type = typename ReducerType::value_type; - - // Do the intra-block reduction with shfl operations and static shared memory - cuda_intra_block_reduction(reducer, max_active_thread); - - value_type value = reducer.reference(); - - const int id = threadIdx.y * blockDim.x + threadIdx.x; - - // One thread in the block writes block result to global scratch_memory - if (id == 0) { - pointer_type global = 
((pointer_type)m_scratch_space) + blockIdx.x; - *global = value; - } - - // One warp of last block performs inter block reduction through loading the - // block values from global scratch_memory - bool last_block = false; - - __threadfence(); - __syncthreads(); - if (id < 32) { - Cuda::size_type count; - - // Figure out whether this is the last block - if (id == 0) count = Kokkos::atomic_fetch_add(m_scratch_flags, 1); - count = Kokkos::shfl(count, 0, 32); - - // Last block does the inter block reduction - if (count == gridDim.x - 1) { - // set flag back to zero - if (id == 0) *m_scratch_flags = 0; - last_block = true; - reducer.init(value); - - pointer_type const volatile global = (pointer_type)m_scratch_space; - - // Reduce all global values with splitting work over threads in one warp - const int step_size = - blockDim.x * blockDim.y < 32 ? blockDim.x * blockDim.y : 32; - for (int i = id; i < (int)gridDim.x; i += step_size) { - value_type tmp = global[i]; - reducer.join(value, tmp); + reducer.join(&value, &tmp); } // Perform shfl reductions within the warp only join if contribution is // valid (allows gridDim.x non power of two and <32) if (int(blockDim.x * blockDim.y) > 1) { value_type tmp = Kokkos::shfl_down(value, 1, 32); - if (id + 1 < int(gridDim.x)) reducer.join(value, tmp); + if (id + 1 < int(gridDim.x)) reducer.join(&value, &tmp); } unsigned int mask = __activemask(); __syncwarp(mask); if (int(blockDim.x * blockDim.y) > 2) { value_type tmp = Kokkos::shfl_down(value, 2, 32); - if (id + 2 < int(gridDim.x)) reducer.join(value, tmp); + if (id + 2 < int(gridDim.x)) reducer.join(&value, &tmp); } __syncwarp(mask); if (int(blockDim.x * blockDim.y) > 4) { value_type tmp = Kokkos::shfl_down(value, 4, 32); - if (id + 4 < int(gridDim.x)) reducer.join(value, tmp); + if (id + 4 < int(gridDim.x)) reducer.join(&value, &tmp); } __syncwarp(mask); if (int(blockDim.x * blockDim.y) > 8) { value_type tmp = Kokkos::shfl_down(value, 8, 32); - if (id + 8 < int(gridDim.x)) 
reducer.join(value, tmp); + if (id + 8 < int(gridDim.x)) reducer.join(&value, &tmp); } __syncwarp(mask); if (int(blockDim.x * blockDim.y) > 16) { value_type tmp = Kokkos::shfl_down(value, 16, 32); - if (id + 16 < int(gridDim.x)) reducer.join(value, tmp); + if (id + 16 < int(gridDim.x)) reducer.join(&value, &tmp); } __syncwarp(mask); } } - // The last block has in its thread=0 the global reduction value through // "value" return last_block; -#else - (void)reducer; - (void)m_scratch_space; - (void)m_scratch_flags; - (void)max_active_thread; - return true; -#endif } -template <class FunctorType, class ArgTag, bool DoScan, bool UseShfl> +template <class FunctorType, bool DoScan, bool UseShfl> struct CudaReductionsFunctor; -template <class FunctorType, class ArgTag> -struct CudaReductionsFunctor<FunctorType, ArgTag, false, true> { - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - using ValueJoin = FunctorValueJoin<FunctorType, ArgTag>; - using ValueInit = FunctorValueInit<FunctorType, ArgTag>; - using ValueOps = FunctorValueOps<FunctorType, ArgTag>; - using pointer_type = typename ValueTraits::pointer_type; - using Scalar = typename ValueTraits::value_type; +template <class FunctorType> +struct CudaReductionsFunctor<FunctorType, false, true> { + using pointer_type = typename FunctorType::pointer_type; + using Scalar = typename FunctorType::value_type; __device__ static inline void scalar_intra_warp_reduction( const FunctorType& functor, @@ -431,7 +234,7 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, true> { << ((threadIdx.y * blockDim.x + threadIdx.x) / width) * width; for (int delta = skip_vector ? 
blockDim.x : 1; delta < width; delta *= 2) { Scalar tmp = Kokkos::shfl_down(value, delta, width, mask); - ValueJoin::join(functor, &value, &tmp); + functor.join(&value, &tmp); } Impl::in_place_shfl(result, value, 0, width, mask); @@ -459,16 +262,16 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, true> { for (int w = shared_elements; w < num_warps; w += shared_elements) { if (warp_id >= w && warp_id < w + shared_elements) { if ((threadIdx.y * blockDim.x + threadIdx.x) % 32 == 0) - ValueJoin::join(functor, my_shared_team_buffer_element, &value); + functor.join(my_shared_team_buffer_element, &value); } __syncthreads(); } if (warp_id == 0) { - ValueInit::init(functor, &value); + functor.init(&value); for (unsigned int i = threadIdx.y * blockDim.x + threadIdx.x; i < blockDim.y * blockDim.x / 32; i += 32) - ValueJoin::join(functor, &value, &shared_team_buffer_element[i]); + functor.join(&value, &shared_team_buffer_element[i]); scalar_intra_warp_reduction(functor, value, false, 32, *my_global_team_buffer_element); } @@ -504,10 +307,10 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, true> { if (__syncthreads_or(num_teams_done == gridDim.x)) { is_last_block = true; *global_flags = 0; - ValueInit::init(functor, &value); + functor.init(&value); for (int i = threadIdx.y * blockDim.x + threadIdx.x; i < global_elements; i += blockDim.x * blockDim.y) { - ValueJoin::join(functor, &value, &global_team_buffer_element[i]); + functor.join(&value, &global_team_buffer_element[i]); } scalar_intra_block_reduction( functor, value, false, shared_team_buffer_elements + (blockDim.y - 1), @@ -517,14 +320,10 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, true> { } }; -template <class FunctorType, class ArgTag> -struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> { - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - using ValueJoin = FunctorValueJoin<FunctorType, ArgTag>; - using ValueInit = FunctorValueInit<FunctorType, ArgTag>; - 
using ValueOps = FunctorValueOps<FunctorType, ArgTag>; - using pointer_type = typename ValueTraits::pointer_type; - using Scalar = typename ValueTraits::value_type; +template <class FunctorType> +struct CudaReductionsFunctor<FunctorType, false, false> { + using pointer_type = typename FunctorType::pointer_type; + using Scalar = typename FunctorType::value_type; __device__ static inline void scalar_intra_warp_reduction( const FunctorType& functor, @@ -539,13 +338,18 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> { : ((1 << width) - 1) << ((threadIdx.y * blockDim.x + threadIdx.x) / width) * width; const int lane_id = (threadIdx.y * blockDim.x + threadIdx.x) % 32; + + __syncwarp(mask); + for (int delta = skip_vector ? blockDim.x : 1; delta < width; delta *= 2) { if (lane_id + delta < 32) { - ValueJoin::join(functor, value, value + delta); + functor.join(value, value + delta); } __syncwarp(mask); } - *value = *(value - lane_id); + if (lane_id != 0) { + *value = *(value - lane_id); + } } __device__ static inline void scalar_intra_block_reduction( @@ -605,10 +409,10 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> { if (__syncthreads_or(num_teams_done == gridDim.x)) { is_last_block = true; *global_flags = 0; - ValueInit::init(functor, &value); + functor.init(&value); for (int i = threadIdx.y * blockDim.x + threadIdx.x; i < global_elements; i += blockDim.x * blockDim.y) { - ValueJoin::join(functor, &value, &global_team_buffer_element[i]); + functor.join(&value, &global_team_buffer_element[i]); } scalar_intra_block_reduction( functor, value, false, shared_team_buffer_elements + (blockDim.y - 1), @@ -630,108 +434,127 @@ struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> { //---------------------------------------------------------------------------- /* * Algorithmic constraints: - * (a) blockDim.y is a power of two - * (b) blockDim.y <= 1024 - * (c) blockDim.x == blockDim.z == 1 + * (a) blockDim.y <= 1024 + * (b) blockDim.x 
== blockDim.z == 1 */ -template <bool DoScan, class FunctorType, class ArgTag> +template <bool DoScan, class FunctorType> __device__ void cuda_intra_block_reduce_scan( const FunctorType& functor, - const typename FunctorValueTraits<FunctorType, ArgTag>::pointer_type - base_data) { - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - using ValueJoin = FunctorValueJoin<FunctorType, ArgTag>; - - using pointer_type = typename ValueTraits::pointer_type; - - const unsigned value_count = ValueTraits::value_count(functor); - const unsigned BlockSizeMask = blockDim.y - 1; - - // Must have power of two thread count - - if (BlockSizeMask & blockDim.y) { - Kokkos::abort("Cuda::cuda_intra_block_scan requires power-of-two blockDim"); - } - -#define BLOCK_REDUCE_STEP(R, TD, S) \ - if (!(R & ((1 << (S + 1)) - 1))) { \ - ValueJoin::join(functor, TD, (TD - (value_count << S))); \ - } - -#define BLOCK_SCAN_STEP(TD, N, S) \ - if (N == (1 << S)) { \ - ValueJoin::join(functor, TD, (TD - (value_count << S))); \ - } - - const unsigned rtid_intra = threadIdx.y ^ BlockSizeMask; + const typename FunctorType::pointer_type base_data) { + using pointer_type = typename FunctorType::pointer_type; + + const unsigned value_count = functor.length(); + const unsigned not_less_power_of_two = + (1 << (Impl::int_log2(blockDim.y - 1) + 1)); + const unsigned BlockSizeMask = not_less_power_of_two - 1; + // There is at most one warp that is neither completely full or empty. + // For that warp, we shift all indices logically to the end and ignore join + // operations with unassigned indices in the warp when performing the intra + // warp reduction/scan. + const bool is_full_warp = (((threadIdx.y >> CudaTraits::WarpIndexShift) + 1) + << CudaTraits::WarpIndexShift) <= blockDim.y; + + const unsigned mapped_idx = + threadIdx.y + (is_full_warp ? 
0 + : (not_less_power_of_two - blockDim.y) & + (CudaTraits::WarpSize - 1)); const pointer_type tdata_intra = base_data + value_count * threadIdx.y; + const pointer_type warp_start = + base_data + value_count * ((threadIdx.y >> CudaTraits::WarpIndexShift) + << CudaTraits::WarpIndexShift); + + auto block_reduce_step = [&functor, value_count]( + int const R, pointer_type const TD, int const S, + pointer_type memory_start, int index_shift) { + const auto join_ptr = TD - (value_count << S) + value_count * index_shift; + if (((R + 1) & ((1 << (S + 1)) - 1)) == 0 && join_ptr >= memory_start) { + functor.join(TD, join_ptr); + } + }; + + auto block_scan_step = [&functor, value_count]( + int const R, pointer_type const TD, int const S, + pointer_type memory_start, int index_shift) { + const auto N = (1 << (S + 1)); + const auto join_ptr = TD - (value_count << S) + value_count * index_shift; + if (R >= N && ((R + 1) & (N - 1)) == (N >> 1) && join_ptr >= memory_start) { + functor.join(TD, join_ptr); + } + }; { // Intra-warp reduction: __syncwarp(0xffffffff); - BLOCK_REDUCE_STEP(rtid_intra, tdata_intra, 0) + block_reduce_step(mapped_idx, tdata_intra, 0, warp_start, 0); __syncwarp(0xffffffff); - BLOCK_REDUCE_STEP(rtid_intra, tdata_intra, 1) + block_reduce_step(mapped_idx, tdata_intra, 1, warp_start, 0); __syncwarp(0xffffffff); - BLOCK_REDUCE_STEP(rtid_intra, tdata_intra, 2) + block_reduce_step(mapped_idx, tdata_intra, 2, warp_start, 0); __syncwarp(0xffffffff); - BLOCK_REDUCE_STEP(rtid_intra, tdata_intra, 3) + block_reduce_step(mapped_idx, tdata_intra, 3, warp_start, 0); __syncwarp(0xffffffff); - BLOCK_REDUCE_STEP(rtid_intra, tdata_intra, 4) + block_reduce_step(mapped_idx, tdata_intra, 4, warp_start, 0); __syncwarp(0xffffffff); } __syncthreads(); // Wait for all warps to reduce - { // Inter-warp reduce-scan by a single warp to avoid extra synchronizations - const unsigned rtid_inter = (threadIdx.y ^ BlockSizeMask) - << CudaTraits::WarpIndexShift; - - unsigned inner_mask = 
__ballot_sync(0xffffffff, (rtid_inter < blockDim.y)); - if (rtid_inter < blockDim.y) { + // Inter-warp reduce-scan by a single warp to avoid extra synchronizations. + { + // There is at most one warp where the memory address to be used is not + // (CudaTraits::WarpSize - 1) away from the warp start adress. For the + // following reduction, we shift all indices logically to the end of the + // next power-of-two to the number of warps. + const unsigned n_active_warps = + ((blockDim.y - 1) >> CudaTraits::WarpIndexShift) + 1; + const unsigned inner_mask = + __ballot_sync(0xffffffff, (threadIdx.y < n_active_warps)); + if (threadIdx.y < n_active_warps) { + const bool is_full_warp_inter = + threadIdx.y < (blockDim.y >> CudaTraits::WarpIndexShift); const pointer_type tdata_inter = - base_data + value_count * (rtid_inter ^ BlockSizeMask); + base_data + + value_count * (is_full_warp_inter + ? (threadIdx.y << CudaTraits::WarpIndexShift) + + (CudaTraits::WarpSize - 1) + : blockDim.y - 1); + const unsigned index_shift = + is_full_warp_inter + ? 
0 + : blockDim.y - (threadIdx.y << CudaTraits::WarpIndexShift); + const int rtid_inter = (threadIdx.y << CudaTraits::WarpIndexShift) + + (CudaTraits::WarpSize - 1) - index_shift; if ((1 << 5) < BlockSizeMask) { __syncwarp(inner_mask); - BLOCK_REDUCE_STEP(rtid_inter, tdata_inter, 5) + block_reduce_step(rtid_inter, tdata_inter, 5, base_data, index_shift); } if ((1 << 6) < BlockSizeMask) { __syncwarp(inner_mask); - BLOCK_REDUCE_STEP(rtid_inter, tdata_inter, 6) + block_reduce_step(rtid_inter, tdata_inter, 6, base_data, index_shift); } if ((1 << 7) < BlockSizeMask) { __syncwarp(inner_mask); - BLOCK_REDUCE_STEP(rtid_inter, tdata_inter, 7) + block_reduce_step(rtid_inter, tdata_inter, 7, base_data, index_shift); } if ((1 << 8) < BlockSizeMask) { __syncwarp(inner_mask); - BLOCK_REDUCE_STEP(rtid_inter, tdata_inter, 8) + block_reduce_step(rtid_inter, tdata_inter, 8, base_data, index_shift); } if ((1 << 9) < BlockSizeMask) { __syncwarp(inner_mask); - BLOCK_REDUCE_STEP(rtid_inter, tdata_inter, 9) + block_reduce_step(rtid_inter, tdata_inter, 9, base_data, index_shift); } if (DoScan) { - int n = - (rtid_inter & 32) - ? 32 - : ((rtid_inter & 64) - ? 64 - : ((rtid_inter & 128) ? 128 - : ((rtid_inter & 256) ? 256 : 0))); - - if (!(rtid_inter + n < blockDim.y)) n = 0; - __syncwarp(inner_mask); - BLOCK_SCAN_STEP(tdata_inter, n, 8) + block_scan_step(rtid_inter, tdata_inter, 8, base_data, index_shift); __syncwarp(inner_mask); - BLOCK_SCAN_STEP(tdata_inter, n, 7) + block_scan_step(rtid_inter, tdata_inter, 7, base_data, index_shift); __syncwarp(inner_mask); - BLOCK_SCAN_STEP(tdata_inter, n, 6) + block_scan_step(rtid_inter, tdata_inter, 6, base_data, index_shift); __syncwarp(inner_mask); - BLOCK_SCAN_STEP(tdata_inter, n, 5) + block_scan_step(rtid_inter, tdata_inter, 5, base_data, index_shift); } } } @@ -739,32 +562,27 @@ __device__ void cuda_intra_block_reduce_scan( __syncthreads(); // Wait for inter-warp reduce-scan to complete if (DoScan) { - int n = - (rtid_intra & 1) - ? 
1 - : ((rtid_intra & 2) - ? 2 - : ((rtid_intra & 4) - ? 4 - : ((rtid_intra & 8) ? 8 - : ((rtid_intra & 16) ? 16 : 0)))); - - if (!(rtid_intra + n < blockDim.y)) n = 0; + block_scan_step(mapped_idx, tdata_intra, 4, warp_start, 0); + __threadfence_block(); __syncwarp(0xffffffff); - BLOCK_SCAN_STEP(tdata_intra, n, 4) __threadfence_block(); + block_scan_step(mapped_idx, tdata_intra, 3, warp_start, 0); + __threadfence_block(); __syncwarp(0xffffffff); - BLOCK_SCAN_STEP(tdata_intra, n, 3) __threadfence_block(); + block_scan_step(mapped_idx, tdata_intra, 2, warp_start, 0); + __threadfence_block(); __syncwarp(0xffffffff); - BLOCK_SCAN_STEP(tdata_intra, n, 2) __threadfence_block(); + block_scan_step(mapped_idx, tdata_intra, 1, warp_start, 0); + __threadfence_block(); __syncwarp(0xffffffff); - BLOCK_SCAN_STEP(tdata_intra, n, 1) __threadfence_block(); + block_scan_step(mapped_idx, tdata_intra, 0, warp_start, 0); + __threadfence_block(); __syncwarp(0xffffffff); - BLOCK_SCAN_STEP(tdata_intra, n, 0) __threadfence_block(); + // Update with total from previous warps + if (mapped_idx >= CudaTraits::WarpSize && + (mapped_idx & (CudaTraits::WarpSize - 1)) != (CudaTraits::WarpSize - 1)) + functor.join(tdata_intra, warp_start - value_count); __syncwarp(0xffffffff); } - -#undef BLOCK_SCAN_STEP -#undef BLOCK_REDUCE_STEP } //---------------------------------------------------------------------------- @@ -776,19 +594,14 @@ __device__ void cuda_intra_block_reduce_scan( * Global reduce result is in the last threads' 'shared_data' location. 
*/ -template <bool DoScan, class FunctorType, class ArgTag, - class SizeType = Cuda::size_type> +template <bool DoScan, class FunctorType, class SizeType = Cuda::size_type> __device__ bool cuda_single_inter_block_reduce_scan2( const FunctorType& functor, const Cuda::size_type block_id, const Cuda::size_type block_count, SizeType* const shared_data, SizeType* const global_data, Cuda::size_type* const global_flags) { - using size_type = SizeType; - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - using ValueJoin = FunctorValueJoin<FunctorType, ArgTag>; - using ValueInit = FunctorValueInit<FunctorType, ArgTag>; - using ValueOps = FunctorValueOps<FunctorType, ArgTag>; - - using pointer_type = typename ValueTraits::pointer_type; + using size_type = SizeType; + using value_type = typename FunctorType::value_type; + using pointer_type = typename FunctorType::pointer_type; // '__ffs' = position of the least significant bit set to 1. // 'blockDim.y' is guaranteed to be a power of two so this @@ -803,14 +616,14 @@ __device__ bool cuda_single_inter_block_reduce_scan2( "blockDim"); } - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / - sizeof(size_type)> - word_count(ValueTraits::value_size(functor) / sizeof(size_type)); + const integral_nonzero_constant< + size_type, std::is_pointer<typename FunctorType::reference_type>::value + ? 0 + : sizeof(value_type) / sizeof(size_type)> + word_count((sizeof(value_type) * functor.length()) / sizeof(size_type)); // Reduce the accumulation for the entire block. - cuda_intra_block_reduce_scan<false, FunctorType, ArgTag>( - functor, pointer_type(shared_data)); - + cuda_intra_block_reduce_scan<false>(functor, pointer_type(shared_data)); { // Write accumulation total to global scratch space. // Accumulation total is the last thread's data. 
@@ -840,31 +653,34 @@ __device__ bool cuda_single_inter_block_reduce_scan2( { void* const shared_ptr = shared_data + word_count.value * threadIdx.y; - /* reference_type shared_value = */ ValueInit::init(functor, shared_ptr); + /* reference_type shared_value = */ functor.init( + static_cast<pointer_type>(shared_ptr)); for (size_type i = b; i < e; ++i) { - ValueJoin::join(functor, shared_ptr, - global_data + word_count.value * i); + functor.join( + static_cast<pointer_type>(shared_ptr), + reinterpret_cast<pointer_type>(global_data + word_count.value * i)); } } - cuda_intra_block_reduce_scan<DoScan, FunctorType, ArgTag>( - functor, pointer_type(shared_data)); + cuda_intra_block_reduce_scan<DoScan>(functor, pointer_type(shared_data)); if (DoScan) { - size_type* const shared_value = + pointer_type const shared_value = reinterpret_cast<pointer_type>( shared_data + - word_count.value * (threadIdx.y ? threadIdx.y - 1 : blockDim.y); + word_count.value * (threadIdx.y ? threadIdx.y - 1 : blockDim.y)); if (!threadIdx.y) { - ValueInit::init(functor, shared_value); + functor.init(shared_value); } // Join previous inclusive scan value to each member for (size_type i = b; i < e; ++i) { size_type* const global_value = global_data + word_count.value * i; - ValueJoin::join(functor, shared_value, global_value); - ValueOps ::copy(functor, global_value, shared_value); + functor.join(shared_value, + reinterpret_cast<pointer_type>(global_value)); + functor.copy(reinterpret_cast<pointer_type>(global_value), + reinterpret_cast<pointer_type>(shared_value)); } } } @@ -872,29 +688,42 @@ __device__ bool cuda_single_inter_block_reduce_scan2( return is_last_block; } -template <bool DoScan, class FunctorType, class ArgTag, - class SizeType = Cuda::size_type> +template <bool DoScan, class FunctorType, class SizeType = Cuda::size_type> __device__ bool cuda_single_inter_block_reduce_scan( const FunctorType& functor, const Cuda::size_type block_id, const Cuda::size_type block_count, SizeType* const 
shared_data, SizeType* const global_data, Cuda::size_type* const global_flags) { - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - if (!DoScan && ValueTraits::StaticValueSize > 0) + if (!DoScan && !std::is_pointer<typename FunctorType::reference_type>::value) return Kokkos::Impl::CudaReductionsFunctor< - FunctorType, ArgTag, false, (ValueTraits::StaticValueSize > 16)>:: + FunctorType, false, (sizeof(typename FunctorType::value_type) > 16)>:: scalar_inter_block_reduction(functor, block_id, block_count, shared_data, global_data, global_flags); else - return cuda_single_inter_block_reduce_scan2<DoScan, FunctorType, ArgTag>( + return cuda_single_inter_block_reduce_scan2<DoScan>( functor, block_id, block_count, shared_data, global_data, global_flags); } // Size in bytes required for inter block reduce or scan template <bool DoScan, class FunctorType, class ArgTag> -inline unsigned cuda_single_inter_block_reduce_scan_shmem( - const FunctorType& functor, const unsigned BlockSize) { - return (BlockSize + 2) * - Impl::FunctorValueTraits<FunctorType, ArgTag>::value_size(functor); +inline std::enable_if_t<DoScan, unsigned> +cuda_single_inter_block_reduce_scan_shmem(const FunctorType& functor, + const unsigned BlockSize) { + using Analysis = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN, + RangePolicy<Cuda, ArgTag>, FunctorType>; + + return (BlockSize + 2) * Analysis::value_size(functor); +} + +template <bool DoScan, class FunctorType, class ArgTag> +inline std::enable_if_t<!DoScan, unsigned> +cuda_single_inter_block_reduce_scan_shmem(const FunctorType& functor, + const unsigned BlockSize) { + using Analysis = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + RangePolicy<Cuda, ArgTag>, FunctorType>; + + return (BlockSize + 2) * Analysis::value_size(functor); } template <typename WorkTag, typename Policy, typename FunctorType> diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp 
b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp index 777f57ced45b246af52cea73e796fbeae01cb57c..8f05448b17a6e584f16c4efbc8d3faa0debbcb4c 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_TASKDAG) diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp index 88ac0d1878a911a876210fe06cc52fa1d8285be6..1f2e394f1ebff0af1a89ca1650fef1fdea7fe40b 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -338,9 +338,8 @@ class TaskQueueSpecialization<SimpleTaskScheduler<Kokkos::Cuda, QueueType>> { template <class Scheduler> class TaskQueueSpecializationConstrained< - Scheduler, - typename std::enable_if<std::is_same<typename Scheduler::execution_space, - Kokkos::Cuda>::value>::type> { + Scheduler, std::enable_if_t<std::is_same< + typename Scheduler::execution_space, Kokkos::Cuda>::value>> { public: using scheduler_type = Scheduler; using execution_space = Kokkos::Cuda; @@ -780,12 +779,12 @@ namespace Kokkos { // template<typename iType1, typename iType2> // KOKKOS_INLINE_FUNCTION // Impl::TeamThreadRangeBoundariesStruct -// < typename std::common_type<iType1,iType2>::type +// < std::common_type_t<iType1,iType2> // , Impl::TaskExec< Kokkos::Cuda > > // TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread // , const iType1 & begin, const iType2 & end ) //{ -// using iType = typename std::common_type< iType1, iType2 >::type; +// using iType = std::common_type_t< iType1, iType2 >; // return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< // Kokkos::Cuda > >( // thread, iType(begin), iType(end) ); diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp 
b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp index 922b980a2545b4e35d573d44806d76fdf1ca1ea2..ffafc47f010fdd667b8a9bad3a71ea0467226897 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp @@ -74,8 +74,7 @@ struct CudaJoinFunctor { using value_type = Type; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - volatile const value_type& input) { + static void join(value_type& update, const value_type& input) { update += input; } }; @@ -131,28 +130,20 @@ class CudaTeamMember { KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank; } KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size; } KOKKOS_INLINE_FUNCTION int team_rank() const { -#ifdef __CUDA_ARCH__ - return threadIdx.y; -#else - return 0; -#endif + KOKKOS_IF_ON_DEVICE((return threadIdx.y;)) + KOKKOS_IF_ON_HOST((return 0;)) } KOKKOS_INLINE_FUNCTION int team_size() const { -#ifdef __CUDA_ARCH__ - return blockDim.y; -#else - return 1; -#endif + KOKKOS_IF_ON_DEVICE((return blockDim.y;)) + KOKKOS_IF_ON_HOST((return 1;)) } KOKKOS_INLINE_FUNCTION void team_barrier() const { -#ifdef __CUDA_ARCH__ - if (1 == blockDim.z) - __syncthreads(); // team == block - else - __threadfence_block(); // team <= warp -#endif + KOKKOS_IF_ON_DEVICE(( + if (1 == blockDim.z) { __syncthreads(); } // team == block + else { __threadfence_block(); } // team <= warp + )) } //-------------------------------------------------------------------------- @@ -162,21 +153,21 @@ class CudaTeamMember { const int& thread_id) const { (void)val; (void)thread_id; -#ifdef __CUDA_ARCH__ - if (1 == blockDim.z) { // team == block - __syncthreads(); - // Wait for shared data write until all threads arrive here - if (threadIdx.x == 0u && threadIdx.y == (uint32_t)thread_id) { - *((ValueType*)m_team_reduce) = val; - } - __syncthreads(); // Wait for shared data read until root thread writes - val = *((ValueType*)m_team_reduce); - } else { // 
team <= warp - ValueType tmp(val); // input might not be a register variable - Impl::in_place_shfl(val, tmp, blockDim.x * thread_id, - blockDim.x * blockDim.y); - } -#endif + KOKKOS_IF_ON_DEVICE(( + if (1 == blockDim.z) { // team == block + __syncthreads(); + // Wait for shared data write until all threads arrive here + if (threadIdx.x == 0u && threadIdx.y == (uint32_t)thread_id) { + *((ValueType*)m_team_reduce) = val; + } + __syncthreads(); // Wait for shared data read until root thread + // writes + val = *((ValueType*)m_team_reduce); + } else { // team <= warp + ValueType tmp(val); // input might not be a register variable + Impl::in_place_shfl(val, tmp, blockDim.x * thread_id, + blockDim.x * blockDim.y); + })) } template <class Closure, class ValueType> @@ -185,23 +176,23 @@ class CudaTeamMember { (void)f; (void)val; (void)thread_id; -#ifdef __CUDA_ARCH__ - f(val); - - if (1 == blockDim.z) { // team == block - __syncthreads(); - // Wait for shared data write until all threads arrive here - if (threadIdx.x == 0u && threadIdx.y == (uint32_t)thread_id) { - *((ValueType*)m_team_reduce) = val; - } - __syncthreads(); // Wait for shared data read until root thread writes - val = *((ValueType*)m_team_reduce); - } else { // team <= warp - ValueType tmp(val); // input might not be a register variable - Impl::in_place_shfl(val, tmp, blockDim.x * thread_id, - blockDim.x * blockDim.y); - } -#endif + KOKKOS_IF_ON_DEVICE(( + f(val); + + if (1 == blockDim.z) { // team == block + __syncthreads(); + // Wait for shared data write until all threads arrive here + if (threadIdx.x == 0u && threadIdx.y == (uint32_t)thread_id) { + *((ValueType*)m_team_reduce) = val; + } + __syncthreads(); // Wait for shared data read until root thread + // writes + val = *((ValueType*)m_team_reduce); + } else { // team <= warp + ValueType tmp(val); // input might not be a register variable + Impl::in_place_shfl(val, tmp, blockDim.x * thread_id, + blockDim.x * blockDim.y); + })) } 
//-------------------------------------------------------------------------- @@ -220,22 +211,23 @@ class CudaTeamMember { * ( 1 == blockDim.z ) */ template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer) const noexcept { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer) const noexcept { team_reduce(reducer, reducer.reference()); } template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer, - typename ReducerType::value_type& value) const noexcept { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) const noexcept { (void)reducer; (void)value; -#ifdef __CUDA_ARCH__ - cuda_intra_block_reduction(reducer, value, blockDim.y); -#endif /* #ifdef __CUDA_ARCH__ */ + KOKKOS_IF_ON_DEVICE( + (typename Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + TeamPolicy<Cuda>, ReducerType>::Reducer + wrapped_reducer(&reducer); + cuda_intra_block_reduction(value, wrapped_reducer, blockDim.y); + reducer.reference() = value;)) } //-------------------------------------------------------------------------- @@ -251,36 +243,33 @@ class CudaTeamMember { template <typename Type> KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value, Type* const global_accum) const { -#ifdef __CUDA_ARCH__ - Type* const base_data = (Type*)m_team_reduce; + KOKKOS_IF_ON_DEVICE(( + Type* const base_data = (Type*)m_team_reduce; - __syncthreads(); // Don't write in to shared data until all threads have - // entered this function + __syncthreads(); // Don't write in to shared data until all threads + // have entered this function - if (0 == threadIdx.y) { - base_data[0] = 0; - } + if (0 == threadIdx.y) { base_data[0] = 0; } - 
base_data[threadIdx.y + 1] = value; + base_data[threadIdx.y + 1] = value; + Impl::CudaJoinFunctor<Type> cuda_join_functor; + typename Impl::FunctorAnalysis< + Impl::FunctorPatternInterface::SCAN, TeamPolicy<Cuda>, + Impl::CudaJoinFunctor<Type>>::Reducer reducer(&cuda_join_functor); + Impl::cuda_intra_block_reduce_scan<true>(reducer, base_data + 1); - Impl::cuda_intra_block_reduce_scan<true, Impl::CudaJoinFunctor<Type>, void>( - Impl::CudaJoinFunctor<Type>(), base_data + 1); + if (global_accum) { + if (blockDim.y == threadIdx.y + 1) { + base_data[blockDim.y] = + atomic_fetch_add(global_accum, base_data[blockDim.y]); + } + __syncthreads(); // Wait for atomic + base_data[threadIdx.y] += base_data[blockDim.y]; + } - if (global_accum) { - if (blockDim.y == threadIdx.y + 1) { - base_data[blockDim.y] = - atomic_fetch_add(global_accum, base_data[blockDim.y]); - } - __syncthreads(); // Wait for atomic - base_data[threadIdx.y] += base_data[blockDim.y]; - } + return base_data[threadIdx.y];)) - return base_data[threadIdx.y]; -#else - (void)value; - (void)global_accum; - return Type(); -#endif + KOKKOS_IF_ON_HOST(((void)value; (void)global_accum; return Type();)) } /** \brief Intra-team exclusive prefix sum with team_rank() ordering. 
@@ -296,57 +285,54 @@ class CudaTeamMember { //---------------------------------------- template <typename ReducerType> - KOKKOS_INLINE_FUNCTION static - typename std::enable_if<is_reducer<ReducerType>::value>::type - vector_reduce(ReducerType const& reducer) { + KOKKOS_INLINE_FUNCTION static std::enable_if_t<is_reducer<ReducerType>::value> + vector_reduce(ReducerType const& reducer) { vector_reduce(reducer, reducer.reference()); } template <typename ReducerType> - KOKKOS_INLINE_FUNCTION static - typename std::enable_if<is_reducer<ReducerType>::value>::type - vector_reduce(ReducerType const& reducer, - typename ReducerType::value_type& value) { + KOKKOS_INLINE_FUNCTION static std::enable_if_t<is_reducer<ReducerType>::value> + vector_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) { (void)reducer; (void)value; -#ifdef __CUDA_ARCH__ - if (blockDim.x == 1) return; - - // Intra vector lane shuffle reduction: - typename ReducerType::value_type tmp(value); - typename ReducerType::value_type tmp2 = tmp; - - unsigned mask = - blockDim.x == 32 - ? 0xffffffff - : ((1 << blockDim.x) - 1) - << ((threadIdx.y % (32 / blockDim.x)) * blockDim.x); - - for (int i = blockDim.x; (i >>= 1);) { - Impl::in_place_shfl_down(tmp2, tmp, i, blockDim.x, mask); - if ((int)threadIdx.x < i) { - reducer.join(tmp, tmp2); - } - } + KOKKOS_IF_ON_DEVICE( + (if (blockDim.x == 1) return; - // Broadcast from root lane to all other lanes. - // Cannot use "butterfly" algorithm to avoid the broadcast - // because floating point summation is not associative - // and thus different threads could have different results. + // Intra vector lane shuffle reduction: + typename ReducerType::value_type tmp(value); + typename ReducerType::value_type tmp2 = tmp; - Impl::in_place_shfl(tmp2, tmp, 0, blockDim.x, mask); - value = tmp2; - reducer.reference() = tmp2; -#endif + unsigned mask = + blockDim.x == 32 + ? 
0xffffffff + : ((1 << blockDim.x) - 1) + << ((threadIdx.y % (32 / blockDim.x)) * blockDim.x); + + for (int i = blockDim.x; (i >>= 1);) { + Impl::in_place_shfl_down(tmp2, tmp, i, blockDim.x, mask); + if ((int)threadIdx.x < i) { + reducer.join(tmp, tmp2); + } + } + + // Broadcast from root lane to all other lanes. + // Cannot use "butterfly" algorithm to avoid the broadcast + // because floating point summation is not associative + // and thus different threads could have different results. + + Impl::in_place_shfl(tmp2, tmp, 0, blockDim.x, mask); + value = tmp2; reducer.reference() = tmp2;)) } //---------------------------------------- // Private for the driver KOKKOS_INLINE_FUNCTION - CudaTeamMember(void* shared, const int shared_begin, const int shared_size, - void* scratch_level_1_ptr, const int scratch_level_1_size, - const int arg_league_rank, const int arg_league_size) + CudaTeamMember(void* shared, const size_t shared_begin, + const size_t shared_size, void* scratch_level_1_ptr, + const size_t scratch_level_1_size, const int arg_league_rank, + const int arg_league_size) : m_team_reduce(shared), m_team_shared(static_cast<char*>(shared) + shared_begin, shared_size, scratch_level_1_ptr, scratch_level_1_size), @@ -443,9 +429,9 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::CudaTeamMember> + std::common_type_t<iType1, iType2>, Impl::CudaTeamMember> TeamThreadRange(const Impl::CudaTeamMember& thread, iType1 begin, iType2 end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::CudaTeamMember>( thread, iType(begin), iType(end)); } @@ -460,10 +446,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< - typename 
std::common_type<iType1, iType2>::type, Impl::CudaTeamMember> + std::common_type_t<iType1, iType2>, Impl::CudaTeamMember> TeamVectorRange(const Impl::CudaTeamMember& thread, const iType1& begin, const iType2& end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamVectorRangeBoundariesStruct<iType, Impl::CudaTeamMember>( thread, iType(begin), iType(end)); } @@ -478,10 +464,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::CudaTeamMember> + std::common_type_t<iType1, iType2>, Impl::CudaTeamMember> ThreadVectorRange(const Impl::CudaTeamMember& thread, iType1 arg_begin, iType2 arg_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::CudaTeamMember>( thread, iType(arg_begin), iType(arg_end)); } @@ -513,11 +499,9 @@ KOKKOS_INLINE_FUNCTION void parallel_for( const Closure& closure) { (void)loop_boundaries; (void)closure; -#ifdef __CUDA_ARCH__ - for (iType i = loop_boundaries.start + threadIdx.y; i < loop_boundaries.end; - i += blockDim.y) - closure(i); -#endif + KOKKOS_IF_ON_DEVICE( + (for (iType i = loop_boundaries.start + threadIdx.y; + i < loop_boundaries.end; i += blockDim.y) { closure(i); })) } //---------------------------------------------------------------------------- @@ -531,26 +515,22 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * performed and put into result. 
*/ template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::CudaTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::CudaTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { (void)loop_boundaries; (void)closure; (void)reducer; -#ifdef __CUDA_ARCH__ - typename ReducerType::value_type value; - reducer.init(value); + KOKKOS_IF_ON_DEVICE( + (typename ReducerType::value_type value; - for (iType i = loop_boundaries.start + threadIdx.y; i < loop_boundaries.end; - i += blockDim.y) { - closure(i, value); - } + reducer.init(value); - loop_boundaries.member.team_reduce(reducer, value); + for (iType i = loop_boundaries.start + threadIdx.y; + i < loop_boundaries.end; i += blockDim.y) { closure(i, value); } -#endif + loop_boundaries.member.team_reduce(reducer, value);)) } /** \brief Inter-thread parallel_reduce assuming summation. @@ -562,28 +542,23 @@ KOKKOS_INLINE_FUNCTION * performed and put into result. 
*/ template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::CudaTeamMember>& loop_boundaries, - const Closure& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::CudaTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { (void)loop_boundaries; (void)closure; (void)result; -#ifdef __CUDA_ARCH__ - ValueType val; - Kokkos::Sum<ValueType> reducer(val); + KOKKOS_IF_ON_DEVICE( + (ValueType val; Kokkos::Sum<ValueType> reducer(val); - reducer.init(reducer.reference()); + reducer.init(reducer.reference()); - for (iType i = loop_boundaries.start + threadIdx.y; i < loop_boundaries.end; - i += blockDim.y) { - closure(i, val); - } + for (iType i = loop_boundaries.start + threadIdx.y; + i < loop_boundaries.end; i += blockDim.y) { closure(i, val); } - loop_boundaries.member.team_reduce(reducer, val); - result = reducer.reference(); -#endif + loop_boundaries.member.team_reduce(reducer, val); + result = reducer.reference();)) } template <typename iType, class Closure> @@ -593,60 +568,52 @@ KOKKOS_INLINE_FUNCTION void parallel_for( const Closure& closure) { (void)loop_boundaries; (void)closure; -#ifdef __CUDA_ARCH__ - for (iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x; - i < loop_boundaries.end; i += blockDim.y * blockDim.x) - closure(i); -#endif + KOKKOS_IF_ON_DEVICE((for (iType i = loop_boundaries.start + + threadIdx.y * blockDim.x + threadIdx.x; + i < loop_boundaries.end; + i += blockDim.y * blockDim.x) { closure(i); })) } template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_reduce(const 
Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::CudaTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::CudaTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { (void)loop_boundaries; (void)closure; (void)reducer; -#ifdef __CUDA_ARCH__ - typename ReducerType::value_type value; - reducer.init(value); + KOKKOS_IF_ON_DEVICE((typename ReducerType::value_type value; + reducer.init(value); - for (iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x; - i < loop_boundaries.end; i += blockDim.y * blockDim.x) { - closure(i, value); - } + for (iType i = loop_boundaries.start + + threadIdx.y * blockDim.x + threadIdx.x; + i < loop_boundaries.end; + i += blockDim.y * blockDim.x) { closure(i, value); } - loop_boundaries.member.vector_reduce(reducer, value); - loop_boundaries.member.team_reduce(reducer, value); -#endif + loop_boundaries.member.vector_reduce(reducer, value); + loop_boundaries.member.team_reduce(reducer, value);)) } template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::CudaTeamMember>& loop_boundaries, - const Closure& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::CudaTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { (void)loop_boundaries; (void)closure; (void)result; -#ifdef __CUDA_ARCH__ - ValueType val; - Kokkos::Sum<ValueType> reducer(val); + KOKKOS_IF_ON_DEVICE((ValueType val; Kokkos::Sum<ValueType> reducer(val); - reducer.init(reducer.reference()); + 
reducer.init(reducer.reference()); - for (iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x; - i < loop_boundaries.end; i += blockDim.y * blockDim.x) { - closure(i, val); - } + for (iType i = loop_boundaries.start + + threadIdx.y * blockDim.x + threadIdx.x; + i < loop_boundaries.end; + i += blockDim.y * blockDim.x) { closure(i, val); } - loop_boundaries.member.vector_reduce(reducer); - loop_boundaries.member.team_reduce(reducer); - result = reducer.reference(); -#endif + loop_boundaries.member.vector_reduce(reducer); + loop_boundaries.member.team_reduce(reducer); + result = reducer.reference();)) } //---------------------------------------------------------------------------- @@ -664,16 +631,14 @@ KOKKOS_INLINE_FUNCTION void parallel_for( const Closure& closure) { (void)loop_boundaries; (void)closure; -#ifdef __CUDA_ARCH__ - for (iType i = loop_boundaries.start + threadIdx.x; i < loop_boundaries.end; - i += blockDim.x) { - closure(i); - } - __syncwarp(blockDim.x == 32 - ? 0xffffffff - : ((1 << blockDim.x) - 1) - << (threadIdx.y % (32 / blockDim.x)) * blockDim.x); -#endif + KOKKOS_IF_ON_DEVICE(( + for (iType i = loop_boundaries.start + threadIdx.x; + i < loop_boundaries.end; i += blockDim.x) { closure(i); } + + __syncwarp(blockDim.x == 32 + ? 0xffffffff + : ((1 << blockDim.x) - 1) + << (threadIdx.y % (32 / blockDim.x)) * blockDim.x);)) } //---------------------------------------------------------------------------- @@ -690,26 +655,24 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * constructed value. 
*/ template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::CudaTeamMember> const& loop_boundaries, - Closure const& closure, ReducerType const& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> +parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::CudaTeamMember> const& loop_boundaries, + Closure const& closure, ReducerType const& reducer) { (void)loop_boundaries; (void)closure; (void)reducer; -#ifdef __CUDA_ARCH__ + KOKKOS_IF_ON_DEVICE(( - reducer.init(reducer.reference()); + reducer.init(reducer.reference()); - for (iType i = loop_boundaries.start + threadIdx.x; i < loop_boundaries.end; - i += blockDim.x) { - closure(i, reducer.reference()); - } + for (iType i = loop_boundaries.start + threadIdx.x; + i < loop_boundaries.end; + i += blockDim.x) { closure(i, reducer.reference()); } - Impl::CudaTeamMember::vector_reduce(reducer); + Impl::CudaTeamMember::vector_reduce(reducer); -#endif + )) } /** \brief Intra-thread vector parallel_reduce. @@ -724,25 +687,22 @@ KOKKOS_INLINE_FUNCTION * constructed value. 
*/ template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!is_reducer<ValueType>::value>::type - parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::CudaTeamMember> const& loop_boundaries, - Closure const& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!is_reducer<ValueType>::value> +parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::CudaTeamMember> const& loop_boundaries, + Closure const& closure, ValueType& result) { (void)loop_boundaries; (void)closure; (void)result; -#ifdef __CUDA_ARCH__ - result = ValueType(); + KOKKOS_IF_ON_DEVICE( + (result = ValueType(); - for (iType i = loop_boundaries.start + threadIdx.x; i < loop_boundaries.end; - i += blockDim.x) { - closure(i, result); - } + for (iType i = loop_boundaries.start + threadIdx.x; + i < loop_boundaries.end; i += blockDim.x) { closure(i, result); } - Impl::CudaTeamMember::vector_reduce(Kokkos::Sum<ValueType>(result)); + Impl::CudaTeamMember::vector_reduce(Kokkos::Sum<ValueType>(result)); -#endif + )) } //---------------------------------------------------------------------------- @@ -804,79 +764,84 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( * The last call to closure has final == true. 
*/ template <typename iType, class Closure, typename ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::CudaTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::CudaTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { (void)loop_boundaries; (void)closure; (void)reducer; -#ifdef __CUDA_ARCH__ - - using value_type = typename ReducerType::value_type; - value_type accum; - reducer.init(accum); - const value_type identity = accum; - - // Loop through boundaries by vector-length chunks - // must scan at each iteration - - // All thread "lanes" must loop the same number of times. - // Determine an loop end for all thread "lanes." - // Requires: - // blockDim.x is power of two and thus - // ( end % blockDim.x ) == ( end & ( blockDim.x - 1 ) ) - // 1 <= blockDim.x <= CudaTraits::WarpSize - - const int mask = blockDim.x - 1; - const unsigned active_mask = - blockDim.x == 32 ? 0xffffffff - : ((1 << blockDim.x) - 1) - << (threadIdx.y % (32 / blockDim.x)) * blockDim.x; - const int rem = loop_boundaries.end & mask; // == end % blockDim.x - const int end = loop_boundaries.end + (rem ? blockDim.x - rem : 0); - - for (int i = threadIdx.x; i < end; i += blockDim.x) { - value_type val = identity; - - // First acquire per-lane contributions. - // This sets i's val to i-1's contribution - // to make the latter in_place_shfl_up an - // exclusive scan -- the final accumulation - // of i's val will be included in the second - // closure call later. 
- if (i < loop_boundaries.end && threadIdx.x > 0) closure(i - 1, val, false); - - // Bottom up exclusive scan in triangular pattern - // where each CUDA thread is the root of a reduction tree - // from the zeroth "lane" to itself. - // [t] += [t-1] if t >= 1 - // [t] += [t-2] if t >= 2 - // [t] += [t-4] if t >= 4 - // ... - // This differs from the non-reducer overload, where an inclusive scan was - // implemented, because in general the binary operator cannot be inverted - // and we would not be able to remove the inclusive contribution by - // inversion. - for (int j = 1; j < (int)blockDim.x; j <<= 1) { - value_type tmp = identity; - Impl::in_place_shfl_up(tmp, val, j, blockDim.x, active_mask); - if (j <= (int)threadIdx.x) { - reducer.join(val, tmp); + KOKKOS_IF_ON_DEVICE(( + + using value_type = typename ReducerType::value_type; + + value_type accum; + + reducer.init(accum); + + const value_type identity = accum; + + // Loop through boundaries by vector-length chunks + // must scan at each iteration + + // All thread "lanes" must loop the same number of times. + // Determine an loop end for all thread "lanes." + // Requires: + // blockDim.x is power of two and thus + // ( end % blockDim.x ) == ( end & ( blockDim.x - 1 ) ) + // 1 <= blockDim.x <= CudaTraits::WarpSize + + const int mask = blockDim.x - 1; + const unsigned active_mask = + blockDim.x == 32 + ? 0xffffffff + : ((1 << blockDim.x) - 1) + << (threadIdx.y % (32 / blockDim.x)) * blockDim.x; + const int rem = loop_boundaries.end & mask; // == end % blockDim.x + const int end = loop_boundaries.end + (rem ? blockDim.x - rem : 0); + + for (int i = threadIdx.x; i < end; i += blockDim.x) { + value_type val = identity; + + // First acquire per-lane contributions. + // This sets i's val to i-1's contribution + // to make the latter in_place_shfl_up an + // exclusive scan -- the final accumulation + // of i's val will be included in the second + // closure call later. 
+ if (i < loop_boundaries.end && threadIdx.x > 0) { + closure(i - 1, val, false); + } + + // Bottom up exclusive scan in triangular pattern + // where each CUDA thread is the root of a reduction tree + // from the zeroth "lane" to itself. + // [t] += [t-1] if t >= 1 + // [t] += [t-2] if t >= 2 + // [t] += [t-4] if t >= 4 + // ... + // This differs from the non-reducer overload, where an inclusive scan + // was implemented, because in general the binary operator cannot be + // inverted and we would not be able to remove the inclusive + // contribution by inversion. + for (int j = 1; j < (int)blockDim.x; j <<= 1) { + value_type tmp = identity; + Impl::in_place_shfl_up(tmp, val, j, blockDim.x, active_mask); + if (j <= (int)threadIdx.x) { + reducer.join(val, tmp); + } + } + + // Include accumulation + reducer.join(val, accum); + + // Update i's contribution into the val + // and add it to accum for next round + if (i < loop_boundaries.end) closure(i, val, true); + Impl::in_place_shfl(accum, val, mask, blockDim.x, active_mask); } - } - - // Include accumulation - reducer.join(val, accum); - // Update i's contribution into the val - // and add it to accum for next round - if (i < loop_boundaries.end) closure(i, val, true); - Impl::in_place_shfl(accum, val, mask, blockDim.x, active_mask); - } - -#endif + )) } //---------------------------------------------------------------------------- @@ -909,13 +874,13 @@ KOKKOS_INLINE_FUNCTION void single( const Impl::VectorSingleStruct<Impl::CudaTeamMember>&, const FunctorType& lambda) { (void)lambda; -#ifdef __CUDA_ARCH__ - if (threadIdx.x == 0) lambda(); - __syncwarp(blockDim.x == 32 - ? 0xffffffff - : ((1 << blockDim.x) - 1) - << (threadIdx.y % (32 / blockDim.x)) * blockDim.x); -#endif + KOKKOS_IF_ON_DEVICE(( + if (threadIdx.x == 0) { lambda(); } + + __syncwarp(blockDim.x == 32 + ? 
0xffffffff + : ((1 << blockDim.x) - 1) + << (threadIdx.y % (32 / blockDim.x)) * blockDim.x);)) } template <class FunctorType> @@ -923,13 +888,13 @@ KOKKOS_INLINE_FUNCTION void single( const Impl::ThreadSingleStruct<Impl::CudaTeamMember>&, const FunctorType& lambda) { (void)lambda; -#ifdef __CUDA_ARCH__ - if (threadIdx.x == 0 && threadIdx.y == 0) lambda(); - __syncwarp(blockDim.x == 32 - ? 0xffffffff - : ((1 << blockDim.x) - 1) - << (threadIdx.y % (32 / blockDim.x)) * blockDim.x); -#endif + KOKKOS_IF_ON_DEVICE(( + if (threadIdx.x == 0 && threadIdx.y == 0) { lambda(); } + + __syncwarp(blockDim.x == 32 + ? 0xffffffff + : ((1 << blockDim.x) - 1) + << (threadIdx.y % (32 / blockDim.x)) * blockDim.x);)) } template <class FunctorType, class ValueType> @@ -938,14 +903,16 @@ KOKKOS_INLINE_FUNCTION void single( const FunctorType& lambda, ValueType& val) { (void)lambda; (void)val; -#ifdef __CUDA_ARCH__ - if (threadIdx.x == 0) lambda(val); - unsigned mask = blockDim.x == 32 - ? 0xffffffff - : ((1 << blockDim.x) - 1) - << ((threadIdx.y % (32 / blockDim.x)) * blockDim.x); - Impl::in_place_shfl(val, val, 0, blockDim.x, mask); -#endif + KOKKOS_IF_ON_DEVICE( + (if (threadIdx.x == 0) { lambda(val); } + + unsigned mask = + blockDim.x == 32 + ? 
0xffffffff + : ((1 << blockDim.x) - 1) + << ((threadIdx.y % (32 / blockDim.x)) * blockDim.x); + + Impl::in_place_shfl(val, val, 0, blockDim.x, mask);)) } template <class FunctorType, class ValueType> @@ -955,12 +922,10 @@ KOKKOS_INLINE_FUNCTION void single( (void)single_struct; (void)lambda; (void)val; -#ifdef __CUDA_ARCH__ - if (threadIdx.x == 0 && threadIdx.y == 0) { - lambda(val); - } - single_struct.team_member.team_broadcast(val, 0); -#endif + KOKKOS_IF_ON_DEVICE( + (if (threadIdx.x == 0 && threadIdx.y == 0) { lambda(val); } + + single_struct.team_member.team_broadcast(val, 0);)) } } // namespace Kokkos diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp index 31d3c47e1c9c9af3b6c6d8c918abe01dd0b238fe..d3d881424c1359f77402519196f083eb7a8a75f4 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp @@ -79,9 +79,9 @@ struct in_place_shfl_op { // sizeof(Scalar) <= sizeof(int) case template <class Scalar> // requires _assignable_from_bits<Scalar> - __device__ inline typename std::enable_if<sizeof(Scalar) <= sizeof(int)>::type - operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width, - unsigned mask = shfl_all_mask) const noexcept { + __device__ inline std::enable_if_t<sizeof(Scalar) <= sizeof(int)> operator()( + Scalar& out, Scalar const& in, int lane_or_delta, int width, + unsigned mask = shfl_all_mask) const noexcept { using shfl_type = int; union conv_type { Scalar orig; @@ -106,10 +106,9 @@ struct in_place_shfl_op { // sizeof(Scalar) == sizeof(double) case // requires _assignable_from_bits<Scalar> template <class Scalar> - __device__ inline - typename std::enable_if<sizeof(Scalar) == sizeof(double)>::type - operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width, - unsigned mask = shfl_all_mask) const noexcept { + __device__ inline std::enable_if_t<sizeof(Scalar) == 
sizeof(double)> + operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width, + unsigned mask = shfl_all_mask) const noexcept { //------------------------------------------------ reinterpret_cast<double&>(out) = self().do_shfl_op( mask, *reinterpret_cast<double const*>(&in), lane_or_delta, width); @@ -119,10 +118,9 @@ struct in_place_shfl_op { // sizeof(Scalar) == sizeof(double) case // requires _assignable_from_bits<Scalar> template <typename Scalar> - __device__ inline - typename std::enable_if<sizeof(Scalar) == sizeof(double)>::type - operator()(Scalar& out, const Scalar& val, int lane_or_delta, int width, - unsigned mask = shfl_all_mask) const noexcept { + __device__ inline std::enable_if_t<sizeof(Scalar) == sizeof(double)> + operator()(Scalar& out, const Scalar& val, int lane_or_delta, int width, + unsigned mask = shfl_all_mask) const noexcept { //------------------------------------------------ int lo = __double2loint(*reinterpret_cast<const double*>(&val)); int hi = __double2hiint(*reinterpret_cast<const double*>(&val)); @@ -136,10 +134,9 @@ struct in_place_shfl_op { // sizeof(Scalar) > sizeof(double) case template <typename Scalar> - __device__ inline - typename std::enable_if<(sizeof(Scalar) > sizeof(double))>::type - operator()(Scalar& out, const Scalar& val, int lane_or_delta, int width, - unsigned mask = shfl_all_mask) const noexcept { + __device__ inline std::enable_if_t<(sizeof(Scalar) > sizeof(double))> + operator()(Scalar& out, const Scalar& val, int lane_or_delta, int width, + unsigned mask = shfl_all_mask) const noexcept { // TODO DSH shouldn't this be KOKKOS_IMPL_CUDA_MAX_SHFL_SIZEOF instead of // sizeof(int)? 
(Need benchmarks to decide which is faster) using shuffle_as_t = int; diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index dec6ef15e13c01b2778b78fc91a5ed53f200ae38..a1758208252c9c1e22aa8d1c1f636b5b2e12278c 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -69,12 +69,10 @@ struct CudaTextureFetch { // Deference operator pulls through texture object and returns by value template <typename iType> KOKKOS_INLINE_FUNCTION ValueType operator[](const iType& i) const { -#if defined(__CUDA_ARCH__) && (300 <= __CUDA_ARCH__) - AliasType v = tex1Dfetch<AliasType>(m_obj, i + m_offset); - return *(reinterpret_cast<ValueType*>(&v)); -#else - return m_ptr[i]; -#endif + KOKKOS_IF_ON_DEVICE( + (AliasType v = tex1Dfetch<AliasType>(m_obj, i + m_offset); + return *(reinterpret_cast<ValueType*>(&v));)) + KOKKOS_IF_ON_HOST((return m_ptr[i];)) } // Pointer to referenced memory @@ -139,11 +137,13 @@ struct CudaLDGFetch { template <typename iType> KOKKOS_INLINE_FUNCTION ValueType operator[](const iType& i) const { -#if defined(__CUDA_ARCH__) && (350 <= __CUDA_ARCH__) - AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i])); - return *(reinterpret_cast<ValueType*>(&v)); -#else +#if defined(KOKKOS_ARCH_KEPLER30) || defined(KOKKOS_ARCH_KEPLER32) return m_ptr[i]; +#else + KOKKOS_IF_ON_DEVICE( + (AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i])); + return *(reinterpret_cast<ValueType*>(&v));)) + KOKKOS_IF_ON_HOST((return m_ptr[i];)) #endif } @@ -201,7 +201,7 @@ namespace Impl { */ template <class Traits> class ViewDataHandle< - Traits, typename std::enable_if<( + Traits, std::enable_if_t<( // Is Cuda memory space (std::is_same<typename Traits::memory_space, Kokkos::CudaSpace>::value || @@ -215,19 +215,18 @@ class ViewDataHandle< sizeof(typename Traits::const_value_type) == 8 || sizeof(typename Traits::const_value_type) == 16) 
&& // Random access trait - (Traits::memory_traits::is_random_access != 0))>::type> { + (Traits::memory_traits::is_random_access != 0))>> { public: using track_type = Kokkos::Impl::SharedAllocationTracker; using value_type = typename Traits::const_value_type; using return_type = typename Traits::const_value_type; // NOT a reference - using alias_type = typename std::conditional< + using alias_type = std::conditional_t< (sizeof(value_type) == 4), int, - typename std::conditional< + std::conditional_t< (sizeof(value_type) == 8), ::int2, - typename std::conditional<(sizeof(value_type) == 16), ::int4, - void>::type>::type>::type; + std::conditional_t<(sizeof(value_type) == 16), ::int4, void>>>; #if defined(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC) using handle_type = Kokkos::Impl::CudaLDGFetch<value_type, alias_type>; diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp index fc52e415145218afa2c495e9f055e051e9921305..fb3a6b138fea5bbf48654320b8c787b3f999cdad 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp @@ -63,16 +63,14 @@ class ParallelFor<FunctorType, Kokkos::WorkGraphPolicy<Traits...>, FunctorType m_functor; template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_one(const std::int32_t w) const noexcept { + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_one( + const std::int32_t w) const noexcept { m_functor(w); } template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_one(const std::int32_t w) const noexcept { + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_one( + const std::int32_t w) const noexcept { const TagType t{}; m_functor(t, w); } diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp 
b/packages/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp index 59aac2b5269de2eae3a3861d8289de479c720e04..dcc5863721e9dc364969316f5aef59efd92de48e 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp @@ -50,6 +50,18 @@ #include <hip/hip_runtime.h> +// FIXME_HIP ROCm 4.5 version header include would be <rocm/rocm_version.h> +#if __has_include(<rocm_version.h>) +#include <rocm_version.h> +#define KOKKOS_IMPL_ROCM_VERSION \ + ROCM_VERSION_MAJOR * 10000 + ROCM_VERSION_MINOR * 100 + ROCM_VERSION_PATCH +#endif + +// FIXME_HIP workaround for ROCm version less than 5.0.2 +#if KOKKOS_IMPL_ROCM_VERSION < 50002 +#define KOKKOS_IMPL_HIP_ABORT_DOES_NOT_PRINT_MESSAGE +#endif + namespace Kokkos { namespace Impl { @@ -57,14 +69,8 @@ namespace Impl { // directive to the optimizer. [[noreturn]] __device__ __attribute__((noinline)) inline void hip_abort( char const *msg) { -#ifdef NDEBUG - (void)msg; -#else - // disable printf on release builds, as it has a non-trivial performance - // impact - printf("Aborting with message `%s'.\n", msg); -#endif - abort(); + const char empty[] = ""; + __assert_fail(msg, empty, 0, empty); // This loop is never executed. It's intended to suppress warnings that the // function returns, even though it does not. This is necessary because // abort() is not marked as [[noreturn]], even though it does not return. 
diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Atomic.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Atomic.hpp index 263ba97d735705c9c02c67938e0a2aa3bf215654..88bcab62645e651f5dba4d76c832c2d9cb7013d2 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Atomic.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Atomic.hpp @@ -80,9 +80,9 @@ __inline__ __device__ float atomic_exchange(volatile float *const dest, } template <typename T> -__inline__ __device__ T atomic_exchange( - volatile T *const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T &>::type val) { +__inline__ __device__ T +atomic_exchange(volatile T *const dest, + std::enable_if_t<sizeof(T) == sizeof(int), const T &> val) { int tmp = atomicExch(reinterpret_cast<int *>(const_cast<T *>(dest)), *reinterpret_cast<int *>(const_cast<T *>(&val))); return reinterpret_cast<T &>(tmp); @@ -91,9 +91,10 @@ __inline__ __device__ T atomic_exchange( template <typename T> __inline__ __device__ T atomic_exchange( volatile T *const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T &>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T &> + val) { using type = unsigned long long int; type tmp = atomicExch(reinterpret_cast<type *>(const_cast<T *>(dest)), @@ -102,11 +103,10 @@ __inline__ __device__ T atomic_exchange( } template <typename T> -__inline__ __device__ T -atomic_exchange(volatile T *const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(long long), - const T>::type &val) { +__inline__ __device__ T atomic_exchange( + volatile T *const dest, + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long long), + const T> &val) { T return_val; int done = 0; unsigned int active = __ballot(1); @@ -130,7 +130,7 @@ atomic_exchange(volatile T *const dest, template <typename T> __inline__ __device__ void atomic_assign( volatile T 
*const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T &>::type val) { + std::enable_if_t<sizeof(T) == sizeof(int), const T &> val) { atomicExch(reinterpret_cast<int *>(const_cast<T *>(dest)), *reinterpret_cast<int *>(const_cast<T *>(&val))); } @@ -138,9 +138,10 @@ __inline__ __device__ void atomic_assign( template <typename T> __inline__ __device__ void atomic_assign( volatile T *const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T &>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T &> + val) { using type = unsigned long long int; atomicExch(reinterpret_cast<type *>(const_cast<T *>(dest)), *reinterpret_cast<type *>(const_cast<T *>(&val))); @@ -149,9 +150,10 @@ __inline__ __device__ void atomic_assign( template <typename T> __inline__ __device__ void atomic_assign( volatile T *const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(unsigned long long int), - const T &>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) != sizeof(unsigned long long int), + const T &> + val) { atomic_exchange(dest, val); } @@ -177,7 +179,7 @@ inline __device__ unsigned long long int atomic_compare_exchange( template <class T> __inline__ __device__ T atomic_compare_exchange( volatile T *dest, T compare, - typename std::enable_if<sizeof(T) == sizeof(int), const T &>::type val) { + std::enable_if_t<sizeof(T) == sizeof(int), const T &> val) { // FIXME_HIP UB union U { int i; @@ -194,8 +196,8 @@ __inline__ __device__ T atomic_compare_exchange( template <class T> __inline__ __device__ T atomic_compare_exchange( volatile T *dest, T compare, - typename std::enable_if<sizeof(T) == sizeof(unsigned long long int), - const T &>::type val) { + std::enable_if_t<sizeof(T) == sizeof(unsigned long long int), const T &> + val) { // FIXME_HIP UB union U { unsigned long long int i; @@ -213,9 +215,8 
@@ __inline__ __device__ T atomic_compare_exchange( template <typename T> __inline__ __device__ T atomic_compare_exchange( volatile T *const dest, const T &compare, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(long long), - const T>::type &val) { + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long long), + const T> &val) { T return_val; int done = 0; unsigned int active = __ballot(1); @@ -256,9 +257,9 @@ inline __device__ float atomic_fetch_add(volatile float *dest, } template <typename T> -inline __device__ T atomic_fetch_add( - volatile T *const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T>::type val) { +inline __device__ T +atomic_fetch_add(volatile T *const dest, + std::enable_if_t<sizeof(T) == sizeof(int), const T> val) { // FIXME_HIP UB union U { int i; @@ -281,8 +282,7 @@ inline __device__ T atomic_fetch_add( template <typename T> inline __device__ T atomic_fetch_add( volatile T *const dest, - typename std::enable_if<sizeof(T) == sizeof(long long), const T>::type - val) { + std::enable_if_t<sizeof(T) == sizeof(long long), const T> val) { // FIXME_HIP UB union U { unsigned long long i; @@ -343,11 +343,11 @@ __inline__ __device__ long long atomic_fetch_add(volatile long long *dest, } template <class T> -__inline__ __device__ T -atomic_fetch_add(volatile T *dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(long long), - const T &>::type val) { +__inline__ __device__ T atomic_fetch_add( + volatile T *dest, + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long long), + const T &> + val) { T return_val; int done = 0; unsigned int active = __ballot(1); @@ -424,8 +424,7 @@ __inline__ __device__ long long atomic_fetch_sub(volatile long long *dest, template <class T> __inline__ __device__ T atomic_fetch_sub( - volatile T *dest, - typename std::enable_if<sizeof(T) == sizeof(int), T>::type val) { + volatile T *dest, std::enable_if_t<sizeof(T) == 
sizeof(int), T> val) { // FIXME_HIP UB union U { int i; @@ -448,8 +447,7 @@ __inline__ __device__ T atomic_fetch_sub( template <typename T> inline __device__ T atomic_fetch_sub( volatile T *const dest, - typename std::enable_if<sizeof(T) == sizeof(long long), const T>::type - val) { + std::enable_if_t<sizeof(T) == sizeof(long long), const T> val) { // FIXME_HIP UB union U { unsigned long long i; @@ -472,8 +470,7 @@ inline __device__ T atomic_fetch_sub( template <class T> __inline__ __device__ T atomic_fetch_sub( - volatile T *dest, - typename std::enable_if<sizeof(T) == sizeof(char), T>::type val) { + volatile T *dest, std::enable_if_t<sizeof(T) == sizeof(char), T> val) { unsigned int oldval, newval, assume; oldval = *reinterpret_cast<volatile unsigned int *>(dest); @@ -488,8 +485,7 @@ __inline__ __device__ T atomic_fetch_sub( template <class T> __inline__ __device__ T atomic_fetch_sub( - volatile T *dest, - typename std::enable_if<sizeof(T) == sizeof(short), T>::type val) { + volatile T *dest, std::enable_if_t<sizeof(T) == sizeof(short), T> val) { unsigned int oldval, newval, assume; oldval = *reinterpret_cast<int *>(dest); @@ -503,11 +499,10 @@ __inline__ __device__ T atomic_fetch_sub( } template <typename T> -__inline__ __device__ T -atomic_fetch_sub(volatile T *const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(long long), - const T>::type &val) { +__inline__ __device__ T atomic_fetch_sub( + volatile T *const dest, + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long long), + const T> &val) { T return_val; int done = 0; unsigned int active = __ballot(1); diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp index 10d9bc0150a2abe477f502c3ef05d4dd7dd1aa9f..87551ae50896d0f497ab561babc5890b4d04b3c6 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp +++ 
b/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp @@ -249,12 +249,11 @@ unsigned hip_get_preferred_team_blocksize(HIPInternal const *hip_instance, get_hip_func_attributes_impl<DriverType, LaunchBounds, BlockType::Preferred>(); // get preferred blocksize limited by register usage - using namespace std::placeholders; const unsigned tperb_reg = hip_get_preferred_blocksize<DriverType, LaunchBounds>(); return hip_internal_get_block_size<BlockType::Preferred, DriverType, LaunchBounds>( - hip_instance, std::bind(f, attr, _1), tperb_reg); + hip_instance, std::bind(f, attr, std::placeholders::_1), tperb_reg); } // Standardized blocksize deduction for non-teams parallel constructs with LDS @@ -291,10 +290,9 @@ unsigned hip_get_max_team_blocksize(HIPInternal const *hip_instance, hipFuncAttributes attr = get_hip_func_attributes_impl<DriverType, LaunchBounds, BlockType::Max>(); // get max blocksize - using namespace std::placeholders; const unsigned tperb_reg = hip_get_max_blocksize<DriverType, LaunchBounds>(); return hip_internal_get_block_size<BlockType::Max, DriverType, LaunchBounds>( - hip_instance, std::bind(f, attr, _1), tperb_reg); + hip_instance, std::bind(f, attr, std::placeholders::_1), tperb_reg); } } // namespace Impl diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp index a8a0496afebc8776ef99ba4195d6b7b43a58a497..3785cfe80b4974a4a0f74d2e0376b116f7641e6a 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp @@ -45,6 +45,10 @@ /*--------------------------------------------------------------------------*/ /* Kokkos interfaces */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Core.hpp> #include <HIP/Kokkos_HIP_Instance.hpp> @@ -175,8 +179,9 @@ HIPInternal::~HIPInternal() { int HIPInternal::verify_is_initialized(const char *const label) const { if (m_hipDev < 0) { 
- std::cerr << "Kokkos::Experimental::HIP::" << label - << " : ERROR device not initialized" << std::endl; + Kokkos::abort((std::string("Kokkos::Experimental::HIP::") + label + + " : ERROR device not initialized\n") + .c_str()); } return 0 <= m_hipDev; } @@ -421,10 +426,13 @@ void HIPInternal::finalize() { this->fence("Kokkos::HIPInternal::finalize: fence on finalization"); was_finalized = true; - if (nullptr != m_scratchSpace || nullptr != m_scratchFlags) { - if (this == &singleton()) - (void)Kokkos::Impl::hip_global_unique_token_locks(true); + if (this == &singleton()) { + (void)Kokkos::Impl::hip_global_unique_token_locks(true); + KOKKOS_IMPL_HIP_SAFE_CALL(hipHostFree(constantMemHostStaging)); + KOKKOS_IMPL_HIP_SAFE_CALL(hipEventDestroy(constantMemReusable)); + } + if (nullptr != m_scratchSpace || nullptr != m_scratchFlags) { using RecordHIP = Kokkos::Impl::SharedAllocationRecord<Kokkos::Experimental::HIPSpace>; @@ -436,35 +444,30 @@ void HIPInternal::finalize() { if (m_manage_stream && m_stream != nullptr) KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamDestroy(m_stream)); + } - m_hipDev = -1; - m_hipArch = -1; - m_multiProcCount = 0; - m_maxWarpCount = 0; - m_maxBlock = {0, 0, 0}; - m_maxSharedWords = 0; - m_maxShmemPerBlock = 0; - m_scratchSpaceCount = 0; - m_scratchFlagsCount = 0; - m_scratchSpace = nullptr; - m_scratchFlags = nullptr; - m_stream = nullptr; - m_team_scratch_current_size = 0; - m_team_scratch_ptr = nullptr; + m_hipDev = -1; + m_hipArch = -1; + m_multiProcCount = 0; + m_maxWarpCount = 0; + m_maxBlock = {0, 0, 0}; + m_maxSharedWords = 0; + m_maxShmemPerBlock = 0; + m_scratchSpaceCount = 0; + m_scratchFlagsCount = 0; + m_scratchSpace = nullptr; + m_scratchFlags = nullptr; + m_stream = nullptr; + m_team_scratch_current_size = 0; + m_team_scratch_ptr = nullptr; + + KOKKOS_IMPL_HIP_SAFE_CALL(hipFree(m_scratch_locks)); + m_scratch_locks = nullptr; - KOKKOS_IMPL_HIP_SAFE_CALL(hipFree(m_scratch_locks)); - m_scratch_locks = nullptr; - } if (nullptr != 
d_driverWorkArray) { KOKKOS_IMPL_HIP_SAFE_CALL(hipHostFree(d_driverWorkArray)); d_driverWorkArray = nullptr; } - - // only destroy these if we're finalizing the singleton - if (this == &singleton()) { - KOKKOS_IMPL_HIP_SAFE_CALL(hipHostFree(constantMemHostStaging)); - KOKKOS_IMPL_HIP_SAFE_CALL(hipEventDestroy(constantMemReusable)); - } } char *HIPInternal::get_next_driver(size_t driverTypeSize) const { diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp index e9cfbf99f7cf11a3773a18b4637363e42a377888..f1ffaf3753a3edef4431f1838d7cbd302f410277 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #include <HIP/Kokkos_HIP_Locks.hpp> diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp index 24b05f293c8ea5d0422b7261befb0a4de976a83a..212bbb9ecd8a35f30f0d121d1b368af087d13c8a 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp @@ -226,15 +226,14 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, WorkTag, void>::type; - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; + using Analysis = + Kokkos::Impl::FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, + ReducerTypeFwd>; public: - using pointer_type = typename ValueTraits::pointer_type; - using value_type = typename ValueTraits::value_type; - using reference_type = typename 
ValueTraits::reference_type; + using pointer_type = typename Analysis::pointer_type; + using value_type = typename Analysis::value_type; + using reference_type = typename Analysis::reference_type; using functor_type = FunctorType; using size_type = Experimental::HIP::size_type; @@ -261,17 +260,19 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, } inline __device__ void operator()() const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / sizeof(size_type)> - word_count(ValueTraits::value_size( + word_count(Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)) / sizeof(size_type)); { - reference_type value = ValueInit::init( - ReducerConditional::select(m_functor, m_reducer), + reference_type value = final_reducer.init(reinterpret_cast<pointer_type>( Experimental::kokkos_impl_hip_shared_memory<size_type>() + - threadIdx.y * word_count.value); + threadIdx.y * word_count.value)); // Number of blocks is bounded so that the reduction can be limited to two // passes. Each thread block is given an approximately equal amount of @@ -284,10 +285,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, // Reduce with final value at blockDim.y - 1 location. 
// Problem: non power-of-two blockDim - if (::Kokkos::Impl::hip_single_inter_block_reduce_scan< - false, ReducerTypeFwd, WorkTagFwd>( - ReducerConditional::select(m_functor, m_reducer), blockIdx.x, - gridDim.x, Experimental::kokkos_impl_hip_shared_memory<size_type>(), + if (::Kokkos::Impl::hip_single_inter_block_reduce_scan<false>( + final_reducer, blockIdx.x, gridDim.x, + Experimental::kokkos_impl_hip_shared_memory<size_type>(), m_scratch_space, m_scratch_flags)) { // This is the final block with the final result at the final threads' // location @@ -299,8 +299,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, : m_scratch_space; if (threadIdx.y == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), shared); + final_reducer.final(reinterpret_cast<value_type*>(shared)); } if (Experimental::Impl::HIPTraits::WarpSize < word_count.value) { @@ -337,6 +336,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, } inline void execute() { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + using ClosureType = ParallelReduce<FunctorType, Policy, ReducerType, Kokkos::Experimental::HIP>; const auto nwork = m_policy.m_num_tiles; @@ -356,7 +358,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, m_scratch_space = ::Kokkos::Experimental::Impl::hip_internal_scratch_space( m_policy.space(), - ValueTraits::value_size( + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)) * block_size /* block_size == max block_count */); m_scratch_flags = @@ -380,31 +382,24 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, m_policy.space().impl_internal_space_instance(), false); // copy to device and execute - if (!m_result_ptr_device_accessible) { - m_policy.space().fence( - "Kokkos::Impl::ParallelReduce<MDRangePolicy,HIP>: fence 
because " - "reduction can't access result storage location"); - - if (m_result_ptr) { - const int size = ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)); - DeepCopy<HostSpace, Experimental::HIPSpace>(m_result_ptr, - m_scratch_space, size); - } + if (!m_result_ptr_device_accessible && m_result_ptr) { + const int size = Analysis::value_size( + ReducerConditional::select(m_functor, m_reducer)); + DeepCopy<HostSpace, Experimental::HIPSpace, Experimental::HIP>( + m_policy.space(), m_result_ptr, m_scratch_space, size); } } else { if (m_result_ptr) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); + final_reducer.init(m_result_ptr); } } } template <class ViewType> - ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, - const ViewType& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value, - void*>::type = nullptr) + ParallelReduce( + const FunctorType& arg_functor, const Policy& arg_policy, + const ViewType& arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value, void*> = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp index 14a282cc31dbb5d0eaa6e3a4578aec9c0cbaa0df..5c871e0d615fc58bb01b93566bc0ab7a0ad892b2 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp @@ -76,16 +76,14 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, ParallelFor& operator=(const ParallelFor&) = delete; template <class TagType> - inline __device__ - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const Member i) const { + inline __device__ std::enable_if_t<std::is_void<TagType>::value> exec_range( + const Member i) const { m_functor(i); } template <class TagType> - inline __device__ - typename 
std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const Member i) const { + inline __device__ std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const Member i) const { m_functor(TagType(), i); } @@ -154,15 +152,14 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, WorkTag, void>::type; - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; + using Analysis = + Kokkos::Impl::FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, + ReducerTypeFwd>; public: - using pointer_type = typename ValueTraits::pointer_type; - using value_type = typename ValueTraits::value_type; - using reference_type = typename ValueTraits::reference_type; + using pointer_type = typename Analysis::pointer_type; + using value_type = typename Analysis::value_type; + using reference_type = typename Analysis::reference_type; using functor_type = FunctorType; using size_type = Kokkos::Experimental::HIP::size_type; using index_type = typename Policy::index_type; @@ -183,7 +180,7 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, std::lock_guard<std::mutex> m_shared_memory_lock; static bool constexpr UseShflReduction = - static_cast<bool>(ValueTraits::StaticValueSize); + static_cast<bool>(Analysis::StaticValueSize); private: struct ShflReductionTag {}; @@ -191,39 +188,37 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, // Make the exec_range calls call to Reduce::DeviceIterateTile template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update) const { + __device__ inline 
std::enable_if_t<std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update) const { m_functor(i, update); } template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update) const { + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update) const { m_functor(TagType(), i, update); } public: __device__ inline void operator()() const { - using ReductionTag = - typename std::conditional<UseShflReduction, ShflReductionTag, - SHMEMReductionTag>::type; + using ReductionTag = std::conditional_t<UseShflReduction, ShflReductionTag, + SHMEMReductionTag>; run(ReductionTag{}); } __device__ inline void run(SHMEMReductionTag) const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / sizeof(size_type)> - word_count(ValueTraits::value_size( + word_count(Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)) / sizeof(size_type)); + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); { - reference_type value = ValueInit::init( - ReducerConditional::select(m_functor, m_reducer), + reference_type value = final_reducer.init(reinterpret_cast<pointer_type>( ::Kokkos::Experimental::kokkos_impl_hip_shared_memory<size_type>() + - threadIdx.y * word_count.value); + threadIdx.y * word_count.value)); // Number of blocks is bounded so that the reduction can be limited to two // passes. 
Each thread block is given an approximately equal amount of @@ -243,10 +238,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, // Shortcut for length zero reduction bool do_final_reduction = m_policy.begin() == m_policy.end(); if (!do_final_reduction) - do_final_reduction = hip_single_inter_block_reduce_scan< - false, ReducerTypeFwd, WorkTagFwd>( - ReducerConditional::select(m_functor, m_reducer), blockIdx.x, - gridDim.x, + do_final_reduction = hip_single_inter_block_reduce_scan<false>( + final_reducer, blockIdx.x, gridDim.x, ::Kokkos::Experimental::kokkos_impl_hip_shared_memory<size_type>(), m_scratch_space, m_scratch_flags); if (do_final_reduction) { @@ -261,8 +254,7 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, : m_scratch_space; if (threadIdx.y == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), shared); + final_reducer.final(reinterpret_cast<value_type*>(shared)); } if (::Kokkos::Experimental::Impl::HIPTraits::WarpSize < @@ -277,8 +269,11 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, } __device__ inline void run(ShflReductionTag) const { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + value_type value; - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), &value); + final_reducer.init(&value); // Number of blocks is bounded so that the reduction can be limited to two // passes. Each thread block is given an approximately equal amount of work // to perform. Accumulate the values for this block. The accumulation @@ -302,25 +297,18 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, (max_active_thread == 0) ? 
blockDim.y : max_active_thread; value_type init; - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), &init); + final_reducer.init(&init); if (m_policy.begin() == m_policy.end()) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<void*>(&value)); + final_reducer.final(&value); pointer_type const final_result = m_result_ptr_device_accessible ? m_result_ptr : result; *final_result = value; - } else if (Impl::hip_inter_block_shuffle_reduction<ReducerTypeFwd, - ValueJoin, WorkTagFwd>( - value, init, - ValueJoin(ReducerConditional::select(m_functor, m_reducer)), - m_scratch_space, result, m_scratch_flags, - max_active_thread)) { + } else if (Impl::hip_inter_block_shuffle_reduction<>( + value, init, final_reducer, m_scratch_space, result, + m_scratch_flags, max_active_thread)) { unsigned int const id = threadIdx.y * blockDim.x + threadIdx.x; if (id == 0) { - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<void*>(&value)); + final_reducer.final(&value); pointer_type const final_result = m_result_ptr_device_accessible ? 
m_result_ptr : result; *final_result = value; @@ -342,9 +330,12 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, } inline void execute() { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + const index_type nwork = m_policy.end() - m_policy.begin(); - const bool need_device_set = ReduceFunctorHasInit<FunctorType>::value || - ReduceFunctorHasFinal<FunctorType>::value || + const bool need_device_set = Analysis::has_init_member_function || + Analysis::has_final_member_function || !m_result_ptr_host_accessible || !std::is_same<ReducerType, InvalidType>::value; if ((nwork > 0) || need_device_set) { @@ -358,7 +349,7 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, m_scratch_space = ::Kokkos::Experimental::Impl::hip_internal_scratch_space( m_policy.space(), - ValueTraits::value_size( + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)) * block_size /* block_size == max block_count */); m_scratch_flags = @@ -390,31 +381,25 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, m_policy.space().impl_internal_space_instance(), false); // copy to device and execute - if (!m_result_ptr_device_accessible) { - m_policy.space().impl_internal_space_instance()->fence( - "Kokkos::Impl::ParallelReduce<RangePolicy,HIP>: fence because " - "reduction can't access result storage location"); - - if (m_result_ptr) { - const int size = ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)); - DeepCopy<HostSpace, ::Kokkos::Experimental::HIPSpace>( - m_result_ptr, m_scratch_space, size); - } + if (!m_result_ptr_device_accessible && m_result_ptr) { + const int size = Analysis::value_size( + ReducerConditional::select(m_functor, m_reducer)); + DeepCopy<HostSpace, ::Kokkos::Experimental::HIPSpace, + ::Kokkos::Experimental::HIP>(m_policy.space(), m_result_ptr, + m_scratch_space, size); } } else { if (m_result_ptr) 
{ - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); + final_reducer.init(m_result_ptr); } } } template <class ViewType> - ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, - const ViewType& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value, - void*>::type = nullptr) + ParallelReduce( + const FunctorType& arg_functor, const Policy& arg_policy, + const ViewType& arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value, void*> = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -459,13 +444,12 @@ class ParallelScanHIPBase { using WorkRange = typename Policy::WorkRange; using LaunchBounds = typename Policy::launch_bounds; - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; + using Analysis = Kokkos::Impl::FunctorAnalysis<FunctorPatternInterface::SCAN, + Policy, FunctorType>; public: - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; using functor_type = FunctorType; using size_type = Kokkos::Experimental::HIP::size_type; using index_type = typename Policy::index_type; @@ -489,33 +473,31 @@ class ParallelScanHIPBase { private: template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update, - const bool final_result) const { + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update, const bool final_result) const { m_functor(i, update, final_result); } template <class TagType> - __device__ inline - typename 
std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const Member& i, reference_type update, - const bool final_result) const { + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const Member& i, reference_type update, const bool final_result) const { m_functor(TagType(), i, update, final_result); } //---------------------------------------- __device__ inline void initial() const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / + typename Analysis::Reducer final_reducer(&m_functor); + + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / sizeof(size_type)> - word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); + word_count(Analysis::value_size(m_functor) / sizeof(size_type)); - size_type* const shared_value = + pointer_type const shared_value = reinterpret_cast<pointer_type>( Kokkos::Experimental::kokkos_impl_hip_shared_memory<size_type>() + - word_count.value * threadIdx.y; + word_count.value * threadIdx.y); - ValueInit::init(m_functor, shared_value); + final_reducer.init(shared_value); // Number of blocks is bounded so that the reduction can be limited to two // passes. Each thread block is given an approximately equal amount of work @@ -527,15 +509,15 @@ class ParallelScanHIPBase { for (Member iwork = range.begin() + threadIdx.y, iwork_end = range.end(); iwork < iwork_end; iwork += blockDim.y) { this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_value), false); + iwork, final_reducer.reference(shared_value), false); } // Reduce and scan, writing out scan of blocks' totals and block-groups' // totals. Blocks' scan values are written to 'blockIdx.x' location. 
// Block-groups' scan values are at: i = ( j * blockDim.y - 1 ) for i < // gridDim.x - hip_single_inter_block_reduce_scan<true, FunctorType, WorkTag>( - m_functor, blockIdx.x, gridDim.x, + hip_single_inter_block_reduce_scan<true>( + final_reducer, blockIdx.x, gridDim.x, Kokkos::Experimental::kokkos_impl_hip_shared_memory<size_type>(), m_scratch_space, m_scratch_flags); } @@ -543,9 +525,11 @@ class ParallelScanHIPBase { //---------------------------------------- __device__ inline void final() const { - const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / + typename Analysis::Reducer final_reducer(&m_functor); + + const integral_nonzero_constant<size_type, Analysis::StaticValueSize / sizeof(size_type)> - word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); + word_count(Analysis::value_size(m_functor) / sizeof(size_type)); // Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , // value[2] , ... } @@ -564,7 +548,7 @@ class ParallelScanHIPBase { shared_accum[i] = block_total[i]; } } else if (0 == threadIdx.y) { - ValueInit::init(m_functor, shared_accum); + final_reducer.init(reinterpret_cast<pointer_type>(shared_accum)); } const WorkRange range(m_policy, blockIdx.x, gridDim.x); @@ -576,7 +560,8 @@ class ParallelScanHIPBase { __syncthreads(); // Don't overwrite previous iteration values until they // are used - ValueInit::init(m_functor, shared_prefix + word_count.value); + final_reducer.init( + reinterpret_cast<pointer_type>(shared_prefix + word_count.value)); // Copy previous block's accumulation total into thread[0] prefix and // inclusive scan value of this block @@ -591,14 +576,16 @@ class ParallelScanHIPBase { const bool doWork = (iwork < range.end()); if (doWork) { this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_prefix + word_count.value), + iwork, + final_reducer.reference(reinterpret_cast<pointer_type>( + shared_prefix + word_count.value)), false); } // Scan block values into locations 
shared_data[1..blockDim.y] - hip_intra_block_reduce_scan<true, FunctorType, WorkTag>( - m_functor, - typename ValueTraits::pointer_type(shared_data + word_count.value)); + hip_intra_block_reduce_scan<true>( + final_reducer, + typename Analysis::pointer_type(shared_data + word_count.value)); { size_type* const block_total = @@ -611,7 +598,10 @@ class ParallelScanHIPBase { // Call functor with exclusive scan value if (doWork) { this->template exec_range<WorkTag>( - iwork, ValueOps::reference(shared_prefix), true); + iwork, + final_reducer.reference( + reinterpret_cast<pointer_type>(shared_prefix)), + true); } } } @@ -658,13 +648,13 @@ class ParallelScanHIPBase { m_grid_x = (nwork + work_per_block - 1) / work_per_block; m_scratch_space = Kokkos::Experimental::Impl::hip_internal_scratch_space( - m_policy.space(), ValueTraits::value_size(m_functor) * m_grid_x); + m_policy.space(), Analysis::value_size(m_functor) * m_grid_x); m_scratch_flags = Kokkos::Experimental::Impl::hip_internal_scratch_flags( m_policy.space(), sizeof(size_type) * 1); dim3 grid(m_grid_x, 1, 1); dim3 block(1, block_size, 1); // REQUIRED DIMENSIONS ( 1 , N , 1 ) - const int shmem = ValueTraits::value_size(m_functor) * (block_size + 2); + const int shmem = Analysis::value_size(m_functor) * (block_size + 2); m_final = false; // these ones are OK to be just the base because the specializations @@ -712,7 +702,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, const auto& instance = Base::m_policy.space().impl_internal_space_instance(); auto shmem_functor = [&f](unsigned n) { - return hip_single_inter_block_reduce_scan_shmem<false, FunctorType, + return hip_single_inter_block_reduce_scan_shmem<true, FunctorType, typename Base::WorkTag>( f, n); }; @@ -740,9 +730,10 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, const auto nwork = Base::m_policy.end() - Base::m_policy.begin(); if (nwork) { - const int size = Base::ValueTraits::value_size(Base::m_functor); - 
DeepCopy<HostSpace, Kokkos::Experimental::HIPSpace>( - &m_returnvalue, + const int size = Base::Analysis::value_size(Base::m_functor); + DeepCopy<HostSpace, Kokkos::Experimental::HIPSpace, + Kokkos::Experimental::HIP>( + Base::m_policy.space(), &m_returnvalue, Base::m_scratch_space + (Base::m_grid_x - 1) * size / sizeof(int), size); } @@ -760,7 +751,7 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, const auto& instance = Base::m_policy.space().impl_internal_space_instance(); auto shmem_functor = [&f](unsigned n) { - return hip_single_inter_block_reduce_scan_shmem<false, FunctorType, + return hip_single_inter_block_reduce_scan_shmem<true, FunctorType, typename Base::WorkTag>( f, n); }; diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp index 0ec0761f7dd40d4c805b781330df4bee5a0898ca..69ced48a9b33f438f382862171cad0e738270f28 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp @@ -75,8 +75,8 @@ class TeamPolicyInternal<Kokkos::Experimental::HIP, Properties...> int m_league_size; int m_team_size; int m_vector_length; - int m_team_scratch_size[2]; - int m_thread_scratch_size[2]; + size_t m_team_scratch_size[2]; + size_t m_thread_scratch_size[2]; int m_chunk_size; bool m_tune_team_size; bool m_tune_vector_length; @@ -206,15 +206,17 @@ class TeamPolicyInternal<Kokkos::Experimental::HIP, Properties...> int league_size() const { return m_league_size; } - int scratch_size(int level, int team_size_ = -1) const { + size_t scratch_size(int level, int team_size_ = -1) const { if (team_size_ < 0) team_size_ = m_team_size; return m_team_scratch_size[level] + team_size_ * m_thread_scratch_size[level]; } - int team_scratch_size(int level) const { return m_team_scratch_size[level]; } + size_t team_scratch_size(int level) const { + return m_team_scratch_size[level]; + } - int 
thread_scratch_size(int level) const { + size_t thread_scratch_size(int level) const { return m_thread_scratch_size[level]; } @@ -359,7 +361,7 @@ class TeamPolicyInternal<Kokkos::Experimental::HIP, Properties...> // internal_team_size_common_reduce // once we can turn c++17 constexpr on by default. // The problem right now is that we can't turn off the evaluation - // of the functor_value_traits's valuesize / StaticValueSize + // of the Analysis' valuesize / StaticValueSize const unsigned shmem_block = team_scratch_size(0) + 2 * sizeof(double); const unsigned shmem_thread = thread_scratch_size(0) + sizeof(double); @@ -395,14 +397,16 @@ class TeamPolicyInternal<Kokkos::Experimental::HIP, Properties...> template <BlockType BlockSize, class ClosureType, class FunctorType> int internal_team_size_common_reduce(const FunctorType& f) const { - using functor_value_traits = - Impl::FunctorValueTraits<FunctorType, typename traits::work_tag>; - - const unsigned shmem_block = team_scratch_size(0) + 2 * sizeof(double); - const unsigned shmem_thread = thread_scratch_size(0) + sizeof(double) + - ((functor_value_traits::StaticValueSize != 0) - ? 0 - : functor_value_traits::value_size(f)); + using Interface = + typename Impl::DeduceFunctorPatternInterface<ClosureType>::type; + using Analysis = + Impl::FunctorAnalysis<Interface, typename ClosureType::Policy, + FunctorType>; + + const unsigned shmem_block = team_scratch_size(0) + 2 * sizeof(double); + const unsigned shmem_thread = + thread_scratch_size(0) + sizeof(double) + + ((Analysis::StaticValueSize != 0) ? 
0 : Analysis::value_size(f)); const int vector_length = impl_vector_length(); const auto functor = [&f, shmem_block, shmem_thread, vector_length]( @@ -455,10 +459,10 @@ __device__ inline int64_t hip_get_scratch_index( int64_t threadid = 0; __shared__ int64_t base_thread_id; if (threadIdx.x == 0 && threadIdx.y == 0) { - int64_t const wraparound_len = Kokkos::Experimental::min( - int64_t(league_size), - (int64_t(Kokkos::Impl::g_device_hip_lock_arrays.n)) / - (blockDim.x * blockDim.y)); + int64_t const wraparound_len = + Kokkos::min(int64_t(league_size), + (int64_t(Kokkos::Impl::g_device_hip_lock_arrays.n)) / + (blockDim.x * blockDim.y)); threadid = (blockIdx.x * blockDim.z + threadIdx.z) % wraparound_len; threadid *= blockDim.x * blockDim.y; int done = 0; @@ -513,23 +517,21 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, int m_shmem_begin; int m_shmem_size; void* m_scratch_ptr[2]; - int m_scratch_size[2]; + size_t m_scratch_size[2]; int32_t* m_scratch_locks; // Only let one ParallelFor/Reduce modify the team scratch memory. The // constructor acquires the mutex which is released in the destructor. 
std::lock_guard<std::mutex> m_scratch_lock_guard; template <typename TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_team(const member_type& member) const { + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_team( + const member_type& member) const { m_functor(member); } template <typename TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_team(const member_type& member) const { + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_team( + const member_type& member) const { m_functor(TagType(), member); } @@ -647,22 +649,18 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, work_tag, void>::type; - using value_traits = - Kokkos::Impl::FunctorValueTraits<reducer_type_fwd, work_tag_fwd>; - using value_init = - Kokkos::Impl::FunctorValueInit<reducer_type_fwd, work_tag_fwd>; - using value_join = - Kokkos::Impl::FunctorValueJoin<reducer_type_fwd, work_tag_fwd>; + using analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + Policy, reducer_type_fwd>; - using pointer_type = typename value_traits::pointer_type; - using reference_type = typename value_traits::reference_type; - using value_type = typename value_traits::value_type; + using pointer_type = typename analysis::pointer_type; + using reference_type = typename analysis::reference_type; + using value_type = typename analysis::value_type; public: using functor_type = FunctorType; using size_type = Kokkos::Experimental::HIP::size_type; - static int constexpr UseShflReduction = (value_traits::StaticValueSize != 0); + static int constexpr UseShflReduction = (analysis::StaticValueSize != 0); private: struct ShflReductionTag {}; @@ -688,7 +686,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, size_type m_shmem_begin; size_type 
m_shmem_size; void* m_scratch_ptr[2]; - int m_scratch_size[2]; + size_t m_scratch_size[2]; int32_t* m_scratch_locks; const size_type m_league_size; int m_team_size; @@ -698,16 +696,14 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, std::lock_guard<std::mutex> m_scratch_lock_guard; template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_team(member_type const& member, reference_type update) const { + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_team( + member_type const& member, reference_type update) const { m_functor(member, update); } template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_team(member_type const& member, reference_type update) const { + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_team( + member_type const& member, reference_type update) const { m_functor(TagType(), member, update); } @@ -747,16 +743,18 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } __device__ inline void run(SHMEMReductionTag, int const threadid) const { - integral_nonzero_constant<size_type, value_traits::StaticValueSize / + typename analysis::Reducer final_reducer( + &reducer_conditional::select(m_functor, m_reducer)); + + integral_nonzero_constant<size_type, analysis::StaticValueSize / sizeof(size_type)> const - word_count(value_traits::value_size( + word_count(analysis::value_size( reducer_conditional::select(m_functor, m_reducer)) / sizeof(size_type)); - reference_type value = value_init::init( - reducer_conditional::select(m_functor, m_reducer), + reference_type value = final_reducer.init( Kokkos::Experimental::kokkos_impl_hip_shared_memory<size_type>() + - threadIdx.y * word_count.value); + threadIdx.y * word_count.value); // Iterate this block through the league iterate_through_league(threadid, value); @@ -782,8 +780,7 @@ class 
ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, : m_scratch_space; if (threadIdx.y == 0) { - Kokkos::Impl::FunctorFinal<reducer_type_fwd, work_tag_fwd>::final( - reducer_conditional::select(m_functor, m_reducer), shared); + final_reducer.final(reinterpret_cast<value_type*>(shared)); } if (Kokkos::Experimental::Impl::HIPTraits::WarpSize < word_count.value) { @@ -797,8 +794,11 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } __device__ inline void run(ShflReductionTag, int const threadid) const { + typename analysis::Reducer final_reducer( + &reducer_conditional::select(m_functor, m_reducer)); + value_type value; - value_init::init(reducer_conditional::select(m_functor, m_reducer), &value); + final_reducer.init(&value); // Iterate this block through the league iterate_through_league(threadid, value); @@ -809,32 +809,28 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, : reinterpret_cast<pointer_type>(m_scratch_space); value_type init; - value_init::init(reducer_conditional::select(m_functor, m_reducer), &init); + final_reducer.init(&init); if (m_league_size == 0) { - Kokkos::Impl::FunctorFinal<reducer_type_fwd, work_tag_fwd>::final( - reducer_conditional::select(m_functor, m_reducer), - reinterpret_cast<void*>(&value)); + final_reducer.final(&value); *result = value; - } else if (Impl::hip_inter_block_shuffle_reduction<FunctorType, value_join, - work_tag>( - value, init, - value_join( - reducer_conditional::select(m_functor, m_reducer)), - m_scratch_space, result, m_scratch_flags, blockDim.y)) { + } else if (Impl::hip_inter_block_shuffle_reduction( + value, init, final_reducer, m_scratch_space, result, + m_scratch_flags, blockDim.y)) { unsigned int const id = threadIdx.y * blockDim.x + threadIdx.x; if (id == 0) { - Kokkos::Impl::FunctorFinal<reducer_type_fwd, work_tag_fwd>::final( - reducer_conditional::select(m_functor, m_reducer), - reinterpret_cast<void*>(&value)); + final_reducer.final(&value); *result 
= value; } } } inline void execute() { + typename analysis::Reducer final_reducer( + &reducer_conditional::select(m_functor, m_reducer)); + const bool is_empty_range = m_league_size == 0 || m_team_size == 0; - const bool need_device_set = ReduceFunctorHasInit<FunctorType>::value || - ReduceFunctorHasFinal<FunctorType>::value || + const bool need_device_set = analysis::has_init_member_function || + analysis::has_final_member_function || !m_result_ptr_host_accessible || !std::is_same<ReducerType, InvalidType>::value; if (!is_empty_range || need_device_set) { @@ -847,10 +843,9 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, : std::min(static_cast<int>(m_league_size), m_team_size); m_scratch_space = Kokkos::Experimental::Impl::hip_internal_scratch_space( - m_policy.space(), - value_traits::value_size( - reducer_conditional::select(m_functor, m_reducer)) * - block_count); + m_policy.space(), analysis::value_size(reducer_conditional::select( + m_functor, m_reducer)) * + block_count); m_scratch_flags = Kokkos::Experimental::Impl::hip_internal_scratch_flags( m_policy.space(), sizeof(size_type)); @@ -875,7 +870,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, m_policy.space().impl_internal_space_instance()->fence(); if (m_result_ptr) { - const int size = value_traits::value_size( + const int size = analysis::value_size( reducer_conditional::select(m_functor, m_reducer)); DeepCopy<HostSpace, Kokkos::Experimental::HIPSpace>( m_result_ptr, m_scratch_space, size); @@ -883,17 +878,16 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } } else { if (m_result_ptr) { - value_init::init(reducer_conditional::select(m_functor, m_reducer), - m_result_ptr); + final_reducer.init(m_result_ptr); } } } template <class ViewType> - ParallelReduce(FunctorType const& arg_functor, Policy const& arg_policy, - ViewType const& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value, - void*>::type = nullptr) + 
ParallelReduce( + FunctorType const& arg_functor, Policy const& arg_policy, + ViewType const& arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value, void*> = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp index 7929e6df7445f8c461c733cfd81202d76e34c8c5..1091ad5ceadf6a14b2f162c49d797c6c9564d390 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_ReduceScan.hpp @@ -58,17 +58,13 @@ namespace Impl { // Reduction-only implementation //---------------------------------------------------------------------------- -template <class FunctorType, class ArgTag, bool UseShfl> +template <class FunctorType, bool UseShfl> struct HIPReductionsFunctor; -template <typename FunctorType, typename ArgTag> -struct HIPReductionsFunctor<FunctorType, ArgTag, true> { - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - using ValueJoin = FunctorValueJoin<FunctorType, ArgTag>; - using ValueInit = FunctorValueInit<FunctorType, ArgTag>; - using ValueOps = FunctorValueOps<FunctorType, ArgTag>; - using pointer_type = typename ValueTraits::pointer_type; - using Scalar = typename ValueTraits::value_type; +template <typename FunctorType> +struct HIPReductionsFunctor<FunctorType, true> { + using pointer_type = typename FunctorType::pointer_type; + using Scalar = typename FunctorType::value_type; __device__ static inline void scalar_intra_warp_reduction( FunctorType const& functor, @@ -79,7 +75,7 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, true> { Scalar& result) { for (int delta = skip_vector ? 
blockDim.x : 1; delta < width; delta *= 2) { Scalar tmp = Kokkos::Experimental::shfl_down(value, delta, width); - ValueJoin::join(functor, &value, &tmp); + functor.join(&value, &tmp); } Experimental::Impl::in_place_shfl(result, value, 0, width); @@ -109,16 +105,16 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, true> { for (int w = shared_elements; w < num_warps; w += shared_elements) { if (warp_id >= w && warp_id < w + shared_elements) { if ((threadIdx.y * blockDim.x + threadIdx.x) % warp_size == 0) - ValueJoin::join(functor, my_shared_team_buffer_element, &value); + functor.join(my_shared_team_buffer_element, &value); } __syncthreads(); } if (warp_id == 0) { - ValueInit::init(functor, &value); + functor.init(&value); for (unsigned int i = threadIdx.y * blockDim.x + threadIdx.x; i < blockDim.y * blockDim.x / warp_size; i += warp_size) { - ValueJoin::join(functor, &value, &shared_team_buffer_element[i]); + functor.join(&value, &shared_team_buffer_element[i]); } scalar_intra_warp_reduction(functor, value, false, warp_size, *my_global_team_buffer_element); @@ -163,10 +159,10 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, true> { if (num_teams_done == gridDim.x) { is_last_block = true; *global_flags = 0; - ValueInit::init(functor, &value); + functor.init(&value); for (int i = threadIdx.y * blockDim.x + threadIdx.x; i < global_elements; i += blockDim.x * blockDim.y) { - ValueJoin::join(functor, &value, &global_team_buffer_element[i]); + functor.join(&value, &global_team_buffer_element[i]); } scalar_intra_block_reduction( functor, value, false, shared_team_buffer_elements + blockDim.y - 1, @@ -177,14 +173,10 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, true> { } }; -template <typename FunctorType, typename ArgTag> -struct HIPReductionsFunctor<FunctorType, ArgTag, false> { - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - using ValueJoin = FunctorValueJoin<FunctorType, ArgTag>; - using ValueInit = FunctorValueInit<FunctorType, ArgTag>; - 
using ValueOps = FunctorValueOps<FunctorType, ArgTag>; - using pointer_type = typename ValueTraits::pointer_type; - using Scalar = typename ValueTraits::value_type; +template <typename FunctorType> +struct HIPReductionsFunctor<FunctorType, false> { + using pointer_type = typename FunctorType::pointer_type; + using Scalar = typename FunctorType::value_type; __device__ static inline void scalar_intra_warp_reduction( FunctorType const& functor, @@ -197,7 +189,7 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, false> { ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; for (int delta = skip_vector ? blockDim.x : 1; delta < width; delta *= 2) { if (lane_id + delta < ::Kokkos::Experimental::Impl::HIPTraits::WarpSize) { - ValueJoin::join(functor, value, value + delta); + functor.join(value, value + delta); } } *value = *(value - lane_id); @@ -271,10 +263,10 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, false> { if (num_teams_done == gridDim.x) { is_last_block = true; *global_flags = 0; - ValueInit::init(functor, &value); + functor.init(&value); for (int i = threadIdx.y * blockDim.x + threadIdx.x; i < global_elements; i += blockDim.x * blockDim.y) { - ValueJoin::join(functor, &value, &global_team_buffer_element[i]); + functor.join(&value, &global_team_buffer_element[i]); } scalar_intra_block_reduction( functor, value, false, shared_team_buffer_elements + (blockDim.y - 1), @@ -290,75 +282,103 @@ struct HIPReductionsFunctor<FunctorType, ArgTag, false> { //---------------------------------------------------------------------------- /* * Algorithmic constraints: - * (a) blockDim.y is a power of two - * (b) blockDim.y <= 1024 - * (c) blockDim.x == blockDim.z == 1 + * (a) blockDim.y <= 1024 + * (b) blockDim.x == blockDim.z == 1 */ -template <bool DoScan, class FunctorType, class ArgTag> +template <bool DoScan, class FunctorType> __device__ void hip_intra_block_reduce_scan( FunctorType const& functor, - typename FunctorValueTraits<FunctorType, ArgTag>::pointer_type const 
- base_data) { - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - using ValueJoin = FunctorValueJoin<FunctorType, ArgTag>; - - using pointer_type = typename ValueTraits::pointer_type; - - unsigned int const value_count = ValueTraits::value_count(functor); - unsigned int const BlockSizeMask = blockDim.y - 1; - int const WarpMask = Experimental::Impl::HIPTraits::WarpSize - 1; - - // Must have power of two thread count - if ((blockDim.y - 1) & blockDim.y) { - Kokkos::abort( - "HIP::hip_intra_block_reduce_scan requires power-of-two " - "blockDim.y\n"); - } - - auto block_reduce_step = - [&functor, value_count](int const R, pointer_type const TD, int const S) { - if (R > ((1 << S) - 1)) { - ValueJoin::join(functor, TD, (TD - (value_count << S))); - } - }; + typename FunctorType::pointer_type const base_data) { + using pointer_type = typename FunctorType::pointer_type; + + const unsigned value_count = functor.length(); + const unsigned not_less_power_of_two = + (1 << (Impl::int_log2(blockDim.y - 1) + 1)); + const unsigned BlockSizeMask = not_less_power_of_two - 1; + // There is at most one warp that is neither completely full nor empty. + // For that warp, we shift all indices logically to the end and ignore join + // operations with unassigned indices in the warp when performing the intra + // warp reduction/scan.
+ const bool is_full_warp = + (((threadIdx.y >> Experimental::Impl::HIPTraits::WarpIndexShift) + 1) + << Experimental::Impl::HIPTraits::WarpIndexShift) <= blockDim.y; + + auto block_reduce_step = [&functor, value_count]( + int const R, pointer_type const TD, int const S, + pointer_type memory_start, int index_shift) { + const auto join_ptr = TD - (value_count << S) + value_count * index_shift; + if (R > ((1 << S) - 1) && join_ptr >= memory_start) { + functor.join(TD, join_ptr); + } + }; - { // Intra-warp reduction: - const unsigned rtid_intra = threadIdx.y & WarpMask; + // Intra-warp reduction: + { + const unsigned mapped_idx = + threadIdx.y + (is_full_warp + ? 0 + : (not_less_power_of_two - blockDim.y) & + (Experimental::Impl::HIPTraits::WarpSize - 1)); const pointer_type tdata_intra = base_data + value_count * threadIdx.y; - - block_reduce_step(rtid_intra, tdata_intra, 0); - block_reduce_step(rtid_intra, tdata_intra, 1); - block_reduce_step(rtid_intra, tdata_intra, 2); - block_reduce_step(rtid_intra, tdata_intra, 3); - block_reduce_step(rtid_intra, tdata_intra, 4); - block_reduce_step(rtid_intra, tdata_intra, 5); + const pointer_type warp_start = + base_data + + value_count * + ((threadIdx.y >> Experimental::Impl::HIPTraits::WarpIndexShift) + << Experimental::Impl::HIPTraits::WarpIndexShift); + block_reduce_step(mapped_idx, tdata_intra, 0, warp_start, 0); + block_reduce_step(mapped_idx, tdata_intra, 1, warp_start, 0); + block_reduce_step(mapped_idx, tdata_intra, 2, warp_start, 0); + block_reduce_step(mapped_idx, tdata_intra, 3, warp_start, 0); + block_reduce_step(mapped_idx, tdata_intra, 4, warp_start, 0); + block_reduce_step(mapped_idx, tdata_intra, 5, warp_start, 0); } __syncthreads(); // Wait for all warps to reduce - { // Inter-warp reduce-scan by a single warp to avoid extra synchronizations - unsigned int const rtid_inter = - ((threadIdx.y + 1) << Experimental::Impl::HIPTraits::WarpIndexShift) - - 1; - - if (rtid_inter < blockDim.y) { - pointer_type const 
tdata_inter = base_data + value_count * rtid_inter; + // Inter-warp reduce-scan by a single warp to avoid extra synchronizations + { + // There is at most one warp where the memory address to be used is not + // (HIPTraits::WarpSize - 1) away from the warp start address. For the + // following reduction, we shift all indices logically to the end of the + // next power-of-two to the number of warps. + const unsigned n_active_warps = + ((blockDim.y - 1) >> Experimental::Impl::HIPTraits::WarpIndexShift) + 1; + if (threadIdx.y < n_active_warps) { + const bool is_full_warp_inter = + threadIdx.y < + (blockDim.y >> Experimental::Impl::HIPTraits::WarpIndexShift); + pointer_type const tdata_inter = + base_data + + value_count * + (is_full_warp_inter + ? (threadIdx.y + << Experimental::Impl::HIPTraits::WarpIndexShift) + + (Experimental::Impl::HIPTraits::WarpSize - 1) + : blockDim.y - 1); + const unsigned index_shift = + is_full_warp_inter + ? 0 + : blockDim.y - (threadIdx.y + << Experimental::Impl::HIPTraits::WarpIndexShift); + const int rtid_inter = + (threadIdx.y << Experimental::Impl::HIPTraits::WarpIndexShift) + + (Experimental::Impl::HIPTraits::WarpSize - 1) - index_shift; if ((1 << 6) < BlockSizeMask) { - block_reduce_step(rtid_inter, tdata_inter, 6); + block_reduce_step(rtid_inter, tdata_inter, 6, base_data, index_shift); } if ((1 << 7) < BlockSizeMask) { - block_reduce_step(rtid_inter, tdata_inter, 7); + block_reduce_step(rtid_inter, tdata_inter, 7, base_data, index_shift); } if ((1 << 8) < BlockSizeMask) { - block_reduce_step(rtid_inter, tdata_inter, 8); + block_reduce_step(rtid_inter, tdata_inter, 8, base_data, index_shift); } if ((1 << 9) < BlockSizeMask) { - block_reduce_step(rtid_inter, tdata_inter, 9); + block_reduce_step(rtid_inter, tdata_inter, 9, base_data, index_shift); } if ((1 << 10) < BlockSizeMask) { - block_reduce_step(rtid_inter, tdata_inter, 10); + block_reduce_step(rtid_inter, tdata_inter, 10, base_data, index_shift); } } } @@ -368,12 +388,16 @@
__device__ void hip_intra_block_reduce_scan( if (DoScan) { // Update all the values for the respective warps (except for the last one) // by adding from the last value of the previous warp. + const unsigned int WarpMask = Experimental::Impl::HIPTraits::WarpSize - 1; + const int is_last_thread_in_warp = + is_full_warp ? ((threadIdx.y & WarpMask) == + Experimental::Impl::HIPTraits::WarpSize - 1) + : (threadIdx.y == blockDim.y - 1); if (threadIdx.y >= Experimental::Impl::HIPTraits::WarpSize && - (threadIdx.y & WarpMask) != - Experimental::Impl::HIPTraits::WarpSize - 1) { + !is_last_thread_in_warp) { const int offset_to_previous_warp_total = (threadIdx.y & (~WarpMask)) - 1; - ValueJoin::join(functor, base_data + value_count * threadIdx.y, - base_data + value_count * offset_to_previous_warp_total); + functor.join(base_data + value_count * threadIdx.y, + base_data + value_count * offset_to_previous_warp_total); } } } @@ -387,7 +411,7 @@ __device__ void hip_intra_block_reduce_scan( * Global reduce result is in the last threads' 'shared_data' location. 
*/ -template <bool DoScan, class FunctorType, class ArgTag> +template <bool DoScan, class FunctorType> __device__ bool hip_single_inter_block_reduce_scan_impl( FunctorType const& functor, ::Kokkos::Experimental::HIP::size_type const block_id, @@ -395,13 +419,10 @@ __device__ bool hip_single_inter_block_reduce_scan_impl( ::Kokkos::Experimental::HIP::size_type* const shared_data, ::Kokkos::Experimental::HIP::size_type* const global_data, ::Kokkos::Experimental::HIP::size_type* const global_flags) { - using size_type = ::Kokkos::Experimental::HIP::size_type; - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - using ValueJoin = FunctorValueJoin<FunctorType, ArgTag>; - using ValueInit = FunctorValueInit<FunctorType, ArgTag>; - using ValueOps = FunctorValueOps<FunctorType, ArgTag>; + using size_type = ::Kokkos::Experimental::HIP::size_type; - using pointer_type = typename ValueTraits::pointer_type; + using value_type = typename FunctorType::value_type; + using pointer_type = typename FunctorType::pointer_type; // '__ffs' = position of the least significant bit set to 1. // 'blockDim.y' is guaranteed to be a power of two so this @@ -416,13 +437,14 @@ __device__ bool hip_single_inter_block_reduce_scan_impl( "blockDim"); } - integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / - sizeof(size_type)> const - word_count(ValueTraits::value_size(functor) / sizeof(size_type)); + const integral_nonzero_constant< + size_type, std::is_pointer<typename FunctorType::reference_type>::value + ? 0 + : sizeof(value_type) / sizeof(size_type)> + word_count((sizeof(value_type) * functor.length()) / sizeof(size_type)); // Reduce the accumulation for the entire block. - hip_intra_block_reduce_scan<false, FunctorType, ArgTag>( - functor, pointer_type(shared_data)); + hip_intra_block_reduce_scan<false>(functor, pointer_type(shared_data)); { // Write accumulation total to global scratch space. 
@@ -461,32 +483,34 @@ __device__ bool hip_single_inter_block_reduce_scan_impl( BlockSizeShift; { - void* const shared_ptr = shared_data + word_count.value * threadIdx.y; - /* reference_type shared_value = */ ValueInit::init(functor, shared_ptr); + pointer_type const shared_data_thread = reinterpret_cast<pointer_type>( + shared_data + word_count.value * threadIdx.y); + /* reference_type shared_value = */ functor.init(shared_data_thread); for (size_type i = b; i < e; ++i) { - ValueJoin::join(functor, shared_ptr, - global_data + word_count.value * i); + functor.join( + shared_data_thread, + reinterpret_cast<pointer_type>(global_data + word_count.value * i)); } } - hip_intra_block_reduce_scan<DoScan, FunctorType, ArgTag>( - functor, pointer_type(shared_data)); + hip_intra_block_reduce_scan<DoScan>(functor, pointer_type(shared_data)); if (DoScan) { - size_type* const shared_value = + pointer_type const shared_value = reinterpret_cast<pointer_type>( shared_data + - word_count.value * (threadIdx.y ? threadIdx.y - 1 : blockDim.y); + word_count.value * (threadIdx.y ? 
threadIdx.y - 1 : blockDim.y)); if (!threadIdx.y) { - ValueInit::init(functor, shared_value); + functor.init(shared_value); } // Join previous inclusive scan value to each member for (size_type i = b; i < e; ++i) { - size_type* const global_value = global_data + word_count.value * i; - ValueJoin::join(functor, shared_value, global_value); - ValueOps::copy(functor, global_value, shared_value); + pointer_type const global_value = + reinterpret_cast<pointer_type>(global_data + word_count.value * i); + functor.join(shared_value, global_value); + functor.copy(global_value, shared_value); } } } @@ -494,7 +518,7 @@ __device__ bool hip_single_inter_block_reduce_scan_impl( return is_last_block; } -template <bool DoScan, typename FunctorType, typename ArgTag> +template <bool DoScan, typename FunctorType> __device__ bool hip_single_inter_block_reduce_scan( FunctorType const& functor, ::Kokkos::Experimental::HIP::size_type const block_id, @@ -502,29 +526,45 @@ __device__ bool hip_single_inter_block_reduce_scan( ::Kokkos::Experimental::HIP::size_type* const shared_data, ::Kokkos::Experimental::HIP::size_type* const global_data, ::Kokkos::Experimental::HIP::size_type* const global_flags) { - using ValueTraits = FunctorValueTraits<FunctorType, ArgTag>; - // If we are doing a reduction and StaticValueSize is true, we use the + // If we are doing a reduction and we don't do an array reduction, we use the // reduction-only path. Otherwise, we use the common path between reduction // and scan. - if (!DoScan && static_cast<bool>(ValueTraits::StaticValueSize)) + if (!DoScan && !std::is_pointer<typename FunctorType::reference_type>::value) // FIXME_HIP_PERFORMANCE I don't know where 16 comes from. 
This inequality // determines if we use shared memory (false) or shuffle (true) return Kokkos::Impl::HIPReductionsFunctor< - FunctorType, ArgTag, (ValueTraits::StaticValueSize > 16)>:: - scalar_inter_block_reduction(functor, block_count, shared_data, - global_data, global_flags); + FunctorType, (sizeof(typename FunctorType::value_type) > + 16)>::scalar_inter_block_reduction(functor, block_count, + shared_data, + global_data, + global_flags); else { - return hip_single_inter_block_reduce_scan_impl<DoScan, FunctorType, ArgTag>( + return hip_single_inter_block_reduce_scan_impl<DoScan>( functor, block_id, block_count, shared_data, global_data, global_flags); } } // Size in bytes required for inter block reduce or scan template <bool DoScan, class FunctorType, class ArgTag> -inline unsigned hip_single_inter_block_reduce_scan_shmem( - const FunctorType& functor, const unsigned BlockSize) { - return (BlockSize + 2) * - Impl::FunctorValueTraits<FunctorType, ArgTag>::value_size(functor); +inline std::enable_if_t<DoScan, unsigned> +hip_single_inter_block_reduce_scan_shmem(const FunctorType& functor, + const unsigned BlockSize) { + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN, + RangePolicy<Experimental::HIP, ArgTag>, + FunctorType>; + + return (BlockSize + 2) * Analysis::value_size(functor); +} + +template <bool DoScan, class FunctorType, class ArgTag> +inline std::enable_if_t<!DoScan, unsigned> +hip_single_inter_block_reduce_scan_shmem(const FunctorType& functor, + const unsigned BlockSize) { + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + RangePolicy<Experimental::HIP, ArgTag>, + FunctorType>; + + return (BlockSize + 2) * Analysis::value_size(functor); } } // namespace Impl diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp index 5a27e2e0ddd2a02a03866b2d4334770518da94c2..eb85ed4709ed453f40856b05b07e76fd50e06430 100644 --- 
a/packages/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp @@ -61,11 +61,9 @@ namespace Impl { * (b) blockDim.x == power of two * (x) blockDim.z == 1 */ -template <typename ValueType, typename JoinOp, - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value, - int>::type = 0> +template <typename ValueType, typename ReducerType> __device__ inline void hip_intra_warp_shuffle_reduction( - ValueType& result, JoinOp const& join, + ValueType& result, ReducerType const& reducer, uint32_t const max_active_thread = blockDim.y) { unsigned int shift = 1; @@ -78,7 +76,7 @@ __device__ inline void hip_intra_warp_shuffle_reduction( // Only join if upper thread is active (this allows non power of two for // blockDim.y) if (threadIdx.y + shift < max_active_thread) { - join(result, tmp); + reducer.join(&result, &tmp); } shift *= 2; } @@ -87,11 +85,9 @@ __device__ inline void hip_intra_warp_shuffle_reduction( result = Kokkos::Experimental::shfl(result, 0, warp_size); } -template <typename ValueType, typename JoinOp, - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value, - int>::type = 0> +template <typename ValueType, typename ReducerType> __device__ inline void hip_inter_warp_shuffle_reduction( - ValueType& value, const JoinOp& join, + ValueType& value, const ReducerType& reducer, const int max_active_thread = blockDim.y) { unsigned int constexpr warp_size = Kokkos::Experimental::Impl::HIPTraits::WarpSize; @@ -111,7 +107,7 @@ __device__ inline void hip_inter_warp_shuffle_reduction( __syncthreads(); while (shift <= max_active_thread / step) { if (shift <= id && shift + step_width > id && threadIdx.x == 0) { - join(result[id % step_width], value); + reducer.join(&result[id % step_width], &value); } __syncthreads(); shift += step_width; @@ -119,37 +115,31 @@ __device__ inline void hip_inter_warp_shuffle_reduction( value = result[0]; for (int i = 1; (i * step < max_active_thread) && (i < step_width); 
++i) - join(value, result[i]); + reducer.join(&value, &result[i]); } -template <typename ValueType, typename JoinOp, - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value, - int>::type = 0> +template <typename ValueType, typename ReducerType> __device__ inline void hip_intra_block_shuffle_reduction( - ValueType& value, JoinOp const& join, + ValueType& value, ReducerType const& reducer, int const max_active_thread = blockDim.y) { - hip_intra_warp_shuffle_reduction(value, join, max_active_thread); - hip_inter_warp_shuffle_reduction(value, join, max_active_thread); + hip_intra_warp_shuffle_reduction(value, reducer, max_active_thread); + hip_inter_warp_shuffle_reduction(value, reducer, max_active_thread); } -template <class FunctorType, class JoinOp, class ArgTag = void> +template <class FunctorType> __device__ inline bool hip_inter_block_shuffle_reduction( - typename FunctorValueTraits<FunctorType, ArgTag>::reference_type value, - typename FunctorValueTraits<FunctorType, ArgTag>::reference_type neutral, - JoinOp const& join, + typename FunctorType::reference_type value, + typename FunctorType::reference_type neutral, FunctorType const& reducer, Kokkos::Experimental::HIP::size_type* const m_scratch_space, - typename FunctorValueTraits<FunctorType, - ArgTag>::pointer_type const /*result*/, + typename FunctorType::pointer_type const /*result*/, Kokkos::Experimental::HIP::size_type* const m_scratch_flags, int const max_active_thread = blockDim.y) { - using pointer_type = - typename FunctorValueTraits<FunctorType, ArgTag>::pointer_type; - using value_type = - typename FunctorValueTraits<FunctorType, ArgTag>::value_type; + using pointer_type = typename FunctorType::pointer_type; + using value_type = typename FunctorType::value_type; // Do the intra-block reduction with shfl operations for the intra warp // reduction and static shared memory for the inter warp reduction - hip_intra_block_shuffle_reduction(value, join, max_active_thread); + 
hip_intra_block_shuffle_reduction(value, reducer, max_active_thread); int const id = threadIdx.y * blockDim.x + threadIdx.x; @@ -188,7 +178,7 @@ __device__ inline bool hip_inter_block_shuffle_reduction( : warp_size; for (int i = id; i < static_cast<int>(gridDim.x); i += step_size) { value_type tmp = global[i]; - join(value, tmp); + reducer.join(&value, &tmp); } // Perform shfl reductions within the warp only join if contribution is @@ -196,7 +186,7 @@ __device__ inline bool hip_inter_block_shuffle_reduction( for (unsigned int i = 1; i < warp_size; i *= 2) { if ((blockDim.x * blockDim.y) > i) { value_type tmp = Kokkos::Experimental::shfl_down(value, i, warp_size); - if (id + i < gridDim.x) join(value, tmp); + if (id + i < gridDim.x) reducer.join(&value, &tmp); } } } @@ -205,130 +195,6 @@ __device__ inline bool hip_inter_block_shuffle_reduction( // "value" return last_block; } - -// We implemente the same functions as above but the user provide a Reducer -// instead of JoinOP -template <typename ReducerType, - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value, - int>::type = 0> -__device__ inline void hip_intra_warp_shuffle_reduction( - const ReducerType& reducer, typename ReducerType::value_type& result, - const uint32_t max_active_thread = blockDim.y) { - using ValueType = typename ReducerType::value_type; - auto join_op = [&](ValueType& result, ValueType const& tmp) { - reducer.join(result, tmp); - }; - hip_intra_warp_shuffle_reduction(result, join_op, max_active_thread); - - reducer.reference() = result; -} - -template <typename ReducerType, - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value, - int>::type = 0> -__device__ inline void hip_inter_warp_shuffle_reduction( - ReducerType const& reducer, typename ReducerType::value_type value, - int const max_active_thread = blockDim.y) { - using ValueType = typename ReducerType::value_type; - auto join_op = [&](ValueType& a, ValueType& b) { reducer.join(a, b); }; - 
hip_inter_warp_shuffle_reduction(value, join_op, max_active_thread); - - reducer.reference() = value; -} - -template <typename ReducerType, - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value, - int>::type = 0> -__device__ inline void hip_intra_block_shuffle_reduction( - ReducerType const& reducer, typename ReducerType::value_type value, - int const max_active_thread = blockDim.y) { - hip_intra_warp_shuffle_reduction(reducer, value, max_active_thread); - hip_inter_warp_shuffle_reduction(reducer, value, max_active_thread); -} - -template <typename ReducerType, - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value, - int>::type = 0> -__device__ inline void hip_intra_block_shuffle_reduction( - ReducerType const& reducer, int const max_active_thread = blockDim.y) { - hip_intra_block_shuffle_reduction(reducer, reducer.reference(), - max_active_thread); -} - -template <typename ReducerType, - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value, - int>::type = 0> -__device__ inline bool hip_inter_block_shuffle_reduction( - ReducerType const& reducer, - Kokkos::Experimental::HIP::size_type* const m_scratch_space, - Kokkos::Experimental::HIP::size_type* const m_scratch_flags, - int const max_active_thread = blockDim.y) { - using pointer_type = typename ReducerType::value_type*; - using value_type = typename ReducerType::value_type; - - // Do the intra-block reduction with shfl operations for the intra warp - // reduction and static shared memory for the inter warp reduction - hip_intra_block_shuffle_reduction(reducer, max_active_thread); - - value_type value = reducer.reference(); - - int const id = threadIdx.y * blockDim.x + threadIdx.x; - - // One thread in the block writes block result to global scratch_memory - if (id == 0) { - pointer_type global = - reinterpret_cast<pointer_type>(m_scratch_space) + blockIdx.x; - *global = value; - } - - // One warp of last block performs inter block reduction through loading the - // block values 
from global scratch_memory - bool last_block = false; - - __syncthreads(); - int constexpr warp_size = Kokkos::Experimental::Impl::HIPTraits::WarpSize; - if (id < warp_size) { - Kokkos::Experimental::HIP::size_type count; - - // Figure out whether this is the last block - if (id == 0) count = Kokkos::atomic_fetch_add(m_scratch_flags, 1); - count = Kokkos::Experimental::shfl(count, 0, warp_size); - - // Last block does the inter block reduction - if (count == gridDim.x - 1) { - // Set flag back to zero - if (id == 0) *m_scratch_flags = 0; - last_block = true; - reducer.init(value); - - pointer_type const global = - reinterpret_cast<pointer_type>(m_scratch_space); - - // Reduce all global values with splitting work over threads in one warp - int const step_size = blockDim.x * blockDim.y < warp_size - ? blockDim.x * blockDim.y - : warp_size; - for (int i = id; i < static_cast<int>(gridDim.x); i += step_size) { - value_type tmp = global[i]; - reducer.join(value, tmp); - } - - // Perform shfl reductions within the warp only join if contribution is - // valid (allows gridDim.x non power of two and <warp_size) - for (unsigned int i = 1; i < warp_size; i *= 2) { - if ((blockDim.x * blockDim.y) > i) { - value_type tmp = Kokkos::Experimental::shfl_down(value, i, warp_size); - if (id + i < gridDim.x) reducer.join(value, tmp); - } - __syncthreads(); - } - } - } - - // The last block has in its thread = 0 the global reduction value through - // "value" - return last_block; -} } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp index 776b7c6abea81e75ac11497e4447409478c64ce4..aee9756af0d166c113ee7f8c986b53cba4872981 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> 
#include <Kokkos_Core.hpp> @@ -50,16 +54,30 @@ #include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_MemorySpace.hpp> +#include <impl/Kokkos_DeviceManagement.hpp> +#include <impl/Kokkos_ExecSpaceManager.hpp> #include <stdlib.h> #include <iostream> #include <sstream> -#include <stdexcept> #include <algorithm> #include <atomic> /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ +namespace { + +static std::atomic<bool> is_first_hip_managed_allocation(true); + +bool hip_driver_check_page_migration(int deviceId) { + // check with driver if page migrating memory is available + // this driver query is copied from the hip documentation + int hasManagedMemory = 0; // false by default + KOKKOS_IMPL_HIP_SAFE_CALL(hipDeviceGetAttribute( + &hasManagedMemory, hipDeviceAttributeManagedMemory, deviceId)); + return static_cast<bool>(hasManagedMemory); +} +} // namespace namespace Kokkos { namespace Impl { @@ -131,6 +149,8 @@ HIPSpace::HIPSpace() : m_device(HIP().hip_device()) {} HIPHostPinnedSpace::HIPHostPinnedSpace() {} +HIPManagedSpace::HIPManagedSpace() : m_device(HIP().hip_device()) {} + void* HIPSpace::allocate(const size_t arg_alloc_size) const { return allocate("[unlabeled]", arg_alloc_size); } @@ -179,7 +199,8 @@ void* HIPHostPinnedSpace::impl_allocate( const Kokkos::Tools::SpaceHandle arg_handle) const { void* ptr = nullptr; - auto const error_code = hipHostMalloc(&ptr, arg_alloc_size); + auto const error_code = + hipHostMalloc(&ptr, arg_alloc_size, hipHostMallocNonCoherent); if (error_code != hipSuccess) { // This is the only way to clear the last error, which we should do here // since we're turning it into an exception here @@ -196,6 +217,73 @@ void* HIPHostPinnedSpace::impl_allocate( return ptr; } + +void* HIPManagedSpace::allocate(const size_t arg_alloc_size) const { + return allocate("[unlabeled]", arg_alloc_size); +} +void* HIPManagedSpace::allocate(const 
char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return impl_allocate(arg_label, arg_alloc_size, arg_logical_size); +} +void* HIPManagedSpace::impl_allocate( + const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + void* ptr = nullptr; + + if (arg_alloc_size > 0) { + if (is_first_hip_managed_allocation.exchange(false) && + Kokkos::show_warnings()) { + if (!hip_driver_check_page_migration(m_device)) { + std::cerr << R"warning( +Kokkos::HIP::allocation WARNING: The combination of device and system configuration + does not support page migration between device and host. + HIPManagedSpace might not work as expected. + Please refer to the ROCm documentation on unified/managed memory.)warning" + << std::endl; + } + + // check for correct runtime environment + const char* hsa_xnack = std::getenv("HSA_XNACK"); + if (!hsa_xnack) + std::cerr << R"warning( +Kokkos::HIP::runtime WARNING: Kokkos did not find an environment variable 'HSA_XNACK' + for the current process. + Nevertheless, xnack is enabled for all processes if + amdgpu.noretry=0 was set in the Linux kernel boot line. + Without xnack enabled, Kokkos::HIPManaged might not behave + as expected.)warning" + << std::endl; + else if (Kokkos::Impl::strcmp(hsa_xnack, "1") != 0) + std::cerr << "Kokkos::HIP::runtime WARNING: Kokkos detected the " + "environement variable " + << "'HSA_XNACK=" << hsa_xnack << "\n" + << "Kokkos advises to set it to '1' to enable it per process." 
+ << std::endl; + } + auto const error_code = hipMallocManaged(&ptr, arg_alloc_size); + if (error_code != hipSuccess) { + // This is the only way to clear the last error, which we should do here + // since we're turning it into an exception here + (void)hipGetLastError(); + throw HIPRawMemoryAllocationFailure( + arg_alloc_size, error_code, + RawMemoryAllocationFailure::AllocationMechanism::HIPMallocManaged); + } + KOKKOS_IMPL_HIP_SAFE_CALL(hipMemAdvise( + ptr, arg_alloc_size, hipMemAdviseSetCoarseGrain, m_device)); + } + + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t reported_size = + (arg_logical_size > 0) ? arg_logical_size : arg_alloc_size; + Kokkos::Profiling::allocateData(arg_handle, arg_label, ptr, reported_size); + } + + return ptr; +} + void HIPSpace::deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const { deallocate("[unlabeled]", arg_alloc_ptr, arg_alloc_size); @@ -242,6 +330,35 @@ void HIPHostPinnedSpace::impl_deallocate( KOKKOS_IMPL_HIP_SAFE_CALL(hipHostFree(arg_alloc_ptr)); } +void HIPManagedSpace::deallocate(void* const arg_alloc_ptr, + const size_t arg_alloc_size) const { + deallocate("[unlabeled]", arg_alloc_ptr, arg_alloc_size); +} + +void HIPManagedSpace::deallocate(const char* arg_label, + void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + impl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size); +} +void HIPManagedSpace::impl_deallocate( + const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t reported_size = + (arg_logical_size > 0) ? arg_logical_size : arg_alloc_size; + Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, + reported_size); + } + // We have to unset the CoarseGrain property manually as hipFree does not take + // care of it. 
Otherwise, the allocation would continue to linger in the + // kernel mem page table. + KOKKOS_IMPL_HIP_SAFE_CALL(hipMemAdvise( + arg_alloc_ptr, arg_alloc_size, hipMemAdviseUnsetCoarseGrain, m_device)); + KOKKOS_IMPL_HIP_SAFE_CALL(hipFree(arg_alloc_ptr)); +} + } // namespace Experimental } // namespace Kokkos @@ -257,6 +374,9 @@ SharedAllocationRecord<void, void> SharedAllocationRecord<void, void> SharedAllocationRecord< Kokkos::Experimental::HIPHostPinnedSpace, void>::s_root_record; + +SharedAllocationRecord<void, void> SharedAllocationRecord< + Kokkos::Experimental::HIPManagedSpace, void>::s_root_record; #endif SharedAllocationRecord<Kokkos::Experimental::HIPSpace, @@ -274,6 +394,13 @@ SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, SharedAllocationRecord<void, void>::m_alloc_size); } +SharedAllocationRecord<Kokkos::Experimental::HIPManagedSpace, + void>::~SharedAllocationRecord() { + m_space.deallocate(m_label.c_str(), + SharedAllocationRecord<void, void>::m_alloc_ptr, + SharedAllocationRecord<void, void>::m_alloc_size); +} + SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>:: SharedAllocationRecord( const Kokkos::Experimental::HIPSpace& arg_space, @@ -306,6 +433,35 @@ SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>:: "HostSpace"); } +SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>:: + SharedAllocationRecord( + const Kokkos::Experimental::HIP& arg_exec_space, + const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const SharedAllocationRecord<void, void>::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::Experimental::HIPSpace, + void>::s_root_record, +#endif + Kokkos::Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, 
arg_dealloc, + arg_label), + m_space(arg_space) { + + SharedAllocationHeader header; + + this->base_t::_fill_host_accessible_header_info(header, arg_label); + + // Copy to device memory + Kokkos::Impl::DeepCopy<Kokkos::Experimental::HIPSpace, HostSpace>( + arg_exec_space, RecordBase::m_alloc_ptr, &header, + sizeof(SharedAllocationHeader)); +} + SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>:: SharedAllocationRecord( const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, @@ -328,15 +484,34 @@ SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>:: arg_label); } +SharedAllocationRecord<Kokkos::Experimental::HIPManagedSpace, void>:: + SharedAllocationRecord( + const Kokkos::Experimental::HIPManagedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const SharedAllocationRecord<void, void>::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::Experimental::HIPManagedSpace, + void>::s_root_record, +#endif + Kokkos::Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_space(arg_space) { + // Fill in the Header information, directly accessible via managed memory + this->base_t::_fill_host_accessible_header_info(*RecordBase::m_alloc_ptr, + arg_label); +} + } // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ namespace Kokkos { -namespace Impl { -int get_gpu(const InitArguments& args); -} namespace Experimental { int HIP::concurrency() { @@ -347,8 +522,8 @@ int HIP::impl_is_initialized() { return Impl::HIPInternal::singleton().is_initialized(); } -void HIP::impl_initialize(const HIP::SelectDevice 
config) { - Impl::HIPInternal::singleton().initialize(config.hip_device_id); +void HIP::impl_initialize(InitializationSettings const& settings) { + Impl::HIPInternal::singleton().initialize(::Kokkos::Impl::get_gpu(settings)); } void HIP::impl_finalize() { Impl::HIPInternal::singleton().finalize(); } @@ -371,8 +546,21 @@ HIP::HIP(hipStream_t const stream, bool manage_stream) manage_stream); } -void HIP::print_configuration(std::ostream& s, const bool) { - Impl::HIPInternal::singleton().print_configuration(s); +void HIP::print_configuration(std::ostream& os, bool /*verbose*/) const { + os << "Device Execution Space:\n"; + os << " KOKKOS_ENABLE_HIP: yes\n"; + + os << "HIP Options:\n"; + os << " KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE: "; +#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE + os << "yes\n"; +#else + os << "no\n"; +#endif + + os << "\nRuntime Configuration:\n"; + + m_space_instance->print_configuration(os); } uint32_t HIP::impl_instance_id() const noexcept { @@ -386,16 +574,10 @@ void HIP::impl_static_fence(const std::string& name) { GlobalDeviceSynchronization, [&]() { KOKKOS_IMPL_HIP_SAFE_CALL(hipDeviceSynchronize()); }); } -void HIP::impl_static_fence() { - impl_static_fence("Kokkos::HIP::impl_static_fence: Unnamed Static Fence"); -} void HIP::fence(const std::string& name) const { m_space_instance->fence(name); } -void HIP::fence() const { - fence("Kokkos::HIP::fence(): Unnamed Instance Fence"); -} hipStream_t HIP::hip_stream() const { return m_space_instance->m_stream; } @@ -412,56 +594,7 @@ const char* HIP::name() { return "HIP"; } namespace Impl { int g_hip_space_factory_initialized = - initialize_space_factory<HIPSpaceInitializer>("150_HIP"); - -void HIPSpaceInitializer::initialize(const InitArguments& args) { - int use_gpu = Impl::get_gpu(args); - - if (std::is_same<Kokkos::Experimental::HIP, - Kokkos::DefaultExecutionSpace>::value || - 0 < use_gpu) { - if (use_gpu > -1) { - Kokkos::Experimental::HIP::impl_initialize( - 
Kokkos::Experimental::HIP::SelectDevice(use_gpu)); - } else { - Kokkos::Experimental::HIP::impl_initialize(); - } - } -} - -void HIPSpaceInitializer::finalize(const bool all_spaces) { - if (std::is_same<Kokkos::Experimental::HIP, - Kokkos::DefaultExecutionSpace>::value || - all_spaces) { - if (Kokkos::Experimental::HIP::impl_is_initialized()) - Kokkos::Experimental::HIP::impl_finalize(); - } -} - -void HIPSpaceInitializer::fence() { - Kokkos::Experimental::HIP::impl_static_fence(); -} -void HIPSpaceInitializer::fence(const std::string& name) { - Kokkos::Experimental::HIP::impl_static_fence(name); -} - -void HIPSpaceInitializer::print_configuration(std::ostream& msg, - const bool detail) { - msg << "Devices:" << std::endl; - msg << " KOKKOS_ENABLE_HIP: "; - msg << "yes" << std::endl; - - msg << "HIP Options:" << std::endl; - msg << " KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE: "; -#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - - msg << "\nRuntime Configuration:" << std::endl; - Experimental::HIP::print_configuration(msg, detail); -} + initialize_space_factory<::Kokkos::Experimental::HIP>("150_HIP"); } // namespace Impl @@ -491,6 +624,8 @@ template class HostInaccessibleSharedAllocationRecordCommon< template class SharedAllocationRecordCommon<Kokkos::Experimental::HIPSpace>; template class SharedAllocationRecordCommon< Kokkos::Experimental::HIPHostPinnedSpace>; +template class SharedAllocationRecordCommon< + Kokkos::Experimental::HIPManagedSpace>; } // end namespace Impl } // end namespace Kokkos diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp index fb67a25c5e7f5e3b0a48118ffe14372f0b1cd2dc..9ddfa5f65106dd6409d4502c27d0599bb4ba5c61 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp @@ -69,8 +69,7 @@ struct HIPJoinFunctor { using value_type = Type; 
KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - volatile const value_type& input) { + static void join(value_type& update, const value_type& input) { update += input; } }; @@ -199,19 +198,21 @@ class HIPTeamMember { * ( 1 == blockDim.z ) */ template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer) const noexcept { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer) const noexcept { team_reduce(reducer, reducer.reference()); } template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer, - typename ReducerType::value_type& value) const noexcept { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) const noexcept { #ifdef __HIP_DEVICE_COMPILE__ - hip_intra_block_shuffle_reduction(reducer, value, blockDim.y); + typename Kokkos::Impl::FunctorAnalysis< + FunctorPatternInterface::REDUCE, TeamPolicy<Experimental::HIP>, + ReducerType>::Reducer wrapped_reducer(&reducer); + hip_intra_block_shuffle_reduction(value, wrapped_reducer, blockDim.y); + reducer.reference() = value; #else (void)reducer; (void)value; @@ -243,8 +244,11 @@ class HIPTeamMember { base_data[threadIdx.y + 1] = value; - Impl::hip_intra_block_reduce_scan<true, Impl::HIPJoinFunctor<Type>, void>( - Impl::HIPJoinFunctor<Type>(), base_data + 1); + Impl::HIPJoinFunctor<Type> hip_join_functor; + typename Kokkos::Impl::FunctorAnalysis< + FunctorPatternInterface::REDUCE, TeamPolicy<Experimental::HIP>, + Impl::HIPJoinFunctor<Type>>::Reducer reducer(&hip_join_functor); + Impl::hip_intra_block_reduce_scan<true>(reducer, base_data + 1); if (global_accum) { if (blockDim.y == threadIdx.y + 1) { @@ -276,17 +280,15 @@ class 
HIPTeamMember { //---------------------------------------- template <typename ReducerType> - KOKKOS_INLINE_FUNCTION static - typename std::enable_if<is_reducer<ReducerType>::value>::type - vector_reduce(ReducerType const& reducer) { + KOKKOS_INLINE_FUNCTION static std::enable_if_t<is_reducer<ReducerType>::value> + vector_reduce(ReducerType const& reducer) { vector_reduce(reducer, reducer.reference()); } template <typename ReducerType> - KOKKOS_INLINE_FUNCTION static - typename std::enable_if<is_reducer<ReducerType>::value>::type - vector_reduce(ReducerType const& reducer, - typename ReducerType::value_type& value) { + KOKKOS_INLINE_FUNCTION static std::enable_if_t<is_reducer<ReducerType>::value> + vector_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) { #ifdef __HIP_DEVICE_COMPILE__ if (blockDim.x == 1) return; @@ -320,9 +322,10 @@ class HIPTeamMember { // Private for the driver KOKKOS_INLINE_FUNCTION - HIPTeamMember(void* shared, const int shared_begin, const int shared_size, - void* scratch_level_1_ptr, const int scratch_level_1_size, - const int arg_league_rank, const int arg_league_size) + HIPTeamMember(void* shared, const size_t shared_begin, + const size_t shared_size, void* scratch_level_1_ptr, + const size_t scratch_level_1_size, const int arg_league_rank, + const int arg_league_size) : m_team_reduce(shared), m_team_shared(((char*)shared) + shared_begin, shared_size, scratch_level_1_ptr, scratch_level_1_size), @@ -419,9 +422,9 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::HIPTeamMember> + std::common_type_t<iType1, iType2>, Impl::HIPTeamMember> TeamThreadRange(const Impl::HIPTeamMember& thread, iType1 begin, iType2 end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamThreadRangeBoundariesStruct<iType, 
Impl::HIPTeamMember>( thread, iType(begin), iType(end)); } @@ -436,10 +439,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::HIPTeamMember> + std::common_type_t<iType1, iType2>, Impl::HIPTeamMember> TeamVectorRange(const Impl::HIPTeamMember& thread, const iType1& begin, const iType2& end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamVectorRangeBoundariesStruct<iType, Impl::HIPTeamMember>( thread, iType(begin), iType(end)); } @@ -454,10 +457,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::HIPTeamMember> + std::common_type_t<iType1, iType2>, Impl::HIPTeamMember> ThreadVectorRange(const Impl::HIPTeamMember& thread, iType1 arg_begin, iType2 arg_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HIPTeamMember>( thread, iType(arg_begin), iType(arg_end)); } @@ -508,11 +511,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * performed and put into result. 
*/ template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::HIPTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { #ifdef __HIP_DEVICE_COMPILE__ typename ReducerType::value_type value; reducer.init(value); @@ -539,11 +541,10 @@ KOKKOS_INLINE_FUNCTION * performed and put into result. */ template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::HIPTeamMember>& loop_boundaries, - const Closure& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { #ifdef __HIP_DEVICE_COMPILE__ ValueType val; Kokkos::Sum<ValueType> reducer(val); @@ -626,11 +627,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( } template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::HIPTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, const 
ReducerType& reducer) { #ifdef __HIP_DEVICE_COMPILE__ typename ReducerType::value_type value; reducer.init(value); @@ -650,11 +650,10 @@ KOKKOS_INLINE_FUNCTION } template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::HIPTeamMember>& loop_boundaries, - const Closure& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { #ifdef __HIP_DEVICE_COMPILE__ ValueType val; Kokkos::Sum<ValueType> reducer(val); @@ -714,11 +713,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * constructed value. */ template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::HIPTeamMember> const& loop_boundaries, - Closure const& closure, ReducerType const& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> +parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember> const& loop_boundaries, + Closure const& closure, ReducerType const& reducer) { #ifdef __HIP_DEVICE_COMPILE__ reducer.init(reducer.reference()); @@ -747,11 +745,10 @@ KOKKOS_INLINE_FUNCTION * constructed value. 
*/ template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!is_reducer<ValueType>::value>::type - parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::HIPTeamMember> const& loop_boundaries, - Closure const& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!is_reducer<ValueType>::value> +parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember> const& loop_boundaries, + Closure const& closure, ValueType& result) { #ifdef __HIP_DEVICE_COMPILE__ result = ValueType(); @@ -779,11 +776,10 @@ KOKKOS_INLINE_FUNCTION * The last call to closure has final == true. */ template <typename iType, class Closure, typename ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::HIPTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { #ifdef __HIP_DEVICE_COMPILE__ using value_type = typename ReducerType::value_type; value_type accum; diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp index 99f61ed36f9ceece136595157d36abffaab82555..a0722f618b4559b759e538b9676ea0d4661a6f6f 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_UniqueToken.hpp @@ -103,8 +103,7 @@ class UniqueToken<HIP, UniqueTokenScope::Global> { size_type size() const noexcept { return m_locks.extent(0); } private: - // FIXME_HIP - KOKKOS_INLINE_FUNCTION size_type impl_acquire() const { + __device__ size_type impl_acquire() const { int idx = blockIdx.x * 
(blockDim.x * blockDim.y) + threadIdx.y * blockDim.x + threadIdx.x; idx = idx % size(); diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp index a6c65ee5eb08c8fe308adc7bbe70158e440dfcc9..18b5f57c2254e3791b9100fdb9640e5713b61cbd 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Vectorization.hpp @@ -68,8 +68,8 @@ struct in_place_shfl_op { // sizeof(Scalar) < sizeof(int) case template <class Scalar> // requires _assignable_from_bits<Scalar> - __device__ inline typename std::enable_if<sizeof(Scalar) < sizeof(int)>::type - operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width) const + __device__ inline std::enable_if_t<sizeof(Scalar) < sizeof(int)> operator()( + Scalar& out, Scalar const& in, int lane_or_delta, int width) const noexcept { using shfl_type = int; union conv_type { @@ -93,28 +93,26 @@ struct in_place_shfl_op { // sizeof(Scalar) == sizeof(int) case template <class Scalar> // requires _assignable_from_bits<Scalar> - __device__ inline typename std::enable_if<sizeof(Scalar) == sizeof(int)>::type - operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width) const + __device__ inline std::enable_if_t<sizeof(Scalar) == sizeof(int)> operator()( + Scalar& out, Scalar const& in, int lane_or_delta, int width) const noexcept { reinterpret_cast<int&>(out) = self().do_shfl_op( reinterpret_cast<int const&>(in), lane_or_delta, width); } template <class Scalar> - __device__ inline - typename std::enable_if<sizeof(Scalar) == sizeof(double)>::type - operator()(Scalar& out, Scalar const& in, int lane_or_delta, - int width) const noexcept { + __device__ inline std::enable_if_t<sizeof(Scalar) == sizeof(double)> + operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width) const + noexcept { reinterpret_cast<double&>(out) = self().do_shfl_op( *reinterpret_cast<double const*>(&in), 
lane_or_delta, width); } // sizeof(Scalar) > sizeof(double) case template <typename Scalar> - __device__ inline - typename std::enable_if<(sizeof(Scalar) > sizeof(double))>::type - operator()(Scalar& out, const Scalar& val, int lane_or_delta, - int width) const noexcept { + __device__ inline std::enable_if_t<(sizeof(Scalar) > sizeof(double))> + operator()(Scalar& out, const Scalar& val, int lane_or_delta, int width) const + noexcept { using shuffle_as_t = int; int constexpr N = sizeof(Scalar) / sizeof(shuffle_as_t); diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp index 3e053d8f14a5ce5211ac6687851c6dd807c56d94..081f6f4047ea3a4ae34f4f027133f100f2752783 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp @@ -62,16 +62,14 @@ class ParallelFor<FunctorType, Kokkos::WorkGraphPolicy<Traits...>, FunctorType m_functor; template <class TagType> - __device__ inline - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_one(const std::int32_t w) const noexcept { + __device__ inline std::enable_if_t<std::is_void<TagType>::value> exec_one( + const std::int32_t w) const noexcept { m_functor(w); } template <class TagType> - __device__ inline - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_one(const std::int32_t w) const noexcept { + __device__ inline std::enable_if_t<!std::is_void<TagType>::value> exec_one( + const std::int32_t w) const noexcept { const TagType t{}; m_functor(t, w); } diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp b/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp index 623c7da02569c4763f5d830fea9f2b7cf75855ff..6027ead01bda8194d8d0ea95b9f6e82d4ce0d0dd 100644 --- a/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp +++ b/packages/kokkos/core/src/HPX/Kokkos_HPX.cpp @@ -42,11 +42,17 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define 
KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Core.hpp> #ifdef KOKKOS_ENABLE_HPX #include <Kokkos_HPX.hpp> +#include <impl/Kokkos_ExecSpaceManager.hpp> + #include <hpx/local/condition_variable.hpp> #include <hpx/local/init.hpp> #include <hpx/local/thread.hpp> @@ -87,26 +93,7 @@ int HPX::concurrency() { } } -void HPX::impl_initialize(int thread_count) { - hpx::runtime *rt = hpx::get_runtime_ptr(); - if (rt == nullptr) { - hpx::local::init_params i; - i.cfg = { - "hpx.os_threads=" + std::to_string(thread_count), -#ifdef KOKKOS_ENABLE_DEBUG - "--hpx:attach-debugger=exception", -#endif - }; - int argc_hpx = 1; - char name[] = "kokkos_hpx"; - char *argv_hpx[] = {name, nullptr}; - hpx::local::start(nullptr, argc_hpx, argv_hpx, i); - - m_hpx_initialized = true; - } -} - -void HPX::impl_initialize() { +void HPX::impl_initialize(InitializationSettings const &settings) { hpx::runtime *rt = hpx::get_runtime_ptr(); if (rt == nullptr) { hpx::local::init_params i; @@ -115,6 +102,10 @@ void HPX::impl_initialize() { "--hpx:attach-debugger=exception", #endif }; + if (settings.has_num_threads()) { + i.cfg.emplace_back("hpx.os_threads=" + + std::to_string(settings.get_num_threads())); + } int argc_hpx = 1; char name[] = "kokkos_hpx"; char *argv_hpx[] = {name, nullptr}; @@ -148,55 +139,7 @@ void HPX::impl_finalize() { namespace Impl { int g_hpx_space_factory_initialized = - initialize_space_factory<HPXSpaceInitializer>("060_HPX"); - -void HPXSpaceInitializer::initialize(const InitArguments &args) { - const int num_threads = args.num_threads; - - if (std::is_same<Kokkos::Experimental::HPX, - Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::Experimental::HPX, - Kokkos::HostSpace::execution_space>::value) { - if (num_threads > 0) { - Kokkos::Experimental::HPX::impl_initialize(num_threads); - } else { - Kokkos::Experimental::HPX::impl_initialize(); - } - // std::cout << "Kokkos::initialize() fyi: HPX enabled and initialized" << - // std::endl ; - } else { - // 
std::cout << "Kokkos::initialize() fyi: HPX enabled but not initialized" - // << std::endl ; - } -} - -void HPXSpaceInitializer::finalize(const bool all_spaces) { - if (std::is_same<Kokkos::Experimental::HPX, - Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::Experimental::HPX, - Kokkos::HostSpace::execution_space>::value || - all_spaces) { - if (Kokkos::Experimental::HPX::impl_is_initialized()) - Kokkos::Experimental::HPX::impl_finalize(); - } -} - -void HPXSpaceInitializer::fence(const std::string &name) { - Kokkos::Experimental::HPX::impl_fence_global(name); -} -void HPXSpaceInitializer::fence() { - Kokkos::Experimental::HPX::impl_fence_global(); -} - -void HPXSpaceInitializer::print_configuration(std::ostream &msg, - const bool detail) { - msg << "HPX Execution Space:" << std::endl; - msg << " KOKKOS_ENABLE_HPX: "; - msg << "yes" << std::endl; - - msg << "\nHPX Runtime Configuration:" << std::endl; - Kokkos::Experimental::HPX::print_configuration(msg, detail); -} + initialize_space_factory<Kokkos::Experimental::HPX>("060_HPX"); } // namespace Impl diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp b/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp index 8d42589bdf37b0b5557aed6631de851276a5f5c4..e61ac728a90dd86eee5b7fb4965349b2307a680f 100644 --- a/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp +++ b/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #if defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_TASKDAG) diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp b/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp index d77b1c2c748a272a288ff5059a6fecd89312a965..67765a6ae0f6c3482e6410cad15e2c7571dde658 100644 --- a/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp +++ b/packages/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp @@ -157,9 +157,9 @@ class TaskQueueSpecialization< template <class 
Scheduler> class TaskQueueSpecializationConstrained< - Scheduler, typename std::enable_if< - std::is_same<typename Scheduler::execution_space, - Kokkos::Experimental::HPX>::value>::type> { + Scheduler, + std::enable_if_t<std::is_same<typename Scheduler::execution_space, + Kokkos::Experimental::HPX>::value>> { public: using execution_space = Kokkos::Experimental::HPX; using scheduler_type = Scheduler; diff --git a/packages/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp b/packages/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp index a3d4a6a60441827f82fff27f49daf63d463fa556..5f2eff5774c0a5a73b323c2eb6a936aea1796db3 100644 --- a/packages/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp +++ b/packages/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp @@ -64,14 +64,14 @@ class ParallelFor<FunctorType, Kokkos::WorkGraphPolicy<Traits...>, FunctorType m_functor; template <class TagType> - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor(const std::int32_t w) const noexcept { + std::enable_if_t<std::is_void<TagType>::value> execute_functor( + const std::int32_t w) const noexcept { m_functor(w); } template <class TagType> - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor(const std::int32_t w) const noexcept { + std::enable_if_t<!std::is_void<TagType>::value> execute_functor( + const std::int32_t w) const noexcept { const TagType t{}; m_functor(t, w); } diff --git a/packages/kokkos/core/src/KokkosExp_InterOp.hpp b/packages/kokkos/core/src/KokkosExp_InterOp.hpp index 37c2088f88f08758f5f1585b7138f43dd73d54eb..0522ad7e8da8990c4378dc0830a5f4be3d0e5e1f 100644 --- a/packages/kokkos/core/src/KokkosExp_InterOp.hpp +++ b/packages/kokkos/core/src/KokkosExp_InterOp.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_CORE_EXP_INTEROP_HPP #define KOKKOS_CORE_EXP_INTEROP_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_INTEROP +#endif #include 
<Kokkos_Core_fwd.hpp> #include <Kokkos_Layout.hpp> @@ -144,4 +148,8 @@ auto as_python_type(Tp&& _v) { } // namespace Experimental } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_INTEROP +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_INTEROP +#endif #endif diff --git a/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp index dfae7451fc302362743c9349485ee574a15a2d76..64b31c7fea5d5cdefd5a91369c9144b3973be050 100644 --- a/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp +++ b/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP @@ -190,7 +199,7 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits<Properties...> { template <class... 
OtherProperties> friend struct MDRangePolicy; - static_assert(!std::is_same<typename traits::iteration_pattern, void>::value, + static_assert(!std::is_void<typename traits::iteration_pattern>::value, "Kokkos Error: MD iteration pattern not defined"); using iteration_pattern = typename traits::iteration_pattern; diff --git a/packages/kokkos/core/src/Kokkos_AcquireUniqueTokenImpl.hpp b/packages/kokkos/core/src/Kokkos_AcquireUniqueTokenImpl.hpp index d6227b7bcf8c8b91516d169cc90ca5c3cf87539a..4a22aedd8c1c6a7dad41eb9810bc5ef3f9f8afef 100644 --- a/packages/kokkos/core/src/Kokkos_AcquireUniqueTokenImpl.hpp +++ b/packages/kokkos/core/src/Kokkos_AcquireUniqueTokenImpl.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_ACQUIRE_UNIQUE_TOKEN_IMPL_HPP #define KOKKOS_ACQUIRE_UNIQUE_TOKEN_IMPL_HPP diff --git a/packages/kokkos/core/src/Kokkos_AnonymousSpace.hpp b/packages/kokkos/core/src/Kokkos_AnonymousSpace.hpp index fb94049d7ad7ed588b00cc1f9351162de32f08e5..6eed92be0480d0974d5433ab8383bbcb347d774f 100644 --- a/packages/kokkos/core/src/Kokkos_AnonymousSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_AnonymousSpace.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_ANONYMOUSSPACE_HPP #define KOKKOS_ANONYMOUSSPACE_HPP diff --git a/packages/kokkos/core/src/Kokkos_Array.hpp b/packages/kokkos/core/src/Kokkos_Array.hpp index 
d2098d0b1a8c17eb492e812cc040b0e858eea9c4..e7fec4c449787b0018061120ee37387637e5eee1 100644 --- a/packages/kokkos/core/src/Kokkos_Array.hpp +++ b/packages/kokkos/core/src/Kokkos_Array.hpp @@ -44,15 +44,20 @@ #ifndef KOKKOS_ARRAY_HPP #define KOKKOS_ARRAY_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ARRAY +#endif #include <Kokkos_Macros.hpp> #include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_StringManipulation.hpp> #include <type_traits> #include <algorithm> +#include <utility> #include <limits> #include <cstddef> -#include <string> namespace Kokkos { @@ -64,14 +69,12 @@ struct ArrayBoundsCheck; template <typename Integral> struct ArrayBoundsCheck<Integral, true> { KOKKOS_INLINE_FUNCTION - ArrayBoundsCheck(Integral i, size_t N) { + constexpr ArrayBoundsCheck(Integral i, size_t N) { if (i < 0) { - KOKKOS_IF_ON_HOST((std::string s = "Kokkos::Array: index "; - s += std::to_string(i); s += " < 0"; - Kokkos::Impl::throw_runtime_exception(s);)) - - KOKKOS_IF_ON_DEVICE( - (Kokkos::abort("Kokkos::Array: negative index in device code");)) + char err[128] = "Kokkos::Array: index "; + to_chars_i(err + strlen(err), err + 128, i); + strcat(err, " < 0"); + Kokkos::abort(err); } ArrayBoundsCheck<Integral, false>(i, N); } @@ -80,14 +83,13 @@ struct ArrayBoundsCheck<Integral, true> { template <typename Integral> struct ArrayBoundsCheck<Integral, false> { KOKKOS_INLINE_FUNCTION - ArrayBoundsCheck(Integral i, size_t N) { + constexpr ArrayBoundsCheck(Integral i, size_t N) { if (size_t(i) >= N) { - KOKKOS_IF_ON_HOST((std::string s = "Kokkos::Array: index "; - s += std::to_string(i); s += " >= "; - s += std::to_string(N); - Kokkos::Impl::throw_runtime_exception(s);)) - - KOKKOS_IF_ON_DEVICE((Kokkos::abort("Kokkos::Array: index >= size");)) + char err[128] = "Kokkos::Array: index "; + to_chars_i(err + strlen(err), err + 128, i); + strcat(err, " >= "); + to_chars_i(err + strlen(err), err + 128, N); + 
Kokkos::abort(err); } } }; @@ -118,19 +120,19 @@ struct Array { public: using reference = T&; - using const_reference = typename std::add_const<T>::type&; + using const_reference = std::add_const_t<T>&; using size_type = size_t; using difference_type = ptrdiff_t; using value_type = T; using pointer = T*; - using const_pointer = typename std::add_const<T>::type*; + using const_pointer = std::add_const_t<T>*; KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return N; } KOKKOS_INLINE_FUNCTION static constexpr bool empty() { return false; } KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return N; } template <typename iType> - KOKKOS_INLINE_FUNCTION reference operator[](const iType& i) { + KOKKOS_INLINE_FUNCTION constexpr reference operator[](const iType& i) { static_assert( (std::is_integral<iType>::value || std::is_enum<iType>::value), "Must be integral argument"); @@ -139,7 +141,8 @@ struct Array { } template <typename iType> - KOKKOS_INLINE_FUNCTION const_reference operator[](const iType& i) const { + KOKKOS_INLINE_FUNCTION constexpr const_reference operator[]( + const iType& i) const { static_assert( (std::is_integral<iType>::value || std::is_enum<iType>::value), "Must be integral argument"); @@ -147,10 +150,10 @@ struct Array { return m_internal_implementation_private_member_data[i]; } - KOKKOS_INLINE_FUNCTION pointer data() { + KOKKOS_INLINE_FUNCTION constexpr pointer data() { return &m_internal_implementation_private_member_data[0]; } - KOKKOS_INLINE_FUNCTION const_pointer data() const { + KOKKOS_INLINE_FUNCTION constexpr const_pointer data() const { return &m_internal_implementation_private_member_data[0]; } }; @@ -159,12 +162,12 @@ template <class T, class Proxy> struct Array<T, 0, Proxy> { public: using reference = T&; - using const_reference = typename std::add_const<T>::type&; + using const_reference = std::add_const_t<T>&; using size_type = size_t; using difference_type = ptrdiff_t; using value_type = T; using pointer = T*; - using 
const_pointer = typename std::add_const<T>::type*; + using const_pointer = std::add_const_t<T>*; KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return 0; } KOKKOS_INLINE_FUNCTION static constexpr bool empty() { return true; } @@ -216,12 +219,12 @@ struct Array<T, KOKKOS_INVALID_INDEX, Array<>::contiguous> { public: using reference = T&; - using const_reference = typename std::add_const<T>::type&; + using const_reference = std::add_const_t<T>&; using size_type = size_t; using difference_type = ptrdiff_t; using value_type = T; using pointer = T*; - using const_pointer = typename std::add_const<T>::type*; + using const_pointer = std::add_const_t<T>*; KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size; } KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 != m_size; } @@ -285,12 +288,12 @@ struct Array<T, KOKKOS_INVALID_INDEX, Array<>::strided> { public: using reference = T&; - using const_reference = typename std::add_const<T>::type&; + using const_reference = std::add_const_t<T>&; using size_type = size_t; using difference_type = ptrdiff_t; using value_type = T; using pointer = T*; - using const_pointer = typename std::add_const<T>::type*; + using const_pointer = std::add_const_t<T>*; KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size; } KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 != m_size; } @@ -347,4 +350,54 @@ struct Array<T, KOKKOS_INVALID_INDEX, Array<>::strided> { } // namespace Kokkos +//<editor-fold desc="Support for structured binding"> +// guarding against bogus error 'specialization in different namespace' with +// older GCC that do not support C++17 anyway +#if !defined(KOKKOS_COMPILER_GNU) || (KOKKOS_COMPILER_GNU >= 710) +#if defined(KOKKOS_COMPILER_CLANG) && KOKKOS_COMPILER_CLANG < 800 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmismatched-tags" +#endif +template <class T, std::size_t N> +struct std::tuple_size<Kokkos::Array<T, N>> + : 
std::integral_constant<std::size_t, N> {}; + +template <std::size_t I, class T, std::size_t N> +struct std::tuple_element<I, Kokkos::Array<T, N>> { + using type = T; +}; +#if defined(KOKKOS_COMPILER_CLANG) && KOKKOS_COMPILER_CLANG < 800 +#pragma clang diagnostic pop +#endif +#endif + +namespace Kokkos { + +template <std::size_t I, class T, std::size_t N> +KOKKOS_FUNCTION constexpr T& get(Array<T, N>& a) noexcept { + return a[I]; +} + +template <std::size_t I, class T, std::size_t N> +KOKKOS_FUNCTION constexpr T const& get(Array<T, N> const& a) noexcept { + return a[I]; +} + +template <std::size_t I, class T, std::size_t N> +KOKKOS_FUNCTION constexpr T&& get(Array<T, N>&& a) noexcept { + return std::move(a[I]); +} + +template <std::size_t I, class T, std::size_t N> +KOKKOS_FUNCTION constexpr T const&& get(Array<T, N> const&& a) noexcept { + return std::move(a[I]); +} + +} // namespace Kokkos +//</editor-fold> + +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ARRAY +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ARRAY +#endif #endif /* #ifndef KOKKOS_ARRAY_HPP */ diff --git a/packages/kokkos/core/src/Kokkos_Atomic.hpp b/packages/kokkos/core/src/Kokkos_Atomic.hpp index b07b5f2f60796aa3ad10bf395e5a2658a9aaff7e..7a2d1c662c62a25a6c50853e6c2212e7f7b8211a 100644 --- a/packages/kokkos/core/src/Kokkos_Atomic.hpp +++ b/packages/kokkos/core/src/Kokkos_Atomic.hpp @@ -67,6 +67,10 @@ #ifndef KOKKOS_ATOMIC_HPP #define KOKKOS_ATOMIC_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ATOMIC +#endif #include <Kokkos_Macros.hpp> @@ -414,4 +418,8 @@ KOKKOS_INLINE_FUNCTION T desul_atomic_compare_exchange( } // namespace Kokkos #endif /* !KOKKOS_ENABLE_IMPL_DESUL_ATOMICS */ +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ATOMIC +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_ATOMIC +#endif #endif /* KOKKOS_ATOMIC_HPP */ diff --git 
a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp index 81ae34b9e03d3843c9c657ce3b2fccec60b70c9b..ef576d74c3cdfa8d75c564454ff6697a59cdf049 100644 --- a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp +++ b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp @@ -41,6 +41,15 @@ // ************************************************************************ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_ATOMICS_DESUL_CONFIG_HPP #define KOKKOS_ATOMICS_DESUL_CONFIG_HPP diff --git a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp index d2bc9df89282989469cfe408d86e087decd0f44e..b202ab8f83d49447e82d5bc673e80da6ca88080a 100644 --- a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp +++ b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp @@ -1,3 +1,13 @@ + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_ #define KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_ #include <Kokkos_Macros.hpp> diff --git a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp index 939cf950b57932909a2cc2b42473429acc7c3d3c..ed7e8d9ede8508f3ed19682e5843945c85770e48 100644 --- a/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp +++ 
b/packages/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp @@ -1,3 +1,13 @@ + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_ #define KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_ #include <Kokkos_Macros.hpp> diff --git a/packages/kokkos/core/src/Kokkos_Complex.hpp b/packages/kokkos/core/src/Kokkos_Complex.hpp index 466903ab7d6626c0cd7ff97754594cc17933367e..009c73c900c41ccf506a0d0988e49fd9e303cbb2 100644 --- a/packages/kokkos/core/src/Kokkos_Complex.hpp +++ b/packages/kokkos/core/src/Kokkos_Complex.hpp @@ -43,6 +43,10 @@ */ #ifndef KOKKOS_COMPLEX_HPP #define KOKKOS_COMPLEX_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_COMPLEX +#endif #include <Kokkos_Atomic.hpp> #include <Kokkos_MathematicalFunctions.hpp> @@ -87,9 +91,9 @@ class complex& operator=(const complex&) noexcept = default; /// \brief Conversion constructor from compatible RType - template <class RType, - typename std::enable_if<std::is_convertible<RType, RealType>::value, - int>::type = 0> + template < + class RType, + std::enable_if_t<std::is_convertible<RType, RealType>::value, int> = 0> KOKKOS_INLINE_FUNCTION complex(const complex<RType>& other) noexcept // Intentionally do the conversions implicitly here so that users don't // get any warnings about narrowing, etc., that they would expect to get @@ -217,7 +221,6 @@ class // Conditional noexcept, just in case RType throws on divide-by-zero constexpr KOKKOS_INLINE_FUNCTION complex& operator/=( const complex<RealType>& y) noexcept(noexcept(RealType{} / RealType{})) { - using Kokkos::Experimental::fabs; // Scale (by the "1-norm" of y) to avoid unwarranted overflow. 
// If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. @@ -245,7 +248,6 @@ class constexpr KOKKOS_INLINE_FUNCTION complex& operator/=( const std::complex<RealType>& y) noexcept(noexcept(RealType{} / RealType{})) { - using Kokkos::Experimental::fabs; // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. @@ -282,9 +284,9 @@ class //--------------------------------------------------------------------------- //! Copy constructor from volatile. - template <class RType, - typename std::enable_if<std::is_convertible<RType, RealType>::value, - int>::type = 0> + template < + class RType, + std::enable_if_t<std::is_convertible<RType, RealType>::value, int> = 0> KOKKOS_INLINE_FUNCTION complex(const volatile complex<RType>& src) noexcept // Intentionally do the conversions implicitly here so that users don't // get any warnings about narrowing, etc., that they would expect to get @@ -312,8 +314,7 @@ class // vl = r; // vl = cr; template <class Complex, - typename std::enable_if<std::is_same<Complex, complex>::value, - int>::type = 0> + std::enable_if_t<std::is_same<Complex, complex>::value, int> = 0> KOKKOS_INLINE_FUNCTION void operator=(const Complex& src) volatile noexcept { re_ = src.re_; im_ = src.im_; @@ -335,8 +336,7 @@ class // vl = vr; // vl = cvr; template <class Complex, - typename std::enable_if<std::is_same<Complex, complex>::value, - int>::type = 0> + std::enable_if_t<std::is_same<Complex, complex>::value, int> = 0> KOKKOS_INLINE_FUNCTION volatile complex& operator=( const volatile Complex& src) volatile noexcept { re_ = src.re_; @@ -358,8 +358,7 @@ class // l = cvr; // template <class Complex, - typename std::enable_if<std::is_same<Complex, complex>::value, - int>::type = 0> + std::enable_if_t<std::is_same<Complex, complex>::value, int> = 0> KOKKOS_INLINE_FUNCTION complex& operator=( const volatile Complex& src) 
noexcept { re_ = src.re_; @@ -451,7 +450,7 @@ class template <class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION bool operator==(complex<RealType1> const& x, complex<RealType2> const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x.real()) == common_type(y.real()) && common_type(x.imag()) == common_type(y.imag()); } @@ -462,7 +461,7 @@ KOKKOS_INLINE_FUNCTION bool operator==(complex<RealType1> const& x, template <class RealType1, class RealType2> inline bool operator==(std::complex<RealType1> const& x, complex<RealType2> const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x.real()) == common_type(y.real()) && common_type(x.imag()) == common_type(y.imag()); } @@ -471,7 +470,7 @@ inline bool operator==(std::complex<RealType1> const& x, template <class RealType1, class RealType2> inline bool operator==(complex<RealType1> const& x, std::complex<RealType2> const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x.real()) == common_type(y.real()) && common_type(x.imag()) == common_type(y.imag()); } @@ -480,11 +479,10 @@ inline bool operator==(complex<RealType1> const& x, template < class RealType1, class RealType2, // Constraints to avoid participation in oparator==() for every possible RHS - typename std::enable_if<std::is_convertible<RealType2, RealType1>::value, - int>::type = 0> + std::enable_if_t<std::is_convertible<RealType2, RealType1>::value, int> = 0> KOKKOS_INLINE_FUNCTION bool operator==(complex<RealType1> const& x, RealType2 const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, 
RealType2>; return common_type(x.real()) == common_type(y) && common_type(x.imag()) == common_type(0); } @@ -493,11 +491,10 @@ KOKKOS_INLINE_FUNCTION bool operator==(complex<RealType1> const& x, template < class RealType1, class RealType2, // Constraints to avoid participation in oparator==() for every possible RHS - typename std::enable_if<std::is_convertible<RealType1, RealType2>::value, - int>::type = 0> + std::enable_if_t<std::is_convertible<RealType1, RealType2>::value, int> = 0> KOKKOS_INLINE_FUNCTION bool operator==(RealType1 const& x, complex<RealType2> const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x) == common_type(y.real()) && common_type(0) == common_type(y.imag()); } @@ -506,7 +503,7 @@ KOKKOS_INLINE_FUNCTION bool operator==(RealType1 const& x, template <class RealType1, class RealType2> KOKKOS_INLINE_FUNCTION bool operator!=(complex<RealType1> const& x, complex<RealType2> const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x.real()) != common_type(y.real()) || common_type(x.imag()) != common_type(y.imag()); } @@ -515,7 +512,7 @@ KOKKOS_INLINE_FUNCTION bool operator!=(complex<RealType1> const& x, template <class RealType1, class RealType2> inline bool operator!=(std::complex<RealType1> const& x, complex<RealType2> const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x.real()) != common_type(y.real()) || common_type(x.imag()) != common_type(y.imag()); } @@ -524,7 +521,7 @@ inline bool operator!=(std::complex<RealType1> const& x, template <class RealType1, class RealType2> inline bool operator!=(complex<RealType1> const& x, std::complex<RealType2> const& y) noexcept { - 
using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x.real()) != common_type(y.real()) || common_type(x.imag()) != common_type(y.imag()); } @@ -533,11 +530,10 @@ inline bool operator!=(complex<RealType1> const& x, template < class RealType1, class RealType2, // Constraints to avoid participation in oparator==() for every possible RHS - typename std::enable_if<std::is_convertible<RealType2, RealType1>::value, - int>::type = 0> + std::enable_if_t<std::is_convertible<RealType2, RealType1>::value, int> = 0> KOKKOS_INLINE_FUNCTION bool operator!=(complex<RealType1> const& x, RealType2 const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x.real()) != common_type(y) || common_type(x.imag()) != common_type(0); } @@ -546,11 +542,10 @@ KOKKOS_INLINE_FUNCTION bool operator!=(complex<RealType1> const& x, template < class RealType1, class RealType2, // Constraints to avoid participation in oparator==() for every possible RHS - typename std::enable_if<std::is_convertible<RealType1, RealType2>::value, - int>::type = 0> + std::enable_if_t<std::is_convertible<RealType1, RealType2>::value, int> = 0> KOKKOS_INLINE_FUNCTION bool operator!=(RealType1 const& x, complex<RealType2> const& y) noexcept { - using common_type = typename std::common_type<RealType1, RealType2>::type; + using common_type = std::common_type_t<RealType1, RealType2>; return common_type(x) != common_type(y.real()) || common_type(0) != common_type(y.imag()); } @@ -560,30 +555,26 @@ KOKKOS_INLINE_FUNCTION bool operator!=(RealType1 const& x, //! Binary + operator for complex complex. 
template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator+(const complex<RealType1>& x, - const complex<RealType2>& y) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( - x.real() + y.real(), x.imag() + y.imag()); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator+(const complex<RealType1>& x, const complex<RealType2>& y) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>(x.real() + y.real(), + x.imag() + y.imag()); } //! Binary + operator for complex scalar. template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator+(const complex<RealType1>& x, const RealType2& y) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( - x.real() + y, x.imag()); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator+(const complex<RealType1>& x, const RealType2& y) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>(x.real() + y, + x.imag()); } //! Binary + operator for scalar complex. template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator+(const RealType1& x, const complex<RealType2>& y) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( - x + y.real(), y.imag()); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator+(const RealType1& x, const complex<RealType2>& y) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>(x + y.real(), + y.imag()); } //! Unary + operator for complex. @@ -595,30 +586,26 @@ KOKKOS_INLINE_FUNCTION complex<RealType> operator+( //! Binary - operator for complex. 
template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator-(const complex<RealType1>& x, - const complex<RealType2>& y) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( - x.real() - y.real(), x.imag() - y.imag()); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator-(const complex<RealType1>& x, const complex<RealType2>& y) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>(x.real() - y.real(), + x.imag() - y.imag()); } //! Binary - operator for complex scalar. template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator-(const complex<RealType1>& x, const RealType2& y) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( - x.real() - y, x.imag()); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator-(const complex<RealType1>& x, const RealType2& y) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>(x.real() - y, + x.imag()); } //! Binary - operator for scalar complex. template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator-(const RealType1& x, const complex<RealType2>& y) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( - x - y.real(), -y.imag()); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator-(const RealType1& x, const complex<RealType2>& y) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>(x - y.real(), + -y.imag()); } //! Unary - operator for complex. @@ -630,11 +617,9 @@ KOKKOS_INLINE_FUNCTION complex<RealType> operator-( //! Binary * operator for complex. 
template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator*(const complex<RealType1>& x, - const complex<RealType2>& y) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator*(const complex<RealType1>& x, const complex<RealType2>& y) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>( x.real() * y.real() - x.imag() * y.imag(), x.real() * y.imag() + x.imag() * y.real()); } @@ -648,9 +633,9 @@ KOKKOS_INLINE_FUNCTION /// std::complex's methods and nonmember functions are not marked as /// CUDA device functions. template <class RealType1, class RealType2> -inline complex<typename std::common_type<RealType1, RealType2>::type> operator*( +inline complex<std::common_type_t<RealType1, RealType2>> operator*( const std::complex<RealType1>& x, const complex<RealType2>& y) { - return complex<typename std::common_type<RealType1, RealType2>::type>( + return complex<std::common_type_t<RealType1, RealType2>>( x.real() * y.real() - x.imag() * y.imag(), x.real() * y.imag() + x.imag() * y.real()); } @@ -660,11 +645,10 @@ inline complex<typename std::common_type<RealType1, RealType2>::type> operator*( /// This function exists because the compiler doesn't know that /// RealType and complex<RealType> commute with respect to operator*. 
template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator*(const RealType1& x, const complex<RealType2>& y) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( - x * y.real(), x * y.imag()); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator*(const RealType1& x, const complex<RealType2>& y) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>(x * y.real(), + x * y.imag()); } /// \brief Binary * operator for RealType times complex. @@ -672,11 +656,10 @@ KOKKOS_INLINE_FUNCTION /// This function exists because the compiler doesn't know that /// RealType and complex<RealType> commute with respect to operator*. template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator*(const complex<RealType1>& y, const RealType2& x) noexcept { - return complex<typename std::common_type<RealType1, RealType2>::type>( - x * y.real(), x * y.imag()); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator*(const complex<RealType1>& y, const RealType2& x) noexcept { + return complex<std::common_type_t<RealType1, RealType2>>(x * y.real(), + x * y.imag()); } //! Imaginary part of a complex number. @@ -706,8 +689,6 @@ KOKKOS_INLINE_FUNCTION constexpr Impl::promote_t<ArithmeticType> real( //! Constructs a complex number from magnitude and phase angle template <class T> KOKKOS_INLINE_FUNCTION complex<T> polar(const T& r, const T& theta = T()) { - using Kokkos::Experimental::cos; - using Kokkos::Experimental::sin; KOKKOS_EXPECTS(r >= 0); return complex<T>(r * cos(theta), r * sin(theta)); } @@ -715,15 +696,12 @@ KOKKOS_INLINE_FUNCTION complex<T> polar(const T& r, const T& theta = T()) { //! Absolute value (magnitude) of a complex number. 
template <class RealType> KOKKOS_INLINE_FUNCTION RealType abs(const complex<RealType>& x) { - using Kokkos::Experimental::hypot; return hypot(x.real(), x.imag()); } //! Power of a complex number template <class T> KOKKOS_INLINE_FUNCTION complex<T> pow(const complex<T>& x, const T& y) { - using Kokkos::Experimental::atan2; - using Kokkos::Experimental::pow; T r = abs(x); T theta = atan2(x.imag(), x.real()); return polar(pow(r, y), y * theta); @@ -737,8 +715,6 @@ KOKKOS_INLINE_FUNCTION complex<T> pow(const T& x, const complex<T>& y) { template <class T> KOKKOS_INLINE_FUNCTION complex<T> pow(const complex<T>& x, const complex<T>& y) { - using Kokkos::Experimental::log; - return x == T() ? T() : exp(y * log(x)); } @@ -770,9 +746,6 @@ KOKKOS_INLINE_FUNCTION complex<Impl::promote_2_t<T, U>> pow( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sqrt( const complex<RealType>& x) { - using Kokkos::Experimental::fabs; - using Kokkos::Experimental::sqrt; - RealType r = x.real(); RealType i = x.imag(); @@ -805,9 +778,6 @@ KOKKOS_INLINE_FUNCTION constexpr complex<Impl::promote_t<ArithmeticType>> conj( //! Exponential of a complex number. template <class RealType> KOKKOS_INLINE_FUNCTION complex<RealType> exp(const complex<RealType>& x) { - using Kokkos::Experimental::cos; - using Kokkos::Experimental::exp; - using Kokkos::Experimental::sin; return exp(x.real()) * complex<RealType>(cos(x.imag()), sin(x.imag())); } @@ -815,20 +785,21 @@ KOKKOS_INLINE_FUNCTION complex<RealType> exp(const complex<RealType>& x) { template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> log( const complex<RealType>& x) { - using Kokkos::Experimental::atan2; - using Kokkos::Experimental::log; RealType phi = atan2(x.imag(), x.real()); return Kokkos::complex<RealType>(log(abs(x)), phi); } +//! base 10 log of a complex number. 
+template <class RealType> +KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> log10( + const complex<RealType>& x) { + return log(x) / log(RealType(10)); +} + //! sine of a complex number. template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sin( const complex<RealType>& x) { - using Kokkos::Experimental::cos; - using Kokkos::Experimental::cosh; - using Kokkos::Experimental::sin; - using Kokkos::Experimental::sinh; return Kokkos::complex<RealType>(sin(x.real()) * cosh(x.imag()), cos(x.real()) * sinh(x.imag())); } @@ -837,10 +808,6 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sin( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> cos( const complex<RealType>& x) { - using Kokkos::Experimental::cos; - using Kokkos::Experimental::cosh; - using Kokkos::Experimental::sin; - using Kokkos::Experimental::sinh; return Kokkos::complex<RealType>(cos(x.real()) * cosh(x.imag()), -sin(x.real()) * sinh(x.imag())); } @@ -856,10 +823,6 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> tan( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sinh( const complex<RealType>& x) { - using Kokkos::Experimental::cos; - using Kokkos::Experimental::cosh; - using Kokkos::Experimental::sin; - using Kokkos::Experimental::sinh; return Kokkos::complex<RealType>(sinh(x.real()) * cos(x.imag()), cosh(x.real()) * sin(x.imag())); } @@ -868,10 +831,6 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sinh( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> cosh( const complex<RealType>& x) { - using Kokkos::Experimental::cos; - using Kokkos::Experimental::cosh; - using Kokkos::Experimental::sin; - using Kokkos::Experimental::sinh; return Kokkos::complex<RealType>(cosh(x.real()) * cos(x.imag()), sinh(x.real()) * sin(x.imag())); } @@ -902,9 +861,6 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> acosh( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> atanh( const 
complex<RealType>& x) { - using Kokkos::Experimental::atan2; - using Kokkos::Experimental::log; - const RealType i2 = x.imag() * x.imag(); const RealType r = RealType(1.0) - i2 - x.real() * x.real(); @@ -932,7 +888,6 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> asin( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> acos( const complex<RealType>& x) { - using Kokkos::Experimental::acos; Kokkos::complex<RealType> t = asin(x); RealType pi_2 = acos(RealType(0.0)); return Kokkos::complex<RealType>(pi_2 - t.real(), -t.imag()); @@ -942,8 +897,6 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> acos( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> atan( const complex<RealType>& x) { - using Kokkos::Experimental::atan2; - using Kokkos::Experimental::log; const RealType r2 = x.real() * x.real(); const RealType i = RealType(1.0) - r2 - x.imag() * x.imag(); @@ -969,28 +922,23 @@ inline complex<RealType> exp(const std::complex<RealType>& c) { //! Binary operator / for complex and real numbers template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator/(const complex<RealType1>& x, - const RealType2& y) noexcept(noexcept(RealType1{} / - RealType2{})) { - return complex<typename std::common_type<RealType1, RealType2>::type>( - real(x) / y, imag(x) / y); +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator/(const complex<RealType1>& x, + const RealType2& y) noexcept(noexcept(RealType1{} / RealType2{})) { + return complex<std::common_type_t<RealType1, RealType2>>(real(x) / y, + imag(x) / y); } //! Binary operator / for complex. 
template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator/(const complex<RealType1>& x, - const complex<RealType2>& y) noexcept(noexcept(RealType1{} / - RealType2{})) { - using Kokkos::Experimental::fabs; +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator/(const complex<RealType1>& x, + const complex<RealType2>& y) noexcept(noexcept(RealType1{} / + RealType2{})) { // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. - using common_real_type = - typename std::common_type<RealType1, RealType2>::type; + using common_real_type = std::common_type_t<RealType1, RealType2>; const common_real_type s = fabs(real(y)) + fabs(imag(y)); // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. @@ -1012,12 +960,11 @@ KOKKOS_INLINE_FUNCTION //! Binary operator / for complex and real numbers template <class RealType1, class RealType2> -KOKKOS_INLINE_FUNCTION - complex<typename std::common_type<RealType1, RealType2>::type> - operator/(const RealType1& x, - const complex<RealType2>& y) noexcept(noexcept(RealType1{} / - RealType2{})) { - return complex<typename std::common_type<RealType1, RealType2>::type>(x) / y; +KOKKOS_INLINE_FUNCTION complex<std::common_type_t<RealType1, RealType2>> +operator/(const RealType1& x, + const complex<RealType2>& y) noexcept(noexcept(RealType1{} / + RealType2{})) { + return complex<std::common_type_t<RealType1, RealType2>>(x) / y; } template <class RealType> @@ -1050,4 +997,8 @@ struct reduction_identity<Kokkos::complex<T>> { } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_COMPLEX +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_COMPLEX +#endif #endif // KOKKOS_COMPLEX_HPP diff --git a/packages/kokkos/core/src/Kokkos_Concepts.hpp 
b/packages/kokkos/core/src/Kokkos_Concepts.hpp index 5a1a571e43e74ce51aed4db7d9bbada584083f06..63f2b896b5d123cb50d1019a4a8ed871d57b11f4 100644 --- a/packages/kokkos/core/src/Kokkos_Concepts.hpp +++ b/packages/kokkos/core/src/Kokkos_Concepts.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_CORE_CONCEPTS_HPP #define KOKKOS_CORE_CONCEPTS_HPP @@ -174,7 +183,8 @@ KOKKOS_IMPL_IS_CONCEPT(array_layout) KOKKOS_IMPL_IS_CONCEPT(reducer) namespace Experimental { KOKKOS_IMPL_IS_CONCEPT(work_item_property) -} +KOKKOS_IMPL_IS_CONCEPT(hooks_policy) +} // namespace Experimental namespace Impl { @@ -269,8 +279,7 @@ struct is_device_helper<Device<ExecutionSpace, MemorySpace>> : std::true_type { } // namespace Impl template <typename T> -using is_device = - typename Impl::is_device_helper<typename std::remove_cv<T>::type>::type; +using is_device = typename Impl::is_device_helper<std::remove_cv_t<T>>::type; //---------------------------------------------------------------------------- @@ -293,32 +302,26 @@ struct is_space { }; template <typename U> - struct exe<U, typename std::conditional<true, void, - typename U::execution_space>::type> + struct exe<U, std::conditional_t<true, void, typename U::execution_space>> : std::is_same<U, typename U::execution_space>::type { using space = typename U::execution_space; }; template <typename U> - struct mem< - U, typename std::conditional<true, void, typename U::memory_space>::type> + struct mem<U, std::conditional_t<true, void, typename U::memory_space>> : std::is_same<U, typename U::memory_space>::type { using space = typename U::memory_space; }; template <typename U> - struct dev< - U, typename std::conditional<true, void, typename 
U::device_type>::type> + struct dev<U, std::conditional_t<true, void, typename U::device_type>> : std::is_same<U, typename U::device_type>::type { using space = typename U::device_type; }; - using is_exe = - typename is_space<T>::template exe<typename std::remove_cv<T>::type>; - using is_mem = - typename is_space<T>::template mem<typename std::remove_cv<T>::type>; - using is_dev = - typename is_space<T>::template dev<typename std::remove_cv<T>::type>; + using is_exe = typename is_space<T>::template exe<std::remove_cv_t<T>>; + using is_mem = typename is_space<T>::template mem<std::remove_cv_t<T>>; + using is_dev = typename is_space<T>::template dev<std::remove_cv_t<T>>; public: static constexpr bool value = is_exe::value || is_mem::value || is_dev::value; @@ -342,7 +345,9 @@ struct is_space { std::is_same<memory_space, Kokkos::CudaHostPinnedSpace>::value #elif defined(KOKKOS_ENABLE_HIP) || std::is_same<memory_space, - Kokkos::Experimental::HIPHostPinnedSpace>::value + Kokkos::Experimental::HIPHostPinnedSpace>::value || + std::is_same<memory_space, + Kokkos::Experimental::HIPManagedSpace>::value #elif defined(KOKKOS_ENABLE_SYCL) || std::is_same<memory_space, Kokkos::Experimental::SYCLSharedUSMSpace>::value || @@ -500,11 +505,11 @@ struct SpaceAccessibility { // to be able to access MemorySpace? // If same memory space or not accessible use the AccessSpace // else construct a device with execution space and memory space. 
- using space = typename std::conditional< + using space = std::conditional_t< std::is_same<typename AccessSpace::memory_space, MemorySpace>::value || !exe_access::accessible, AccessSpace, - Kokkos::Device<typename AccessSpace::execution_space, MemorySpace>>::type; + Kokkos::Device<typename AccessSpace::execution_space, MemorySpace>>; }; } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_CopyViews.hpp b/packages/kokkos/core/src/Kokkos_CopyViews.hpp index ac516e31ea66672a043b87d325020204af96ad88..0a66ee9da71fdaf3bb2bf649fb1a7081e4651ea4 100644 --- a/packages/kokkos/core/src/Kokkos_CopyViews.hpp +++ b/packages/kokkos/core/src/Kokkos_CopyViews.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_COPYVIEWS_HPP_ #define KOKKOS_COPYVIEWS_HPP_ #include <string> @@ -862,14 +871,20 @@ template <class DstType, class SrcType, class ExecSpace> struct ViewRemap<DstType, SrcType, ExecSpace, 1> { using p_type = Kokkos::pair<int64_t, int64_t>; - ViewRemap(const DstType& dst, const SrcType& src) { + template <typename... OptExecSpace> + ViewRemap(const DstType& dst, const SrcType& src, + const OptExecSpace&... 
exec_space) { + static_assert( + sizeof...(OptExecSpace) <= 1, + "OptExecSpace must be either empty or be an execution space!"); + if (dst.extent(0) == src.extent(0)) { - view_copy(dst, src); + view_copy(exec_space..., dst, src); } else { p_type ext0(0, std::min(dst.extent(0), src.extent(0))); using sv_adapter_type = CommonSubview<DstType, SrcType, 1, p_type>; sv_adapter_type common_subview(dst, src, ext0); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, common_subview.src_sub); } } }; @@ -878,16 +893,23 @@ template <class DstType, class SrcType, class ExecSpace> struct ViewRemap<DstType, SrcType, ExecSpace, 2> { using p_type = Kokkos::pair<int64_t, int64_t>; - ViewRemap(const DstType& dst, const SrcType& src) { + template <typename... OptExecSpace> + ViewRemap(const DstType& dst, const SrcType& src, + const OptExecSpace&... exec_space) { + static_assert( + sizeof...(OptExecSpace) <= 1, + "OptExecSpace must be either empty or be an execution space!"); + if (dst.extent(0) == src.extent(0)) { if (dst.extent(1) == src.extent(1)) { - view_copy(dst, src); + view_copy(exec_space..., dst, src); } else { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); using sv_adapter_type = CommonSubview<DstType, SrcType, 2, Kokkos::Impl::ALL_t, p_type>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } else { if (dst.extent(1) == src.extent(1)) { @@ -895,14 +917,16 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 2> { using sv_adapter_type = CommonSubview<DstType, SrcType, 2, p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, ext0, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext0(0, std::min(dst.extent(0), src.extent(0))); 
p_type ext1(0, std::min(dst.extent(1), src.extent(1))); using sv_adapter_type = CommonSubview<DstType, SrcType, 2, p_type, p_type>; sv_adapter_type common_subview(dst, src, ext0, ext1); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } } @@ -912,7 +936,13 @@ template <class DstType, class SrcType, class ExecSpace> struct ViewRemap<DstType, SrcType, ExecSpace, 3> { using p_type = Kokkos::pair<int64_t, int64_t>; - ViewRemap(const DstType& dst, const SrcType& src) { + template <typename... OptExecSpace> + ViewRemap(const DstType& dst, const SrcType& src, + const OptExecSpace&... exec_space) { + static_assert( + sizeof...(OptExecSpace) <= 1, + "OptExecSpace must be either empty or be an execution space!"); + if (dst.extent(0) == src.extent(0)) { if (dst.extent(2) == src.extent(2)) { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -921,7 +951,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 3> { Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); p_type ext2(0, std::min(dst.extent(2), src.extent(2))); @@ -929,7 +960,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 3> { CommonSubview<DstType, SrcType, 3, Kokkos::Impl::ALL_t, p_type, p_type>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } else { if (dst.extent(2) == src.extent(2)) { @@ -938,7 +970,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 3> { using sv_adapter_type = CommonSubview<DstType, SrcType, 3, p_type, p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, ext0, ext1, Kokkos::ALL); 
- view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext0(0, std::min(dst.extent(0), src.extent(0))); p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -946,7 +979,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 3> { using sv_adapter_type = CommonSubview<DstType, SrcType, 3, p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } } @@ -956,7 +990,13 @@ template <class DstType, class SrcType, class ExecSpace> struct ViewRemap<DstType, SrcType, ExecSpace, 4> { using p_type = Kokkos::pair<int64_t, int64_t>; - ViewRemap(const DstType& dst, const SrcType& src) { + template <typename... OptExecSpace> + ViewRemap(const DstType& dst, const SrcType& src, + const OptExecSpace&... exec_space) { + static_assert( + sizeof...(OptExecSpace) <= 1, + "OptExecSpace must be either empty or be an execution space!"); + if (dst.extent(0) == src.extent(0)) { if (dst.extent(3) == src.extent(3)) { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -966,7 +1006,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 4> { p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); p_type ext2(0, std::min(dst.extent(2), src.extent(2))); @@ -975,7 +1016,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 4> { CommonSubview<DstType, SrcType, 4, Kokkos::Impl::ALL_t, p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., 
common_subview.dst_sub, + common_subview.src_sub); } } else { if (dst.extent(3) == src.extent(3)) { @@ -986,7 +1028,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 4> { CommonSubview<DstType, SrcType, 4, p_type, p_type, p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext0(0, std::min(dst.extent(0), src.extent(0))); p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -995,7 +1038,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 4> { using sv_adapter_type = CommonSubview<DstType, SrcType, 4, p_type, p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } } @@ -1005,7 +1049,13 @@ template <class DstType, class SrcType, class ExecSpace> struct ViewRemap<DstType, SrcType, ExecSpace, 5> { using p_type = Kokkos::pair<int64_t, int64_t>; - ViewRemap(const DstType& dst, const SrcType& src) { + template <typename... OptExecSpace> + ViewRemap(const DstType& dst, const SrcType& src, + const OptExecSpace&... 
exec_space) { + static_assert( + sizeof...(OptExecSpace) <= 1, + "OptExecSpace must be either empty or be an execution space!"); + if (dst.extent(0) == src.extent(0)) { if (dst.extent(4) == src.extent(4)) { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -1016,7 +1066,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 5> { p_type, p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); p_type ext2(0, std::min(dst.extent(2), src.extent(2))); @@ -1027,7 +1078,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 5> { p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3, ext4); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } else { if (dst.extent(4) == src.extent(4)) { @@ -1040,7 +1092,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 5> { Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext0(0, std::min(dst.extent(0), src.extent(0))); p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -1050,7 +1103,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 5> { using sv_adapter_type = CommonSubview<DstType, SrcType, 5, p_type, p_type, p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3, ext4); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } } @@ -1059,7 +1113,13 @@ template <class DstType, class SrcType, class ExecSpace> struct 
ViewRemap<DstType, SrcType, ExecSpace, 6> { using p_type = Kokkos::pair<int64_t, int64_t>; - ViewRemap(const DstType& dst, const SrcType& src) { + template <typename... OptExecSpace> + ViewRemap(const DstType& dst, const SrcType& src, + const OptExecSpace&... exec_space) { + static_assert( + sizeof...(OptExecSpace) <= 1, + "OptExecSpace must be either empty or be an execution space!"); + if (dst.extent(0) == src.extent(0)) { if (dst.extent(5) == src.extent(5)) { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -1071,7 +1131,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 6> { p_type, p_type, p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3, ext4, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); p_type ext2(0, std::min(dst.extent(2), src.extent(2))); @@ -1083,7 +1144,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 6> { p_type, p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3, ext4, ext5); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } else { if (dst.extent(5) == src.extent(5)) { @@ -1098,7 +1160,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 6> { p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3, ext4, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext0(0, std::min(dst.extent(0), src.extent(0))); p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -1112,7 +1175,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 6> { p_type, p_type>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3, ext4, ext5); - 
view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } } @@ -1122,7 +1186,13 @@ template <class DstType, class SrcType, class ExecSpace> struct ViewRemap<DstType, SrcType, ExecSpace, 7> { using p_type = Kokkos::pair<int64_t, int64_t>; - ViewRemap(const DstType& dst, const SrcType& src) { + template <typename... OptExecSpace> + ViewRemap(const DstType& dst, const SrcType& src, + const OptExecSpace&... exec_space) { + static_assert( + sizeof...(OptExecSpace) <= 1, + "OptExecSpace must be either empty or be an execution space!"); + if (dst.extent(0) == src.extent(0)) { if (dst.extent(6) == src.extent(6)) { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -1135,7 +1205,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 7> { p_type, p_type, p_type, p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3, ext4, ext5, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); p_type ext2(0, std::min(dst.extent(2), src.extent(2))); @@ -1148,7 +1219,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 7> { p_type, p_type, p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3, ext4, ext5, ext6); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } else { if (dst.extent(6) == src.extent(6)) { @@ -1163,7 +1235,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 7> { p_type, p_type, Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3, ext4, ext5, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext0(0, 
std::min(dst.extent(0), src.extent(0))); p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -1177,7 +1250,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 7> { p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3, ext4, ext5, ext6); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } } @@ -1187,7 +1261,13 @@ template <class DstType, class SrcType, class ExecSpace> struct ViewRemap<DstType, SrcType, ExecSpace, 8> { using p_type = Kokkos::pair<int64_t, int64_t>; - ViewRemap(const DstType& dst, const SrcType& src) { + template <typename... OptExecSpace> + ViewRemap(const DstType& dst, const SrcType& src, + const OptExecSpace&... exec_space) { + static_assert( + sizeof...(OptExecSpace) <= 1, + "OptExecSpace must be either empty or be an execution space!"); + if (dst.extent(0) == src.extent(0)) { if (dst.extent(7) == src.extent(7)) { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -1202,7 +1282,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 8> { Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3, ext4, ext5, ext6, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext1(0, std::min(dst.extent(1), src.extent(1))); p_type ext2(0, std::min(dst.extent(2), src.extent(2))); @@ -1216,7 +1297,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 8> { p_type, p_type, p_type, p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, Kokkos::ALL, ext1, ext2, ext3, ext4, ext5, ext6, ext7); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } else { if (dst.extent(7) == src.extent(7)) { @@ -1232,7 +1314,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 8> { p_type, p_type, p_type, 
Kokkos::Impl::ALL_t>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3, ext4, ext5, ext6, Kokkos::ALL); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } else { p_type ext0(0, std::min(dst.extent(0), src.extent(0))); p_type ext1(0, std::min(dst.extent(1), src.extent(1))); @@ -1247,7 +1330,8 @@ struct ViewRemap<DstType, SrcType, ExecSpace, 8> { p_type, p_type, p_type, p_type>; sv_adapter_type common_subview(dst, src, ext0, ext1, ext2, ext3, ext4, ext5, ext6, ext7); - view_copy(common_subview.dst_sub, common_subview.src_sub); + view_copy(exec_space..., common_subview.dst_sub, + common_subview.src_sub); } } } @@ -1261,9 +1345,9 @@ inline void contiguous_fill( using ViewTypeFlat = Kokkos::View< typename ViewType::value_type*, Kokkos::LayoutRight, Kokkos::Device<typename ViewType::execution_space, - typename std::conditional<ViewType::Rank == 0, - typename ViewType::memory_space, - Kokkos::AnonymousSpace>::type>, + std::conditional_t<ViewType::Rank == 0, + typename ViewType::memory_space, + Kokkos::AnonymousSpace>>, Kokkos::MemoryTraits<0>>; ViewTypeFlat dst_flat(dst.data(), dst.size()); @@ -1292,23 +1376,27 @@ struct ZeroMemset { template <typename ExecutionSpace, class DT, class... 
DP> inline std::enable_if_t< - std::is_trivial<typename ViewTraits<DT, DP...>::const_value_type>::value && + std::is_trivial<typename ViewTraits<DT, DP...>::value_type>::value && std::is_trivially_copy_assignable< - typename ViewTraits<DT, DP...>::const_value_type>::value> + typename ViewTraits<DT, DP...>::value_type>::value> contiguous_fill_or_memset( const ExecutionSpace& exec_space, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value) { +// On A64FX memset seems to do the wrong thing with regards to first touch +// leading to the significant performance issues +#ifndef KOKKOS_ARCH_A64FX if (Impl::is_zero_byte(value)) ZeroMemset<ExecutionSpace, DT, DP...>(exec_space, dst, value); else +#endif contiguous_fill(exec_space, dst, value); } template <typename ExecutionSpace, class DT, class... DP> -inline std::enable_if_t<!( - std::is_trivial<typename ViewTraits<DT, DP...>::const_value_type>::value && - std::is_trivially_copy_assignable< - typename ViewTraits<DT, DP...>::const_value_type>::value)> +inline std::enable_if_t< + !(std::is_trivial<typename ViewTraits<DT, DP...>::value_type>::value && + std::is_trivially_copy_assignable< + typename ViewTraits<DT, DP...>::value_type>::value)> contiguous_fill_or_memset( const ExecutionSpace& exec_space, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value) { @@ -1317,26 +1405,30 @@ contiguous_fill_or_memset( template <class DT, class... 
DP> inline std::enable_if_t< - std::is_trivial<typename ViewTraits<DT, DP...>::const_value_type>::value && + std::is_trivial<typename ViewTraits<DT, DP...>::value_type>::value && std::is_trivially_copy_assignable< - typename ViewTraits<DT, DP...>::const_value_type>::value> + typename ViewTraits<DT, DP...>::value_type>::value> contiguous_fill_or_memset( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value) { using ViewType = View<DT, DP...>; using exec_space_type = typename ViewType::execution_space; +// On A64FX memset seems to do the wrong thing with regards to first touch +// leading to the significant performance issues +#ifndef KOKKOS_ARCH_A64FX if (Impl::is_zero_byte(value)) ZeroMemset<exec_space_type, DT, DP...>(dst, value); else +#endif contiguous_fill(exec_space_type(), dst, value); } template <class DT, class... DP> -inline std::enable_if_t<!( - std::is_trivial<typename ViewTraits<DT, DP...>::const_value_type>::value && - std::is_trivially_copy_assignable< - typename ViewTraits<DT, DP...>::const_value_type>::value)> +inline std::enable_if_t< + !(std::is_trivial<typename ViewTraits<DT, DP...>::value_type>::value && + std::is_trivially_copy_assignable< + typename ViewTraits<DT, DP...>::value_type>::value)> contiguous_fill_or_memset( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value) { @@ -1352,9 +1444,8 @@ template <class DT, class... 
DP> inline void deep_copy( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { using ViewType = View<DT, DP...>; using exec_space_type = typename ViewType::execution_space; @@ -1416,9 +1507,10 @@ inline void deep_copy( // Lets call the right ViewFill functor based on integer space needed and // iteration type - using ViewTypeUniform = typename std::conditional< - ViewType::Rank == 0, typename ViewType::uniform_runtime_type, - typename ViewType::uniform_runtime_nomemspace_type>::type; + using ViewTypeUniform = + std::conditional_t<ViewType::Rank == 0, + typename ViewType::uniform_runtime_type, + typename ViewType::uniform_runtime_nomemspace_type>; if (dst.span() > static_cast<size_t>(std::numeric_limits<int>::max())) { if (iterate == Kokkos::Iterate::Right) Kokkos::Impl::ViewFill<ViewTypeUniform, Kokkos::LayoutRight, @@ -1450,9 +1542,8 @@ template <class ST, class... SP> inline void deep_copy( typename ViewTraits<ST, SP...>::non_const_value_type& dst, const View<ST, SP...>& src, - typename std::enable_if<std::is_same< - typename ViewTraits<ST, SP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<ST, SP...>::specialize, + void>::value>* = nullptr) { using src_traits = ViewTraits<ST, SP...>; using src_memory_space = typename src_traits::memory_space; @@ -1487,12 +1578,11 @@ inline void deep_copy( template <class DT, class... DP, class ST, class... 
SP> inline void deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<( - std::is_same<typename ViewTraits<DT, DP...>::specialize, void>::value && - std::is_same<typename ViewTraits<ST, SP...>::specialize, void>::value && - (unsigned(ViewTraits<DT, DP...>::rank) == unsigned(0) && - unsigned(ViewTraits<ST, SP...>::rank) == unsigned(0)))>::type* = - nullptr) { + std::enable_if_t< + (std::is_void<typename ViewTraits<DT, DP...>::specialize>::value && + std::is_void<typename ViewTraits<ST, SP...>::specialize>::value && + (unsigned(ViewTraits<DT, DP...>::rank) == unsigned(0) && + unsigned(ViewTraits<ST, SP...>::rank) == unsigned(0)))>* = nullptr) { using dst_type = View<DT, DP...>; using src_type = View<ST, SP...>; @@ -1540,11 +1630,11 @@ inline void deep_copy( template <class DT, class... DP, class ST, class... SP> inline void deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<( - std::is_same<typename ViewTraits<DT, DP...>::specialize, void>::value && - std::is_same<typename ViewTraits<ST, SP...>::specialize, void>::value && - (unsigned(ViewTraits<DT, DP...>::rank) != 0 || - unsigned(ViewTraits<ST, SP...>::rank) != 0))>::type* = nullptr) { + std::enable_if_t< + (std::is_void<typename ViewTraits<DT, DP...>::specialize>::value && + std::is_void<typename ViewTraits<ST, SP...>::specialize>::value && + (unsigned(ViewTraits<DT, DP...>::rank) != 0 || + unsigned(ViewTraits<ST, SP...>::rank) != 0))>* = nullptr) { using dst_type = View<DT, DP...>; using src_type = View<ST, SP...>; using dst_execution_space = typename dst_type::execution_space; @@ -1748,9 +1838,8 @@ template <class TeamType, class DT, class... DP, class ST, class... 
SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 1 && - unsigned(ViewTraits<ST, SP...>::rank) == - 1)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 1 && + unsigned(ViewTraits<ST, SP...>::rank) == 1)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1767,9 +1856,8 @@ template <class TeamType, class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 2 && - unsigned(ViewTraits<ST, SP...>::rank) == - 2)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 2 && + unsigned(ViewTraits<ST, SP...>::rank) == 2)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1795,9 +1883,8 @@ template <class TeamType, class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 3 && - unsigned(ViewTraits<ST, SP...>::rank) == - 3)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 3 && + unsigned(ViewTraits<ST, SP...>::rank) == 3)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1825,9 +1912,8 @@ template <class TeamType, class DT, class... DP, class ST, class... 
SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 4 && - unsigned(ViewTraits<ST, SP...>::rank) == - 4)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 4 && + unsigned(ViewTraits<ST, SP...>::rank) == 4)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1858,9 +1944,8 @@ template <class TeamType, class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 5 && - unsigned(ViewTraits<ST, SP...>::rank) == - 5)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 5 && + unsigned(ViewTraits<ST, SP...>::rank) == 5)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1893,9 +1978,8 @@ template <class TeamType, class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 6 && - unsigned(ViewTraits<ST, SP...>::rank) == - 6)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 6 && + unsigned(ViewTraits<ST, SP...>::rank) == 6)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1930,9 +2014,8 @@ template <class TeamType, class DT, class... DP, class ST, class... 
SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 7 && - unsigned(ViewTraits<ST, SP...>::rank) == - 7)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 7 && + unsigned(ViewTraits<ST, SP...>::rank) == 7)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1969,9 +2052,8 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( template <class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 1 && - unsigned(ViewTraits<ST, SP...>::rank) == - 1)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 1 && + unsigned(ViewTraits<ST, SP...>::rank) == 1)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1986,9 +2068,8 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( template <class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 2 && - unsigned(ViewTraits<ST, SP...>::rank) == - 2)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 2 && + unsigned(ViewTraits<ST, SP...>::rank) == 2)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2004,9 +2085,8 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( template <class DT, class... DP, class ST, class... 
SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 3 && - unsigned(ViewTraits<ST, SP...>::rank) == - 3)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 3 && + unsigned(ViewTraits<ST, SP...>::rank) == 3)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2024,9 +2104,8 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( template <class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 4 && - unsigned(ViewTraits<ST, SP...>::rank) == - 4)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 4 && + unsigned(ViewTraits<ST, SP...>::rank) == 4)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2045,9 +2124,8 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( template <class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 5 && - unsigned(ViewTraits<ST, SP...>::rank) == - 5)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 5 && + unsigned(ViewTraits<ST, SP...>::rank) == 5)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2067,9 +2145,8 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( template <class DT, class... DP, class ST, class... 
SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 6 && - unsigned(ViewTraits<ST, SP...>::rank) == - 6)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 6 && + unsigned(ViewTraits<ST, SP...>::rank) == 6)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2090,9 +2167,8 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( template <class DT, class... DP, class ST, class... SP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == 7 && - unsigned(ViewTraits<ST, SP...>::rank) == - 7)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 7 && + unsigned(ViewTraits<ST, SP...>::rank) == 7)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2118,9 +2194,8 @@ template <class TeamType, class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous( const TeamType& team, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { Kokkos::parallel_for(Kokkos::TeamVectorRange(team, dst.span()), [&](const int& i) { dst.data()[i] = value; }); } @@ -2129,9 +2204,8 @@ template <class DT, class... 
DP> void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<std::is_same< - typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = - nullptr) { + std::enable_if_t<std::is_same<typename ViewTraits<DT, DP...>::specialize, + void>::value>* = nullptr) { for (size_t i = 0; i < dst.span(); ++i) { dst.data()[i] = value; } @@ -2141,8 +2215,7 @@ template <class TeamType, class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 1)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 1)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2159,8 +2232,7 @@ template <class TeamType, class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 2)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 2)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2186,8 +2258,7 @@ template <class TeamType, class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 3)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 3)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2215,8 +2286,7 @@ template <class TeamType, class DT, class... 
DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 4)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 4)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2247,8 +2317,7 @@ template <class TeamType, class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 5)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 5)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2281,8 +2350,7 @@ template <class TeamType, class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 6)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 6)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2317,8 +2385,7 @@ template <class TeamType, class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 7)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 7)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2356,8 +2423,7 @@ template <class DT, class... 
DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 1)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 1)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2373,8 +2439,7 @@ template <class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 2)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 2)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2391,8 +2456,7 @@ template <class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 3)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 3)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2410,8 +2474,7 @@ template <class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 4)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 4)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2431,8 +2494,7 @@ template <class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 5)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 5)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2453,8 +2515,7 @@ template <class DT, class... 
DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 6)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 6)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2476,8 +2537,7 @@ template <class DT, class... DP> void KOKKOS_INLINE_FUNCTION local_deep_copy( const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if<(unsigned(ViewTraits<DT, DP...>::rank) == - 7)>::type* = nullptr) { + std::enable_if_t<(unsigned(ViewTraits<DT, DP...>::rank) == 7)>* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2509,12 +2569,11 @@ template <class ExecSpace, class DT, class... DP> inline void deep_copy( const ExecSpace& space, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if< + std::enable_if_t< Kokkos::is_execution_space<ExecSpace>::value && - std::is_same<typename ViewTraits<DT, DP...>::specialize, void>::value && - Kokkos::SpaceAccessibility< - ExecSpace, - typename ViewTraits<DT, DP...>::memory_space>::accessible>::type* = + std::is_void<typename ViewTraits<DT, DP...>::specialize>::value && + Kokkos::SpaceAccessibility<ExecSpace, typename ViewTraits<DT, DP...>:: + memory_space>::accessible>* = nullptr) { using dst_traits = ViewTraits<DT, DP...>; static_assert(std::is_same<typename dst_traits::non_const_value_type, @@ -2533,12 +2592,52 @@ inline void deep_copy( } else if (dst.span_is_contiguous()) { Impl::contiguous_fill_or_memset(space, dst, value); } else { - using ViewTypeUniform = typename std::conditional< - View<DT, DP...>::Rank == 0, - typename View<DT, DP...>::uniform_runtime_type, - typename View<DT, DP...>::uniform_runtime_nomemspace_type>::type; - Kokkos::Impl::ViewFill<ViewTypeUniform, typename dst_traits::array_layout, - ExecSpace>(dst, value, space); + using 
ViewType = View<DT, DP...>; + // Figure out iteration order to do the ViewFill + int64_t strides[ViewType::Rank + 1]; + dst.stride(strides); + Kokkos::Iterate iterate; + if (std::is_same<typename ViewType::array_layout, + Kokkos::LayoutRight>::value) { + iterate = Kokkos::Iterate::Right; + } else if (std::is_same<typename ViewType::array_layout, + Kokkos::LayoutLeft>::value) { + iterate = Kokkos::Iterate::Left; + } else if (std::is_same<typename ViewType::array_layout, + Kokkos::LayoutStride>::value) { + if (strides[0] > strides[ViewType::Rank > 0 ? ViewType::Rank - 1 : 0]) + iterate = Kokkos::Iterate::Right; + else + iterate = Kokkos::Iterate::Left; + } else { + if (std::is_same<typename ViewType::execution_space::array_layout, + Kokkos::LayoutRight>::value) + iterate = Kokkos::Iterate::Right; + else + iterate = Kokkos::Iterate::Left; + } + + // Lets call the right ViewFill functor based on integer space needed and + // iteration type + using ViewTypeUniform = + std::conditional_t<ViewType::Rank == 0, + typename ViewType::uniform_runtime_type, + typename ViewType::uniform_runtime_nomemspace_type>; + if (dst.span() > static_cast<size_t>(std::numeric_limits<int32_t>::max())) { + if (iterate == Kokkos::Iterate::Right) + Kokkos::Impl::ViewFill<ViewTypeUniform, Kokkos::LayoutRight, ExecSpace, + ViewType::Rank, int64_t>(dst, value, space); + else + Kokkos::Impl::ViewFill<ViewTypeUniform, Kokkos::LayoutLeft, ExecSpace, + ViewType::Rank, int64_t>(dst, value, space); + } else { + if (iterate == Kokkos::Iterate::Right) + Kokkos::Impl::ViewFill<ViewTypeUniform, Kokkos::LayoutRight, ExecSpace, + ViewType::Rank, int32_t>(dst, value, space); + else + Kokkos::Impl::ViewFill<ViewTypeUniform, Kokkos::LayoutLeft, ExecSpace, + ViewType::Rank, int32_t>(dst, value, space); + } } if (Kokkos::Tools::Experimental::get_callbacks().end_deep_copy != nullptr) { Kokkos::Profiling::endDeepCopy(); @@ -2551,12 +2650,11 @@ template <class ExecSpace, class DT, class... 
DP> inline void deep_copy( const ExecSpace& space, const View<DT, DP...>& dst, typename ViewTraits<DT, DP...>::const_value_type& value, - typename std::enable_if< + std::enable_if_t< Kokkos::is_execution_space<ExecSpace>::value && - std::is_same<typename ViewTraits<DT, DP...>::specialize, void>::value && - !Kokkos::SpaceAccessibility< - ExecSpace, - typename ViewTraits<DT, DP...>::memory_space>::accessible>::type* = + std::is_void<typename ViewTraits<DT, DP...>::specialize>::value && + !Kokkos::SpaceAccessibility<ExecSpace, typename ViewTraits<DT, DP...>:: + memory_space>::accessible>* = nullptr) { using dst_traits = ViewTraits<DT, DP...>; static_assert(std::is_same<typename dst_traits::non_const_value_type, @@ -2579,10 +2677,10 @@ inline void deep_copy( if (dst.span_is_contiguous()) { Impl::contiguous_fill_or_memset(fill_exec_space(), dst, value); } else { - using ViewTypeUniform = typename std::conditional< + using ViewTypeUniform = std::conditional_t< View<DT, DP...>::Rank == 0, typename View<DT, DP...>::uniform_runtime_type, - typename View<DT, DP...>::uniform_runtime_nomemspace_type>::type; + typename View<DT, DP...>::uniform_runtime_nomemspace_type>; Kokkos::Impl::ViewFill<ViewTypeUniform, typename dst_traits::array_layout, fill_exec_space>(dst, value, fill_exec_space()); } @@ -2600,10 +2698,9 @@ inline void deep_copy( const ExecSpace& exec_space, typename ViewTraits<ST, SP...>::non_const_value_type& dst, const View<ST, SP...>& src, - typename std::enable_if< - Kokkos::is_execution_space<ExecSpace>::value && - std::is_same<typename ViewTraits<ST, SP...>::specialize, - void>::value>::type* = nullptr) { + std::enable_if_t<Kokkos::is_execution_space<ExecSpace>::value && + std::is_same<typename ViewTraits<ST, SP...>::specialize, + void>::value>* = nullptr) { using src_traits = ViewTraits<ST, SP...>; using src_memory_space = typename src_traits::memory_space; static_assert(src_traits::rank == 0, @@ -2638,13 +2735,12 @@ template <class ExecSpace, class DT, class... 
DP, class ST, class... SP> inline void deep_copy( const ExecSpace& exec_space, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<( - Kokkos::is_execution_space<ExecSpace>::value && - std::is_same<typename ViewTraits<DT, DP...>::specialize, void>::value && - std::is_same<typename ViewTraits<ST, SP...>::specialize, void>::value && - (unsigned(ViewTraits<DT, DP...>::rank) == unsigned(0) && - unsigned(ViewTraits<ST, SP...>::rank) == unsigned(0)))>::type* = - nullptr) { + std::enable_if_t< + (Kokkos::is_execution_space<ExecSpace>::value && + std::is_void<typename ViewTraits<DT, DP...>::specialize>::value && + std::is_void<typename ViewTraits<ST, SP...>::specialize>::value && + (unsigned(ViewTraits<DT, DP...>::rank) == unsigned(0) && + unsigned(ViewTraits<ST, SP...>::rank) == unsigned(0)))>* = nullptr) { using src_traits = ViewTraits<ST, SP...>; using dst_traits = ViewTraits<DT, DP...>; @@ -2689,12 +2785,12 @@ template <class ExecSpace, class DT, class... DP, class ST, class... 
SP> inline void deep_copy( const ExecSpace& exec_space, const View<DT, DP...>& dst, const View<ST, SP...>& src, - typename std::enable_if<( - Kokkos::is_execution_space<ExecSpace>::value && - std::is_same<typename ViewTraits<DT, DP...>::specialize, void>::value && - std::is_same<typename ViewTraits<ST, SP...>::specialize, void>::value && - (unsigned(ViewTraits<DT, DP...>::rank) != 0 || - unsigned(ViewTraits<ST, SP...>::rank) != 0))>::type* = nullptr) { + std::enable_if_t< + (Kokkos::is_execution_space<ExecSpace>::value && + std::is_void<typename ViewTraits<DT, DP...>::specialize>::value && + std::is_void<typename ViewTraits<ST, SP...>::specialize>::value && + (unsigned(ViewTraits<DT, DP...>::rank) != 0 || + unsigned(ViewTraits<ST, SP...>::rank) != 0))>* = nullptr) { using dst_type = View<DT, DP...>; using src_type = View<ST, SP...>; @@ -2855,8 +2951,8 @@ inline void deep_copy( Impl::view_copy(exec_space, dst, src); } else if (DstExecCanAccessSrc || SrcExecCanAccessDst) { using cpy_exec_space = - typename std::conditional<DstExecCanAccessSrc, dst_execution_space, - src_execution_space>::type; + std::conditional_t<DstExecCanAccessSrc, dst_execution_space, + src_execution_space>; exec_space.fence( "Kokkos::deep_copy: view-to-view noncontiguous copy on space, pre " "copy"); @@ -2900,19 +2996,30 @@ bool size_mismatch(const ViewType& view, unsigned int max_extent, /** \brief Resize a view with copying old data to new data at the corresponding * indices. */ -template <class... I, class T, class... P> +template <class T, class... P, class... 
ViewCtorArgs> inline typename std::enable_if< std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutRight>::value>::type -impl_resize(Kokkos::View<T, P...>& v, const size_t n0, const size_t n1, +impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::View<T, P...>& v, const size_t n0, const size_t n1, const size_t n2, const size_t n3, const size_t n4, const size_t n5, - const size_t n6, const size_t n7, const I&... arg_prop) { - using view_type = Kokkos::View<T, P...>; + const size_t n6, const size_t n7) { + using view_type = Kokkos::View<T, P...>; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; static_assert(Kokkos::ViewTraits<T, P...>::is_managed, "Can only resize managed views"); + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::resize " + "must not include a label!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::resize must " + "not include a pointer!"); + static_assert(!alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::resize must " + "not include a memory space instance!"); // TODO (mfh 27 Jun 2017) If the old View has enough space but just // different dimensions (e.g., if the product of the dimensions, @@ -2925,22 +3032,57 @@ impl_resize(Kokkos::View<T, P...>& v, const size_t n0, const size_t n1, const bool sizeMismatch = Impl::size_mismatch(v, v.rank_dynamic, new_extents); if (sizeMismatch) { - view_type v_resized(view_alloc(v.label(), arg_prop...), n0, n1, n2, n3, n4, - n5, n6, n7); - - Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v); - Kokkos::fence("Kokkos::resize(View)"); + // Add execution space here to avoid the need for if constexpr below + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., std::string, + 
std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 10>, + typename view_type::execution_space>>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + v.label(); + + view_type v_resized(prop_copy, n0, n1, n2, n3, n4, n5, n6, n7); + + if (alloc_prop_input::has_execution_space) + Kokkos::Impl::ViewRemap<view_type, view_type>( + v_resized, v, + static_cast<const Impl::ViewCtorProp< + void, typename alloc_prop::execution_space>&>(prop_copy) + .value); + else { + Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v); + Kokkos::fence("Kokkos::resize(View)"); + } v = v_resized; } } +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + std::is_same<typename Kokkos::View<T, P...>::array_layout, + Kokkos::LayoutLeft>::value || + std::is_same<typename Kokkos::View<T, P...>::array_layout, + Kokkos::LayoutRight>::value> +resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + impl_resize(arg_prop, v, n0, n1, n2, n3, n4, n5, n6, n7); +} + template <class T, class... 
P> -inline typename std::enable_if< +inline std::enable_if_t< std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename Kokkos::View<T, P...>::array_layout, - Kokkos::LayoutRight>::value>::type + Kokkos::LayoutRight>::value> resize(Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -2949,18 +3091,17 @@ resize(Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - impl_resize(v, n0, n1, n2, n3, n4, n5, n6, n7); + impl_resize(Impl::ViewCtorProp<>{}, v, n0, n1, n2, n3, n4, n5, n6, n7); } -/** \brief Resize a view with copying old data to new data at the corresponding - * indices. */ template <class I, class T, class... P> -inline typename std::enable_if< - Impl::is_view_ctor_property<I>::value && +inline std::enable_if_t< + (Impl::is_view_ctor_property<I>::value || + Kokkos::is_execution_space<I>::value) && (std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename Kokkos::View<T, P...>::array_layout, - Kokkos::LayoutRight>::value)>::type + Kokkos::LayoutRight>::value)> resize(const I& arg_prop, Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -2970,12 +3111,10 @@ resize(const I& arg_prop, Kokkos::View<T, P...>& v, const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - impl_resize(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); + impl_resize(Kokkos::view_alloc(arg_prop), v, n0, n1, n2, n3, n4, n5, n6, n7); } -/** \brief Resize a view with copying old data to new data at the corresponding - * indices. 
*/ -template <class... I, class T, class... P> +template <class T, class... P, class... ViewCtorArgs> inline std::enable_if_t< std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || @@ -2984,19 +3123,47 @@ inline std::enable_if_t< std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutStride>::value || is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value> -impl_resize(Kokkos::View<T, P...>& v, - const typename Kokkos::View<T, P...>::array_layout& layout, - const I&... arg_prop) { - using view_type = Kokkos::View<T, P...>; +impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::View<T, P...>& v, + const typename Kokkos::View<T, P...>::array_layout& layout) { + using view_type = Kokkos::View<T, P...>; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; static_assert(Kokkos::ViewTraits<T, P...>::is_managed, "Can only resize managed views"); + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::resize " + "must not include a label!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::resize must " + "not include a pointer!"); + static_assert(!alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::resize must " + "not include a memory space instance!"); if (v.layout() != layout) { - view_type v_resized(view_alloc(v.label(), arg_prop...), layout); - - Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v); - Kokkos::fence("Kokkos::resize(View)"); + // Add execution space here to avoid the need for if constexpr below + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., std::string, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 10>, + typename view_type::execution_space>>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + 
v.label(); + + view_type v_resized(prop_copy, layout); + + if (alloc_prop::has_execution_space) + Kokkos::Impl::ViewRemap<view_type, view_type>( + v_resized, v, + static_cast<const Impl::ViewCtorProp< + void, typename alloc_prop::execution_space>&>(prop_copy) + .value); + else { + Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v); + Kokkos::fence("Kokkos::resize(View)"); + } v = v_resized; } @@ -3005,7 +3172,7 @@ impl_resize(Kokkos::View<T, P...>& v, // FIXME User-provided (custom) layouts are not required to have a comparison // operator. Hence, there is no way to check if the requested layout is actually // the same as the existing one. -template <class... I, class T, class... P> +template <class T, class... P, class... ViewCtorArgs> inline std::enable_if_t< !(std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || @@ -3014,68 +3181,156 @@ inline std::enable_if_t< std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutStride>::value || is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value)> -impl_resize(Kokkos::View<T, P...>& v, - const typename Kokkos::View<T, P...>::array_layout& layout, - const I&... 
arg_prop) { - using view_type = Kokkos::View<T, P...>; +impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::View<T, P...>& v, + const typename Kokkos::View<T, P...>::array_layout& layout) { + using view_type = Kokkos::View<T, P...>; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; static_assert(Kokkos::ViewTraits<T, P...>::is_managed, "Can only resize managed views"); - - view_type v_resized(view_alloc(v.label(), arg_prop...), layout); - - Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v); + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::resize " + "must not include a label!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::resize must " + "not include a pointer!"); + static_assert(!alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::resize must " + "not include a memory space instance!"); + + // Add execution space here to avoid the need for if constexpr below + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., std::string, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 10>, + typename view_type::execution_space>>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + v.label(); + + view_type v_resized(prop_copy, layout); + + if (alloc_prop::has_execution_space) + Kokkos::Impl::ViewRemap<view_type, view_type>( + v_resized, v, + static_cast<const Impl::ViewCtorProp< + void, typename alloc_prop::execution_space>&>(prop_copy) + .value); + else { + Kokkos::Impl::ViewRemap<view_type, view_type>(v_resized, v); + Kokkos::fence("Kokkos::resize(View)"); + } v = v_resized; } +template <class T, class... P, class... 
ViewCtorArgs> +inline void resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::View<T, P...>& v, + const typename Kokkos::View<T, P...>::array_layout& layout) { + impl_resize(arg_prop, v, layout); +} + template <class I, class T, class... P> -inline std::enable_if_t<Impl::is_view_ctor_property<I>::value> resize( - const I& arg_prop, Kokkos::View<T, P...>& v, - const typename Kokkos::View<T, P...>::array_layout& layout) { - impl_resize(v, layout, arg_prop); +inline std::enable_if_t<Impl::is_view_ctor_property<I>::value || + Kokkos::is_execution_space<I>::value> +resize(const I& arg_prop, Kokkos::View<T, P...>& v, + const typename Kokkos::View<T, P...>::array_layout& layout) { + impl_resize(arg_prop, v, layout); +} + +template <class ExecutionSpace, class T, class... P> +inline void resize(const ExecutionSpace& exec_space, Kokkos::View<T, P...>& v, + const typename Kokkos::View<T, P...>::array_layout& layout) { + impl_resize(Impl::ViewCtorProp<>(), exec_space, v, layout); } template <class T, class... P> inline void resize(Kokkos::View<T, P...>& v, const typename Kokkos::View<T, P...>::array_layout& layout) { - impl_resize(v, layout); + impl_resize(Impl::ViewCtorProp<>{}, v, layout); } /** \brief Resize a view with discarding old data. */ -template <class... I, class T, class... P> -inline typename std::enable_if< +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename Kokkos::View<T, P...>::array_layout, - Kokkos::LayoutRight>::value>::type + Kokkos::LayoutRight>::value> impl_realloc(Kokkos::View<T, P...>& v, const size_t n0, const size_t n1, const size_t n2, const size_t n3, const size_t n4, const size_t n5, - const size_t n6, const size_t n7, const I&... 
arg_prop) { - using view_type = Kokkos::View<T, P...>; + const size_t n6, const size_t n7, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using view_type = Kokkos::View<T, P...>; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; static_assert(Kokkos::ViewTraits<T, P...>::is_managed, "Can only realloc managed views"); + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a label!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a pointer!"); + static_assert(!alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a memory space instance!"); const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7}; const bool sizeMismatch = Impl::size_mismatch(v, v.rank_dynamic, new_extents); if (sizeMismatch) { - const std::string label = v.label(); - - v = view_type(); // Deallocate first, if the only view to allocation - v = view_type(view_alloc(label, arg_prop...), n0, n1, n2, n3, n4, n5, n6, - n7); - } else if (!Kokkos::Impl::has_type<Impl::WithoutInitializing_t, I...>::value) - Kokkos::deep_copy(v, typename view_type::value_type{}); + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop arg_prop_copy(arg_prop); + static_cast<Kokkos::Impl::ViewCtorProp<void, std::string>&>(arg_prop_copy) + .value = v.label(); + v = view_type(); // Best effort to deallocate in case no other view refers + // to the shared allocation + v = view_type(arg_prop_copy, n0, n1, n2, n3, n4, n5, n6, n7); + } else if (alloc_prop_input::initialize) { + if (alloc_prop_input::has_execution_space) { + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 2>, + typename view_type::execution_space>>; + alloc_prop 
arg_prop_copy(arg_prop); + auto const& exec_space = static_cast<Kokkos::Impl::ViewCtorProp< + void, typename alloc_prop::execution_space> const&>(arg_prop_copy) + .value; + Kokkos::deep_copy(exec_space, v, typename view_type::value_type{}); + } else + Kokkos::deep_copy(v, typename view_type::value_type{}); + } +} + +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< + std::is_same<typename Kokkos::View<T, P...>::array_layout, + Kokkos::LayoutLeft>::value || + std::is_same<typename Kokkos::View<T, P...>::array_layout, + Kokkos::LayoutRight>::value> +realloc(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::View<T, P...>& v, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); } template <class T, class... P> -inline typename std::enable_if< +inline std::enable_if_t< std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename Kokkos::View<T, P...>::array_layout, - Kokkos::LayoutRight>::value>::type + Kokkos::LayoutRight>::value> realloc(Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -3085,16 +3340,16 @@ realloc(Kokkos::View<T, P...>& v, const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7); + impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, Impl::ViewCtorProp<>{}); } template <class I, class T, class... 
P> -inline typename std::enable_if< +inline std::enable_if_t< Impl::is_view_ctor_property<I>::value && (std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename Kokkos::View<T, P...>::array_layout, - Kokkos::LayoutRight>::value)>::type + Kokkos::LayoutRight>::value)> realloc(const I& arg_prop, Kokkos::View<T, P...>& v, const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -3104,10 +3359,10 @@ realloc(const I& arg_prop, Kokkos::View<T, P...>& v, const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, arg_prop); + impl_realloc(v, n0, n1, n2, n3, n4, n5, n6, n7, Kokkos::view_alloc(arg_prop)); } -template <class... I, class T, class... P> +template <class T, class... P, class... ViewCtorArgs> inline std::enable_if_t< std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || @@ -3118,24 +3373,53 @@ inline std::enable_if_t< is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value> impl_realloc(Kokkos::View<T, P...>& v, const typename Kokkos::View<T, P...>::array_layout& layout, - const I&... 
arg_prop) { - using view_type = Kokkos::View<T, P...>; + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using view_type = Kokkos::View<T, P...>; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; static_assert(Kokkos::ViewTraits<T, P...>::is_managed, "Can only realloc managed views"); + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a label!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a pointer!"); + static_assert(!alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a memory space instance!"); if (v.layout() != layout) { - const std::string label = v.label(); - v = view_type(); // Deallocate first, if the only view to allocation - v = view_type(view_alloc(label, arg_prop...), layout); + v = view_type(arg_prop, layout); + } else if (alloc_prop_input::initialize) { + if (alloc_prop_input::has_execution_space) { + // Add execution_space if not provided to avoid need for if constexpr + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 2>, + typename view_type::execution_space>, + std::string>; + alloc_prop arg_prop_copy(arg_prop); + static_cast<Kokkos::Impl::ViewCtorProp<void, std::string>&>(arg_prop_copy) + .value = v.label(); + using execution_space_type = typename alloc_prop::execution_space; + const execution_space_type& exec_space = + static_cast< + Kokkos::Impl::ViewCtorProp<void, execution_space_type> const&>( + arg_prop_copy) + .value; + Kokkos::deep_copy(exec_space, v, typename view_type::value_type{}); + } else + Kokkos::deep_copy(v, typename view_type::value_type{}); } } // FIXME User-provided (custom) layouts are not required to have a comparison // operator. 
Hence, there is no way to check if the requested layout is actually // the same as the existing one. -template <class... I, class T, class... P> +template <class T, class... P, class... ViewCtorArgs> inline std::enable_if_t< !(std::is_same<typename Kokkos::View<T, P...>::array_layout, Kokkos::LayoutLeft>::value || @@ -3146,30 +3430,51 @@ inline std::enable_if_t< is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value)> impl_realloc(Kokkos::View<T, P...>& v, const typename Kokkos::View<T, P...>::array_layout& layout, - const I&... arg_prop) { - using view_type = Kokkos::View<T, P...>; + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using view_type = Kokkos::View<T, P...>; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; static_assert(Kokkos::ViewTraits<T, P...>::is_managed, "Can only realloc managed views"); - - const std::string label = v.label(); + static_assert(!alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a label!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a pointer!"); + static_assert(!alloc_prop_input::has_memory_space, + "The view constructor arguments passed to Kokkos::realloc must " + "not include a memory space instance!"); v = view_type(); // Deallocate first, if the only view to allocation - v = view_type(view_alloc(label, arg_prop...), layout); + + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop arg_prop_copy(arg_prop); + static_cast<Kokkos::Impl::ViewCtorProp<void, std::string>&>(arg_prop_copy) + .value = v.label(); + v = view_type(arg_prop_copy, layout); +} + +template <class T, class... P, class... 
ViewCtorArgs> +inline void realloc( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + Kokkos::View<T, P...>& v, + const typename Kokkos::View<T, P...>::array_layout& layout) { + impl_realloc(v, layout, arg_prop); } template <class I, class T, class... P> inline std::enable_if_t<Impl::is_view_ctor_property<I>::value> realloc( const I& arg_prop, Kokkos::View<T, P...>& v, const typename Kokkos::View<T, P...>::array_layout& layout) { - impl_realloc(v, layout, arg_prop); + impl_realloc(v, layout, Kokkos::view_alloc(arg_prop)); } template <class T, class... P> inline void realloc( Kokkos::View<T, P...>& v, const typename Kokkos::View<T, P...>::array_layout& layout) { - impl_realloc(v, layout); + impl_realloc(v, layout, Impl::ViewCtorProp<>{}); } } /* namespace Kokkos */ @@ -3201,8 +3506,8 @@ struct MirrorViewType { using dest_view_type = Kokkos::View<data_type, array_layout, Space>; // If it is the same memory_space return the existsing view_type // This will also keep the unmanaged trait if necessary - using view_type = typename std::conditional<is_same_memspace, src_view_type, - dest_view_type>::type; + using view_type = + std::conditional_t<is_same_memspace, src_view_type, dest_view_type>; }; template <class Space, class T, class... P> @@ -3225,18 +3530,38 @@ struct MirrorType { using view_type = Kokkos::View<data_type, array_layout, Space>; }; -template <class T, class... P, class... I> -inline typename std::enable_if< +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< !std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout, - Kokkos::LayoutStride>::value, - typename Kokkos::View<T, P...>::HostMirror>::type -create_mirror(const Kokkos::View<T, P...>& src, const I&... 
arg_prop) { - using src_type = View<T, P...>; - using dst_type = typename src_type::HostMirror; + Kokkos::LayoutStride>::value && + !Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space, + typename Kokkos::View<T, P...>::HostMirror> +create_mirror(const Kokkos::View<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using src_type = View<T, P...>; + using dst_type = typename src_type::HostMirror; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert( + !alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::create_mirror " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::allow_padding, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not explicitly allow padding!"); + + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + std::string(src.label()).append("_mirror"); return dst_type( - Kokkos::view_alloc(std::string(src.label()).append("_mirror"), - arg_prop...), + prop_copy, src.rank_dynamic > 0 ? src.extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, src.rank_dynamic > 1 ? src.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, src.rank_dynamic > 2 ? src.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -3247,14 +3572,30 @@ create_mirror(const Kokkos::View<T, P...>& src, const I&... arg_prop) { src.rank_dynamic > 7 ? src.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG); } -template <class T, class... P, class... I> -inline typename std::enable_if< +template <class T, class... P, class... 
ViewCtorArgs> +inline std::enable_if_t< std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout, - Kokkos::LayoutStride>::value, - typename Kokkos::View<T, P...>::HostMirror>::type -create_mirror(const Kokkos::View<T, P...>& src, const I&... arg_prop) { - using src_type = View<T, P...>; - using dst_type = typename src_type::HostMirror; + Kokkos::LayoutStride>::value && + !Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space, + typename Kokkos::View<T, P...>::HostMirror> +create_mirror(const Kokkos::View<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using src_type = View<T, P...>; + using dst_type = typename src_type::HostMirror; + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert( + !alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::create_mirror " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::allow_padding, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not explicitly allow padding!"); Kokkos::LayoutStride layout; @@ -3276,123 +3617,199 @@ create_mirror(const Kokkos::View<T, P...>& src, const I&... arg_prop) { layout.stride[6] = src.stride_6(); layout.stride[7] = src.stride_7(); - return dst_type(Kokkos::view_alloc(std::string(src.label()).append("_mirror"), - arg_prop...), - layout); + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + std::string(src.label()).append("_mirror"); + + return dst_type(prop_copy, layout); } // Create a mirror in a new space (specialization for different space) -template <class Space, class T, class... P, class... 
I> -typename Impl::MirrorType<Space, T, P...>::view_type create_mirror( - const Space&, const Kokkos::View<T, P...>& src, const I&... arg_prop) { - return typename Impl::MirrorType<Space, T, P...>::view_type( - Kokkos::view_alloc(src.label(), arg_prop...), src.layout()); +template <class T, class... P, class... ViewCtorArgs, + class Enable = std::enable_if_t< + Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>> +auto create_mirror(const Kokkos::View<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + + static_assert( + !alloc_prop_input::has_label, + "The view constructor arguments passed to Kokkos::create_mirror " + "must not include a label!"); + static_assert( + !alloc_prop_input::has_pointer, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not include a pointer!"); + static_assert( + !alloc_prop_input::allow_padding, + "The view constructor arguments passed to Kokkos::create_mirror must " + "not explicitly allow padding!"); + + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., std::string>; + alloc_prop prop_copy(arg_prop); + static_cast<Impl::ViewCtorProp<void, std::string>&>(prop_copy).value = + std::string(src.label()).append("_mirror"); + + return typename Impl::MirrorType<typename alloc_prop::memory_space, T, + P...>::view_type(prop_copy, src.layout()); } } // namespace Impl template <class T, class... P> -std::enable_if_t< - std::is_same<typename ViewTraits<T, P...>::specialize, void>::value, - typename Kokkos::View<T, P...>::HostMirror> +std::enable_if_t<std::is_void<typename ViewTraits<T, P...>::specialize>::value, + typename Kokkos::View<T, P...>::HostMirror> create_mirror(Kokkos::View<T, P...> const& v) { - return Impl::create_mirror(v); + return Impl::create_mirror(v, Impl::ViewCtorProp<>{}); } template <class T, class... 
P> -std::enable_if_t< - std::is_same<typename ViewTraits<T, P...>::specialize, void>::value, - typename Kokkos::View<T, P...>::HostMirror> +std::enable_if_t<std::is_void<typename ViewTraits<T, P...>::specialize>::value, + typename Kokkos::View<T, P...>::HostMirror> create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Kokkos::View<T, P...> const& v) { - return Impl::create_mirror(v, wi); + return Impl::create_mirror(v, view_alloc(wi)); } template <class Space, class T, class... P, typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> +std::enable_if_t<std::is_void<typename ViewTraits<T, P...>::specialize>::value, + typename Impl::MirrorType<Space, T, P...>::view_type> +create_mirror(Space const&, Kokkos::View<T, P...> const& v) { + return Impl::create_mirror(v, view_alloc(typename Space::memory_space{})); +} + +template <class T, class... P, class... ViewCtorArgs, + typename Enable = std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>> +auto create_mirror(Impl::ViewCtorProp<ViewCtorArgs...> const& arg_prop, + Kokkos::View<T, P...> const& v) { + return Impl::create_mirror(v, arg_prop); +} + +template <class T, class... P, class... ViewCtorArgs> std::enable_if_t< - std::is_same<typename ViewTraits<T, P...>::specialize, void>::value, - typename Impl::MirrorType<Space, T, P...>::view_type> -create_mirror(Space const& space, Kokkos::View<T, P...> const& v) { - return Impl::create_mirror(space, v); + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + !Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space, + typename Kokkos::View<T, P...>::HostMirror> +create_mirror(Impl::ViewCtorProp<ViewCtorArgs...> const& arg_prop, + Kokkos::View<T, P...> const& v) { + return Impl::create_mirror(v, arg_prop); } template <class Space, class T, class... 
P, typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> -std::enable_if_t< - std::is_same<typename ViewTraits<T, P...>::specialize, void>::value, - typename Impl::MirrorType<Space, T, P...>::view_type> -create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const& space, +std::enable_if_t<std::is_void<typename ViewTraits<T, P...>::specialize>::value, + typename Impl::MirrorType<Space, T, P...>::view_type> +create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const&, Kokkos::View<T, P...> const& v) { - return Impl::create_mirror(space, v, wi); + return Impl::create_mirror(v, view_alloc(typename Space::memory_space{}, wi)); } namespace Impl { -template <class T, class... P, class... I> -inline typename std::enable_if< +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< (std::is_same< typename Kokkos::View<T, P...>::memory_space, typename Kokkos::View<T, P...>::HostMirror::memory_space>::value && std::is_same< typename Kokkos::View<T, P...>::data_type, typename Kokkos::View<T, P...>::HostMirror::data_type>::value), - typename Kokkos::View<T, P...>::HostMirror>::type -create_mirror_view(const Kokkos::View<T, P...>& src, const I&...) { + typename Kokkos::View<T, P...>::HostMirror> +create_mirror_view(const Kokkos::View<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>&) { return src; } -template <class T, class... P, class... I> -inline typename std::enable_if< +template <class T, class... P, class... ViewCtorArgs> +inline std::enable_if_t< !(std::is_same< typename Kokkos::View<T, P...>::memory_space, typename Kokkos::View<T, P...>::HostMirror::memory_space>::value && std::is_same< typename Kokkos::View<T, P...>::data_type, typename Kokkos::View<T, P...>::HostMirror::data_type>::value), - typename Kokkos::View<T, P...>::HostMirror>::type -create_mirror_view(const Kokkos::View<T, P...>& src, const I&... 
arg_prop) { - return Kokkos::create_mirror(arg_prop..., src); + typename Kokkos::View<T, P...>::HostMirror> +create_mirror_view(const Kokkos::View<T, P...>& src, + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + return Kokkos::Impl::create_mirror(src, arg_prop); } // Create a mirror view in a new space (specialization for same space) -template <class Space, class T, class... P, class... I> -typename std::enable_if< - Impl::MirrorViewType<Space, T, P...>::is_same_memspace, - typename Impl::MirrorViewType<Space, T, P...>::view_type>::type +template <class Space, class T, class... P, class... ViewCtorArgs> +std::enable_if_t<Impl::MirrorViewType<Space, T, P...>::is_same_memspace, + typename Impl::MirrorViewType<Space, T, P...>::view_type> create_mirror_view(const Space&, const Kokkos::View<T, P...>& src, - const I&...) { + const Impl::ViewCtorProp<ViewCtorArgs...>&) { return src; } // Create a mirror view in a new space (specialization for different space) -template <class Space, class T, class... P, class... I> -typename std::enable_if< - !Impl::MirrorViewType<Space, T, P...>::is_same_memspace, - typename Impl::MirrorViewType<Space, T, P...>::view_type>::type +template <class Space, class T, class... P, class... ViewCtorArgs> +std::enable_if_t<!Impl::MirrorViewType<Space, T, P...>::is_same_memspace, + typename Impl::MirrorViewType<Space, T, P...>::view_type> create_mirror_view(const Space&, const Kokkos::View<T, P...>& src, - const I&... arg_prop) { - return typename Impl::MirrorViewType<Space, T, P...>::view_type( - Kokkos::view_alloc(src.label(), arg_prop...), src.layout()); + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { + using MemorySpace = typename Space::memory_space; + using alloc_prop = Impl::ViewCtorProp<ViewCtorArgs..., MemorySpace>; + alloc_prop prop_copy(arg_prop); + + return Kokkos::Impl::create_mirror(src, prop_copy); } } // namespace Impl template <class T, class... 
P> -typename Kokkos::View<T, P...>::HostMirror create_mirror_view( - Kokkos::View<T, P...> const& v) { - return Impl::create_mirror_view(v); +std::enable_if_t< + std::is_same< + typename Kokkos::View<T, P...>::memory_space, + typename Kokkos::View<T, P...>::HostMirror::memory_space>::value && + std::is_same< + typename Kokkos::View<T, P...>::data_type, + typename Kokkos::View<T, P...>::HostMirror::data_type>::value, + typename Kokkos::View<T, P...>::HostMirror> +create_mirror_view(const Kokkos::View<T, P...>& src) { + return src; +} + +template <class T, class... P> +std::enable_if_t< + !(std::is_same< + typename Kokkos::View<T, P...>::memory_space, + typename Kokkos::View<T, P...>::HostMirror::memory_space>::value && + std::is_same< + typename Kokkos::View<T, P...>::data_type, + typename Kokkos::View<T, P...>::HostMirror::data_type>::value), + typename Kokkos::View<T, P...>::HostMirror> +create_mirror_view(const Kokkos::View<T, P...>& src) { + return Kokkos::create_mirror(src); } template <class T, class... P> typename Kokkos::View<T, P...>::HostMirror create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, Kokkos::View<T, P...> const& v) { - return Impl::create_mirror_view(v, wi); + return Impl::create_mirror_view(v, view_alloc(wi)); } +// FIXME_C++17 Improve SFINAE here. template <class Space, class T, class... P, - typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> + class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> +typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view( + const Space&, const Kokkos::View<T, P...>& src, + std::enable_if_t<Impl::MirrorViewType<Space, T, P...>::is_same_memspace>* = + nullptr) { + return src; +} + +// FIXME_C++17 Improve SFINAE here. +template <class Space, class T, class... 
P, + class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view( - Space const& space, Kokkos::View<T, P...> const& v) { - return Impl::create_mirror_view(space, v); + const Space& space, const Kokkos::View<T, P...>& src, + std::enable_if_t<!Impl::MirrorViewType<Space, T, P...>::is_same_memspace>* = + nullptr) { + return Kokkos::create_mirror(space, src); } template <class Space, class T, class... P, @@ -3400,43 +3817,112 @@ template <class Space, class T, class... P, typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, Space const& space, Kokkos::View<T, P...> const& v) { - return Impl::create_mirror_view(space, v, wi); + return Impl::create_mirror_view(space, v, view_alloc(wi)); +} + +template <class T, class... P, class... ViewCtorArgs> +auto create_mirror_view(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::View<T, P...>& v) { + return Impl::create_mirror_view(v, arg_prop); +} + +template <class... ViewCtorArgs, class T, class... 
P> +auto create_mirror_view_and_copy( + const Impl::ViewCtorProp<ViewCtorArgs...>&, + const Kokkos::View<T, P...>& src, + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + Impl::MirrorViewType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::is_same_memspace>* = nullptr) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + static_assert( + alloc_prop_input::has_memory_space, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must include a memory space!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not include a pointer!"); + static_assert(!alloc_prop_input::allow_padding, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not explicitly allow padding!"); + + // same behavior as deep_copy(src, src) + if (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src view"); + return src; } -// Create a mirror view and deep_copy in a new space (specialization for same -// space) -template <class Space, class T, class... P> -typename Impl::MirrorViewType<Space, T, P...>::view_type -create_mirror_view_and_copy( - const Space&, const Kokkos::View<T, P...>& src, - std::string const& name = "", - typename std::enable_if< - std::is_same<typename ViewTraits<T, P...>::specialize, void>::value && - Impl::MirrorViewType<Space, T, P...>::is_same_memspace>::type* = - nullptr) { - (void)name; - fence( - "Kokkos::create_mirror_view_and_copy: fence before returning src view"); // same behavior as deep_copy(src, src) - return src; +template <class... ViewCtorArgs, class T, class... 
P> +auto create_mirror_view_and_copy( + const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, + const Kokkos::View<T, P...>& src, + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value && + !Impl::MirrorViewType< + typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, + P...>::is_same_memspace>* = nullptr) { + using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; + static_assert( + alloc_prop_input::has_memory_space, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must include a memory space!"); + static_assert(!alloc_prop_input::has_pointer, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not include a pointer!"); + static_assert(!alloc_prop_input::allow_padding, + "The view constructor arguments passed to " + "Kokkos::create_mirror_view_and_copy must " + "not explicitly allow padding!"); + using Space = typename alloc_prop_input::memory_space; + using Mirror = typename Impl::MirrorViewType<Space, T, P...>::view_type; + + // Add some properties if not provided to avoid need for if constexpr + using alloc_prop = Impl::ViewCtorProp< + ViewCtorArgs..., + std::conditional_t<alloc_prop_input::has_label, + std::integral_constant<unsigned int, 12>, std::string>, + std::conditional_t<!alloc_prop_input::initialize, + std::integral_constant<unsigned int, 13>, + Impl::WithoutInitializing_t>, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 14>, + typename Space::execution_space>>; + alloc_prop arg_prop_copy(arg_prop); + + std::string& label = + static_cast<Impl::ViewCtorProp<void, std::string>&>(arg_prop_copy).value; + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type{arg_prop_copy, src.layout()}; + if (alloc_prop_input::has_execution_space) { + using ExecutionSpace = typename alloc_prop::execution_space; + deep_copy( + static_cast<Impl::ViewCtorProp<void, 
ExecutionSpace>&>(arg_prop_copy) + .value, + mirror, src); + } else + deep_copy(mirror, src); + return mirror; } -// Create a mirror view and deep_copy in a new space (specialization for -// different space) -template <class Space, class T, class... P> +// Previously when using auto here, the intel compiler 19.3 would +// sometimes not create a symbol, guessing that it somehow is a combination +// of auto and just forwarding arguments (see issue #5196) +template <class Space, class T, class... P, + typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view_and_copy( const Space&, const Kokkos::View<T, P...>& src, std::string const& name = "", - typename std::enable_if< - std::is_same<typename ViewTraits<T, P...>::specialize, void>::value && - !Impl::MirrorViewType<Space, T, P...>::is_same_memspace>::type* = + std::enable_if_t< + std::is_void<typename ViewTraits<T, P...>::specialize>::value>* = nullptr) { - using Mirror = typename Impl::MirrorViewType<Space, T, P...>::view_type; - std::string label = name.empty() ? 
src.label() : name; - auto mirror = typename Mirror::non_const_type{ - view_alloc(WithoutInitializing, label), src.layout()}; - deep_copy(mirror, src); - return mirror; + return create_mirror_view_and_copy( + Kokkos::view_alloc(typename Space::memory_space{}, name), src); } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 @@ -3448,8 +3934,7 @@ KOKKOS_DEPRECATED_WITH_COMMENT( typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view( const Space&, const Kokkos::View<T, P...>& src, Kokkos::Impl::WithoutInitializing_t, - typename std::enable_if< - Impl::MirrorViewType<Space, T, P...>::is_same_memspace>::type* = + std::enable_if_t<Impl::MirrorViewType<Space, T, P...>::is_same_memspace>* = nullptr) { return src; } @@ -3462,8 +3947,7 @@ KOKKOS_DEPRECATED_WITH_COMMENT( typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view( const Space&, const Kokkos::View<T, P...>& src, Kokkos::Impl::WithoutInitializing_t, - typename std::enable_if< - !Impl::MirrorViewType<Space, T, P...>::is_same_memspace>::type* = + std::enable_if_t<!Impl::MirrorViewType<Space, T, P...>::is_same_memspace>* = nullptr) { using Mirror = typename Impl::MirrorViewType<Space, T, P...>::view_type; return Mirror(view_alloc(WithoutInitializing, src.label()), src.layout()); diff --git a/packages/kokkos/core/src/Kokkos_Core.hpp b/packages/kokkos/core/src/Kokkos_Core.hpp index 232873d3f780777c1e25f2ba1430d8f4f3d7661d..3a9aaafbc300242efe20e7edb7b5cb51b0e5db45 100644 --- a/packages/kokkos/core/src/Kokkos_Core.hpp +++ b/packages/kokkos/core/src/Kokkos_Core.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_CORE_HPP #define KOKKOS_CORE_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_CORE +#endif //---------------------------------------------------------------------------- // Include the execution space header files for the enabled execution spaces. 
@@ -71,9 +75,9 @@ #include <Kokkos_TaskScheduler.hpp> #include <Kokkos_Complex.hpp> #include <Kokkos_CopyViews.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> #include <functional> #include <iosfwd> -#include <map> #include <memory> #include <vector> @@ -81,87 +85,16 @@ namespace Kokkos { -struct InitArguments { - int num_threads; - int num_numa; - int device_id; - int ndevices; - int skip_device; - bool disable_warnings; - bool tune_internals; - bool tool_help = false; - std::string tool_lib = {}; - std::string tool_args = {}; - - InitArguments(int nt = -1, int nn = -1, int dv = -1, bool dw = false, - bool ti = false) - : num_threads{nt}, - num_numa{nn}, - device_id{dv}, - ndevices{-1}, - skip_device{9999}, - disable_warnings{dw}, - tune_internals{ti} {} - Tools::InitArguments impl_get_tools_init_arguments() const { - Tools::InitArguments init_tools; - init_tools.tune_internals = - tune_internals ? Tools::InitArguments::PossiblyUnsetOption::on - : Tools::InitArguments::PossiblyUnsetOption::unset; - init_tools.help = tool_help - ? Tools::InitArguments::PossiblyUnsetOption::on - : Tools::InitArguments::PossiblyUnsetOption::unset; - init_tools.lib = tool_lib.empty() - ? Kokkos::Tools::InitArguments::unset_string_option - : tool_lib; - init_tools.args = tool_args.empty() - ? Kokkos::Tools::InitArguments::unset_string_option - : tool_args; - return init_tools; - } -}; - -namespace Impl { +void initialize(int& argc, char* argv[]); -/* ExecSpaceManager - Responsible for initializing all of the registered - * backends. 
Backends are registered using the register_space_initializer() - * function which should be called from a global context so that it is called - * prior to initialize_spaces() which is called from Kokkos::initialize() - */ -class ExecSpaceManager { - std::map<std::string, std::unique_ptr<ExecSpaceInitializerBase>> - exec_space_factory_list; - - public: - ExecSpaceManager() = default; - - void register_space_factory(std::string name, - std::unique_ptr<ExecSpaceInitializerBase> ptr); - void initialize_spaces(const Kokkos::InitArguments& args); - void finalize_spaces(const bool all_spaces); - void static_fence(); - void static_fence(const std::string&); - void print_configuration(std::ostream& msg, const bool detail); - static ExecSpaceManager& get_instance(); -}; - -template <class SpaceInitializerType> -int initialize_space_factory(std::string name) { - auto space_ptr = std::make_unique<SpaceInitializerType>(); - ExecSpaceManager::get_instance().register_space_factory(name, - std::move(space_ptr)); - return 1; -} - -} // namespace Impl -void initialize(int& narg, char* arg[]); - -void initialize(InitArguments args = InitArguments()); +void initialize( + InitializationSettings const& settings = InitializationSettings()); namespace Impl { -void pre_initialize(const InitArguments& args); +void pre_initialize(const InitializationSettings& settings); -void post_initialize(const InitArguments& args); +void post_initialize(const InitializationSettings& settings); void declare_configuration_metadata(const std::string& category, const std::string& key, @@ -169,7 +102,8 @@ void declare_configuration_metadata(const std::string& category, } // namespace Impl -bool is_initialized() noexcept; +KOKKOS_ATTRIBUTE_NODISCARD bool is_initialized() noexcept; +KOKKOS_ATTRIBUTE_NODISCARD bool is_finalized() noexcept; bool show_warnings() noexcept; bool tune_internals() noexcept; @@ -199,14 +133,13 @@ void finalize(); */ void push_finalize_hook(std::function<void()> f); +#ifdef 
KOKKOS_ENABLE_DEPRECATED_CODE_3 /** \brief Finalize all known execution spaces */ -void finalize_all(); - -void fence(); -void fence(const std::string&); +KOKKOS_DEPRECATED void finalize_all(); +#endif /** \brief Print "Bill of Materials" */ -void print_configuration(std::ostream&, const bool detail = false); +void print_configuration(std::ostream& os, bool verbose = false); } // namespace Kokkos @@ -219,7 +152,7 @@ namespace Kokkos { * The allocation is tracked in Kokkos memory tracking system, so * leaked memory can be identified. */ -template <class Space = typename Kokkos::DefaultExecutionSpace::memory_space> +template <class Space = Kokkos::DefaultExecutionSpace::memory_space> inline void* kokkos_malloc(const std::string& arg_alloc_label, const size_t arg_alloc_size) { using MemorySpace = typename Space::memory_space; @@ -227,21 +160,21 @@ inline void* kokkos_malloc(const std::string& arg_alloc_label, MemorySpace(), arg_alloc_label, arg_alloc_size); } -template <class Space = typename Kokkos::DefaultExecutionSpace::memory_space> +template <class Space = Kokkos::DefaultExecutionSpace::memory_space> inline void* kokkos_malloc(const size_t arg_alloc_size) { using MemorySpace = typename Space::memory_space; return Impl::SharedAllocationRecord<MemorySpace>::allocate_tracked( MemorySpace(), "no-label", arg_alloc_size); } -template <class Space = typename Kokkos::DefaultExecutionSpace::memory_space> +template <class Space = Kokkos::DefaultExecutionSpace::memory_space> inline void kokkos_free(void* arg_alloc) { using MemorySpace = typename Space::memory_space; return Impl::SharedAllocationRecord<MemorySpace>::deallocate_tracked( arg_alloc); } -template <class Space = typename Kokkos::DefaultExecutionSpace::memory_space> +template <class Space = Kokkos::DefaultExecutionSpace::memory_space> inline void* kokkos_realloc(void* arg_alloc, const size_t arg_alloc_size) { using MemorySpace = typename Space::memory_space; return 
Impl::SharedAllocationRecord<MemorySpace>::reallocate_tracked( @@ -260,37 +193,153 @@ namespace Kokkos { * if Kokkos::is_initialized() in the constructor, don't call * Kokkos::initialize or Kokkos::finalize it is not copyable or assignable */ +namespace Impl { + +inline std::string scopeguard_correct_usage() { + return std::string( + "Do instead:\n" + " std::unique_ptr<Kokkos::ScopeGuard> guard =\n" + " !Kokkos::is_initialized() && !Kokkos::is_finalized()?\n" + " new ScopeGuard(argc,argv) : nullptr;\n"); +} + +inline std::string scopeguard_create_while_initialized_warning() { + return std::string( + "Kokkos Error: Creating a ScopeGuard while Kokkos is initialized " + "is illegal.\n") + .append(scopeguard_correct_usage()); +} + +inline std::string scopeguard_create_after_finalize_warning() { + return std::string( + "Kokkos Error: Creating a ScopeGuard after Kokkos was finalized " + "is illegal.\n") + .append(scopeguard_correct_usage()); +} -class ScopeGuard { +inline std::string scopeguard_destruct_after_finalize_warning() { + return std::string( + "Kokkos Error: Destroying a ScopeGuard after Kokkos was finalized " + "is illegal.\n") + .append(scopeguard_correct_usage()); +} + +} // namespace Impl + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +class KOKKOS_ATTRIBUTE_NODISCARD ScopeGuard { public: - ScopeGuard(int& narg, char* arg[]) { +#if defined(__has_cpp_attribute) && __has_cpp_attribute(nodiscard) >= 201907 + KOKKOS_ATTRIBUTE_NODISCARD +#endif + ScopeGuard(int& argc, char* argv[]) { sg_init = false; - if (!Kokkos::is_initialized()) { - initialize(narg, arg); +#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS + if (is_initialized()) { + std::cerr << Impl::scopeguard_create_while_initialized_warning() + << std::endl; + } + if (is_finalized()) { + std::cerr << Impl::scopeguard_create_after_finalize_warning() + << std::endl; + } +#endif + if (!is_initialized()) { + initialize(argc, argv); sg_init = true; } } - ScopeGuard(const InitArguments& args = InitArguments()) { +#if 
defined(__has_cpp_attribute) && __has_cpp_attribute(nodiscard) >= 201907 + KOKKOS_ATTRIBUTE_NODISCARD +#endif + explicit ScopeGuard( + const InitializationSettings& settings = InitializationSettings()) { sg_init = false; - if (!Kokkos::is_initialized()) { - initialize(args); +#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS + if (is_initialized()) { + std::cerr << Impl::scopeguard_create_while_initialized_warning() + << std::endl; + } + if (is_finalized()) { + std::cerr << Impl::scopeguard_create_after_finalize_warning() + << std::endl; + } +#endif + if (!is_initialized()) { + initialize(settings); sg_init = true; } } ~ScopeGuard() { - if (Kokkos::is_initialized() && sg_init) { +#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS + if (is_finalized()) { + std::cerr << Impl::scopeguard_destruct_after_finalize_warning() + << std::endl; + } +#endif + if (is_initialized() && sg_init) { finalize(); } } - // private: + private: bool sg_init; + public: + ScopeGuard& operator=(const ScopeGuard&) = delete; + ScopeGuard& operator=(ScopeGuard&&) = delete; + ScopeGuard(const ScopeGuard&) = delete; + ScopeGuard(ScopeGuard&&) = delete; +}; + +#else // ifndef KOKKOS_ENABLE_DEPRECATED_CODE3 + +class KOKKOS_ATTRIBUTE_NODISCARD ScopeGuard { + public: +#if defined(__has_cpp_attribute) && __has_cpp_attribute(nodiscard) >= 201907 + KOKKOS_ATTRIBUTE_NODISCARD +#endif + ScopeGuard(int& argc, char* argv[]) { + if (is_initialized()) { + Kokkos::abort( + Impl::scopeguard_create_while_initialized_warning().c_str()); + } + if (is_finalized()) { + Kokkos::abort(Impl::scopeguard_create_after_finalize_warning().c_str()); + } + initialize(argc, argv); + } + +#if defined(__has_cpp_attribute) && __has_cpp_attribute(nodiscard) >= 201907 + KOKKOS_ATTRIBUTE_NODISCARD +#endif + ScopeGuard( + const InitializationSettings& settings = InitializationSettings()) { + if (is_initialized()) { + Kokkos::abort( + Impl::scopeguard_create_while_initialized_warning().c_str()); + } + if (is_finalized()) { + 
Kokkos::abort(Impl::scopeguard_create_after_finalize_warning().c_str()); + } + initialize(settings); + } + + ~ScopeGuard() { + if (is_finalized()) { + Kokkos::abort(Impl::scopeguard_destruct_after_finalize_warning().c_str()); + } + finalize(); + } + ScopeGuard& operator=(const ScopeGuard&) = delete; - ScopeGuard(const ScopeGuard&) = delete; + ScopeGuard& operator=(ScopeGuard&&) = delete; + ScopeGuard(const ScopeGuard&) = delete; + ScopeGuard(ScopeGuard&&) = delete; }; +#endif } // namespace Kokkos @@ -343,9 +392,14 @@ std::vector<ExecSpace> partition_space(ExecSpace space, // implementation of the RAII wrapper is using Kokkos::single. #include <Kokkos_AcquireUniqueTokenImpl.hpp> -// Specializations requires after core definitions +// Specializations required after core definitions #include <KokkosCore_Config_PostInclude.hpp> + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_CORE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_CORE +#endif #endif diff --git a/packages/kokkos/core/src/Kokkos_Core_fwd.hpp b/packages/kokkos/core/src/Kokkos_Core_fwd.hpp index d04e6a75c7240e5d8351ca7aa74d544fa10a5422..2bb323b4a6a9f10511ebc47f271d0bee6cbc9c55 100644 --- a/packages/kokkos/core/src/Kokkos_Core_fwd.hpp +++ b/packages/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_CORE_FWD_HPP #define KOKKOS_CORE_FWD_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_CORE_FWD +#endif //---------------------------------------------------------------------------- // Kokkos_Macros.hpp does introspection on configuration options @@ -94,7 +98,10 @@ template <class ExecutionSpace, class MemorySpace> struct Device; // forward declare here so that backend initializer calls can use it. 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 struct InitArguments; +#endif +class InitializationSettings; } // namespace Kokkos @@ -132,6 +139,9 @@ using DefaultExecutionSpace KOKKOS_IMPL_DEFAULT_EXEC_SPACE_ANNOTATION = #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SYCL) using DefaultExecutionSpace KOKKOS_IMPL_DEFAULT_EXEC_SPACE_ANNOTATION = Experimental::SYCL; +#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENACC) +using DefaultExecutionSpace KOKKOS_IMPL_DEFAULT_EXEC_SPACE_ANNOTATION = + Experimental::OpenACC; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP) using DefaultExecutionSpace KOKKOS_IMPL_DEFAULT_EXEC_SPACE_ANNOTATION = OpenMP; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS) @@ -143,7 +153,7 @@ using DefaultExecutionSpace KOKKOS_IMPL_DEFAULT_EXEC_SPACE_ANNOTATION = using DefaultExecutionSpace KOKKOS_IMPL_DEFAULT_EXEC_SPACE_ANNOTATION = Serial; #else #error \ - "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::Experimental::HIP, Kokkos::Experimental::SYCL, Kokkos::Experimental::OpenMPTarget, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Experimental::HPX, or Kokkos::Serial." + "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::Experimental::HIP, Kokkos::Experimental::SYCL, Kokkos::Experimental::OpenMPTarget, Kokkos::Experimental::OpenACC, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Experimental::HPX, or Kokkos::Serial." #endif #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP) @@ -199,7 +209,7 @@ struct RuntimeCheckMemoryAccessViolation { // explicit specialization: memory access violation will occur, call abort with // the specified error message. 
template <class MemorySpace, class AccessSpace> -struct RuntimeCheckMemoryAccessViolation<AccessSpace, MemorySpace, false> { +struct RuntimeCheckMemoryAccessViolation<MemorySpace, AccessSpace, false> { KOKKOS_FUNCTION RuntimeCheckMemoryAccessViolation(char const *const msg) { Kokkos::abort(msg); } @@ -267,9 +277,6 @@ struct verify_space<DstMemorySpace, SrcMemorySpace, false> { }; #endif -// Base class for exec space initializer factories -class ExecSpaceInitializerBase; - } // namespace Impl namespace Experimental { @@ -292,8 +299,15 @@ class LogicalMemorySpace; //---------------------------------------------------------------------------- namespace Kokkos { +// Getting ICE in Trilinos in Sacado and Intrepid in deep_copy +// See issue https://github.com/kokkos/kokkos/issues/5290 +// Simply taking string by value did not resolve the issue +#ifdef KOKKOS_COMPILER_INTEL void fence(); -void fence(const std::string &); +void fence(const std::string &name); +#else +void fence(const std::string &name = "Kokkos::fence: Unnamed Global Fence"); +#endif } // namespace Kokkos //---------------------------------------------------------------------------- @@ -354,12 +368,12 @@ class ParallelReduce; /// skip this and go directly to the documentation of the nonmember /// template function Kokkos::parallel_scan. 
template <class FunctorType, class ExecPolicy, - class ExecutionSapce = typename Impl::FunctorPolicyExecutionSpace< + class ExecutionSpace = typename Impl::FunctorPolicyExecutionSpace< FunctorType, ExecPolicy>::execution_space> class ParallelScan; template <class FunctorType, class ExecPolicy, class ReturnType = InvalidType, - class ExecutionSapce = typename Impl::FunctorPolicyExecutionSpace< + class ExecutionSpace = typename Impl::FunctorPolicyExecutionSpace< FunctorType, ExecPolicy>::execution_space> class ParallelScanWithTotal; @@ -418,4 +432,8 @@ template <class Index, class Space = HostSpace> struct StdPartitionPoint; } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_CORE_FWD +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_CORE_FWD +#endif #endif /* #ifndef KOKKOS_CORE_FWD_HPP */ diff --git a/packages/kokkos/core/src/Kokkos_Crs.hpp b/packages/kokkos/core/src/Kokkos_Crs.hpp index 0657146bbd058aa1f80a49f7e33b1b78957f0d33..9c0d1f68212ecea38797f832b65dae8b9175ba69 100644 --- a/packages/kokkos/core/src/Kokkos_Crs.hpp +++ b/packages/kokkos/core/src/Kokkos_Crs.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_CRS_HPP #define KOKKOS_CRS_HPP @@ -213,8 +222,7 @@ class CrsRowMapFromCounts { KOKKOS_INLINE_FUNCTION void init(value_type& update) const { update = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& update, - const volatile value_type& input) const { + void join(value_type& update, const value_type& input) const { update += input; } using self_type = CrsRowMapFromCounts<InCounts, OutRowMap>; diff --git a/packages/kokkos/core/src/Kokkos_Cuda.hpp b/packages/kokkos/core/src/Kokkos_Cuda.hpp index 
0063b1cd1ee865f38bb556ff9653688e575fc546..72a00f41b7a239dd4af09f736815978284a13c5e 100644 --- a/packages/kokkos/core/src/Kokkos_Cuda.hpp +++ b/packages/kokkos/core/src/Kokkos_Cuda.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_CUDA_HPP #define KOKKOS_CUDA_HPP @@ -62,8 +71,8 @@ #include <Kokkos_Layout.hpp> #include <Kokkos_ScratchSpace.hpp> #include <Kokkos_MemoryTraits.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> #include <impl/Kokkos_HostSharedPtr.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> /*--------------------------------------------------------------------------*/ @@ -183,17 +192,16 @@ class Cuda { /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. - static void impl_static_fence(); - static void impl_static_fence(const std::string&); + static void impl_static_fence(const std::string& name); - void fence() const; - void fence(const std::string&) const; + void fence(const std::string& name = + "Kokkos::Cuda::fence(): Unnamed Instance Fence") const; /** \brief Return the maximum amount of concurrency. */ static int concurrency(); //! Print configuration information to the given output stream. - static void print_configuration(std::ostream&, const bool detail = false); + void print_configuration(std::ostream& os, bool verbose = false) const; //@} //-------------------------------------------------- @@ -204,15 +212,6 @@ class Cuda { Cuda(cudaStream_t stream, bool manage_stream = false); //-------------------------------------------------------------------------- - //! 
\name Device-specific functions - //@{ - - struct SelectDevice { - int cuda_device_id; - SelectDevice() : cuda_device_id(0) {} - explicit SelectDevice(int id) : cuda_device_id(id) {} - }; - //! Free any resources being consumed by the device. static void impl_finalize(); @@ -220,8 +219,7 @@ class Cuda { static int impl_is_initialized(); //! Initialize, telling the CUDA run-time library which device to use. - static void impl_initialize(const SelectDevice = SelectDevice(), - const size_t num_instances = 1); + static void impl_initialize(InitializationSettings const&); /// \brief Cuda device architecture of the selected device. /// @@ -267,17 +265,6 @@ struct DeviceTypeTraits<Cuda> { namespace Impl { -class CudaSpaceInitializer : public ExecSpaceInitializerBase { - public: - CudaSpaceInitializer() = default; - ~CudaSpaceInitializer() = default; - void initialize(const InitArguments& args) final; - void finalize(const bool all_spaces) final; - void fence() final; - void fence(const std::string&) final; - void print_configuration(std::ostream& msg, const bool detail) final; -}; - template <class DT, class... 
DP> struct ZeroMemset<Kokkos::Cuda, DT, DP...> { ZeroMemset(const Kokkos::Cuda& exec_space_instance, diff --git a/packages/kokkos/core/src/Kokkos_CudaSpace.hpp b/packages/kokkos/core/src/Kokkos_CudaSpace.hpp index 910a8b2d7470b65b66d397a2507eb96723be447c..7ec78c0211c3af1dd32615901af643d066c8cde2 100644 --- a/packages/kokkos/core/src/Kokkos_CudaSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_CudaSpace.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_CUDASPACE_HPP #define KOKKOS_CUDASPACE_HPP @@ -98,6 +107,10 @@ class CudaSpace { ~CudaSpace() = default; /**\brief Allocate untracked memory in the cuda space */ + void* allocate(const Cuda& exec_space, const size_t arg_alloc_size) const; + void* allocate(const Cuda& exec_space, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; void* allocate(const size_t arg_alloc_size) const; void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; @@ -111,6 +124,11 @@ class CudaSpace { private: template <class, class, class, class> friend class Kokkos::Experimental::LogicalMemorySpace; + void* impl_allocate(const Cuda& exec_space, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; void* impl_allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0, const Kokkos::Tools::SpaceHandle = @@ -574,11 +592,50 @@ class SharedAllocationRecord<Kokkos::CudaSpace, void> ~SharedAllocationRecord(); SharedAllocationRecord() = default; + // This constructor does not forward to the one without 
exec_space arg + // in order to work around https://github.com/kokkos/kokkos/issues/5258 + // This constructor is templated so I can't just put it into the cpp file + // like the other constructor. + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, const Kokkos::CudaSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::CudaSpace, void>::s_root_record, +#endif + Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_tex_obj(0), + m_space(arg_space) { + + SharedAllocationHeader header; + + this->base_t::_fill_host_accessible_header_info(header, arg_label); + + // Copy to device memory + // workaround for issue with NVCC and MSVC + // https://github.com/kokkos/kokkos/issues/5258 + deep_copy_header_no_exec(RecordBase::m_alloc_ptr, &header); + } + + SharedAllocationRecord( + const Kokkos::Cuda& exec_space, const Kokkos::CudaSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate); + SharedAllocationRecord( const Kokkos::CudaSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, const RecordBase::function_type arg_dealloc = &base_t::deallocate); + // helper function to work around MSVC+NVCC issue + // https://github.com/kokkos/kokkos/issues/5258 + static void deep_copy_header_no_exec(void*, const void*); + public: template <typename AliasType> inline ::cudaTextureObject_t attach_texture_object() { @@ -625,6 +682,30 @@ class SharedAllocationRecord<Kokkos::CudaUVMSpace, void> ~SharedAllocationRecord(); SharedAllocationRecord() = default; + // This constructor does not forward to the one without exec_space arg + // in order to work around 
https://github.com/kokkos/kokkos/issues/5258 + // This constructor is templated so I can't just put it into the cpp file + // like the other constructor. + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, + const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label, + const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::s_root_record, +#endif + Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_tex_obj(0), + m_space(arg_space) { + this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr, + arg_label); + } + SharedAllocationRecord( const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, @@ -676,10 +757,34 @@ class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void> ~SharedAllocationRecord(); SharedAllocationRecord() = default; + // This constructor does not forward to the one without exec_space arg + // in order to work around https://github.com/kokkos/kokkos/issues/5258 + // This constructor is templated so I can't just put it into the cpp file + // like the other constructor. 
+ template <typename ExecutionSpace> SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, const Kokkos::CudaHostPinnedSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, - const RecordBase::function_type arg_dealloc = &deallocate); + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, + void>::s_root_record, +#endif + Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_space(arg_space) { + this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr, + arg_label); + } + + SharedAllocationRecord( + const Kokkos::CudaHostPinnedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate); }; } // namespace Impl diff --git a/packages/kokkos/core/src/Kokkos_DetectionIdiom.hpp b/packages/kokkos/core/src/Kokkos_DetectionIdiom.hpp index 9e060b343eb77ffd9783b2449def926704032334..d45693819147e2eafdb04e80cd90aec08b4d9cc2 100644 --- a/packages/kokkos/core/src/Kokkos_DetectionIdiom.hpp +++ b/packages/kokkos/core/src/Kokkos_DetectionIdiom.hpp @@ -43,6 +43,10 @@ */ #ifndef KOKKOS_DETECTION_IDIOM_HPP #define KOKKOS_DETECTION_IDIOM_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DETECTIONIDIOM +#endif #include <impl/Kokkos_Utilities.hpp> // void_t #include <type_traits> @@ -113,4 +117,8 @@ inline constexpr bool is_detected_convertible_v = } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DETECTIONIDIOM +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_DETECTIONIDIOM +#endif #endif diff --git a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp index 
c88c1ada14e38bd4c3cf90c61fc7351cc27fc8ea..4cd57bae10a1f588bb1a84b1f7256d62704bcd67 100644 --- a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_EXECPOLICY_HPP #define KOKKOS_EXECPOLICY_HPP @@ -199,11 +208,10 @@ class RangePolicy : public Impl::PolicyTraits<Properties...> { inline member_type chunk_size() const { return m_granularity; } /** \brief set chunk_size to a discrete value*/ - inline RangePolicy set_chunk_size(int chunk_size_) const { - RangePolicy p = *this; - p.m_granularity = chunk_size_; - p.m_granularity_mask = p.m_granularity - 1; - return p; + inline RangePolicy& set_chunk_size(int chunk_size) { + m_granularity = chunk_size; + m_granularity_mask = m_granularity - 1; + return *this; } private: @@ -431,53 +439,49 @@ class TeamPolicyInternal : public Impl::PolicyTraits<Properties...> { }; struct PerTeamValue { - int value; - PerTeamValue(int arg); + size_t value; + PerTeamValue(size_t arg); }; struct PerThreadValue { - int value; - PerThreadValue(int arg); + size_t value; + PerThreadValue(size_t arg); }; template <class iType, class... Args> struct ExtractVectorLength { static inline iType value( - typename std::enable_if<std::is_integral<iType>::value, iType>::type val, - Args...) { + std::enable_if_t<std::is_integral<iType>::value, iType> val, Args...) { return val; } - static inline - typename std::enable_if<!std::is_integral<iType>::value, int>::type - value( - typename std::enable_if<!std::is_integral<iType>::value, iType>::type, - Args...) 
{ + static inline std::enable_if_t<!std::is_integral<iType>::value, int> value( + std::enable_if_t<!std::is_integral<iType>::value, iType>, Args...) { return 1; } }; template <class iType, class... Args> -inline typename std::enable_if<std::is_integral<iType>::value, iType>::type +inline std::enable_if_t<std::is_integral<iType>::value, iType> extract_vector_length(iType val, Args...) { return val; } template <class iType, class... Args> -inline typename std::enable_if<!std::is_integral<iType>::value, int>::type +inline std::enable_if_t<!std::is_integral<iType>::value, int> extract_vector_length(iType, Args...) { return 1; } } // namespace Impl -Impl::PerTeamValue PerTeam(const int& arg); -Impl::PerThreadValue PerThread(const int& arg); +Impl::PerTeamValue PerTeam(const size_t& arg); +Impl::PerThreadValue PerThread(const size_t& arg); struct ScratchRequest { int level; - int per_team; - int per_thread; + size_t per_team; + size_t per_thread; inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value) { @@ -813,7 +817,7 @@ KOKKOS_INLINE_FUNCTION_DELETED template <typename iType1, typename iType2, class TeamMemberType, class _never_use_this_overload> KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, TeamMemberType> + std::common_type_t<iType1, iType2>, TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType1& begin, const iType2& end) = delete; @@ -839,7 +843,7 @@ KOKKOS_INLINE_FUNCTION_DELETED template <typename iType1, typename iType2, class TeamMemberType, class _never_use_this_overload> KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, TeamMemberType> + std::common_type_t<iType1, iType2>, TeamMemberType> TeamVectorRange(const TeamMemberType&, const iType1& begin, const iType2& end) = delete; @@ -858,14 +862,14 @@ KOKKOS_INLINE_FUNCTION_DELETED template <typename iType1, typename iType2, class 
TeamMemberType, class _never_use_this_overload> KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, TeamMemberType> + std::common_type_t<iType1, iType2>, TeamMemberType> ThreadVectorRange(const TeamMemberType&, const iType1& arg_begin, const iType2& arg_end) = delete; namespace Impl { template <typename FunctorType, typename TagType, - bool HasTag = !std::is_same<TagType, void>::value> + bool HasTag = !std::is_void<TagType>::value> struct ParallelConstructName; template <typename FunctorType, typename TagType> diff --git a/packages/kokkos/core/src/Kokkos_Extents.hpp b/packages/kokkos/core/src/Kokkos_Extents.hpp index 683b76e1f960836134862d10fb62ab53f55a8463..c51d663ce9bb10b074cb3378d4d8ef30f95e95da 100644 --- a/packages/kokkos/core/src/Kokkos_Extents.hpp +++ b/packages/kokkos/core/src/Kokkos_Extents.hpp @@ -41,6 +41,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_KOKKOS_EXTENTS_HPP #define KOKKOS_KOKKOS_EXTENTS_HPP @@ -98,9 +107,8 @@ struct _parse_impl { // We have to treat the case of int**[x] specially, since it *doesn't* go // backwards template <class T, ptrdiff_t... ExtentSpec> -struct _parse_impl< - T*, Kokkos::Experimental::Extents<ExtentSpec...>, - typename std::enable_if<_all_remaining_extents_dynamic<T>::value>::type> +struct _parse_impl<T*, Kokkos::Experimental::Extents<ExtentSpec...>, + std::enable_if_t<_all_remaining_extents_dynamic<T>::value>> : _parse_impl<T, Kokkos::Experimental::Extents< Kokkos::Experimental::dynamic_extent, ExtentSpec...>> { }; @@ -109,7 +117,7 @@ struct _parse_impl< template <class T, ptrdiff_t... 
ExtentSpec> struct _parse_impl< T*, Kokkos::Experimental::Extents<ExtentSpec...>, - typename std::enable_if<!_all_remaining_extents_dynamic<T>::value>::type> { + std::enable_if_t<!_all_remaining_extents_dynamic<T>::value>> { using _next = Kokkos::Experimental::AppendExtent< typename _parse_impl<T, Kokkos::Experimental::Extents<ExtentSpec...>, void>::type, diff --git a/packages/kokkos/core/src/Kokkos_Future.hpp b/packages/kokkos/core/src/Kokkos_Future.hpp index b163bd1fc9018d6275d0a3bc7bb2bcac90d3955e..4da6c2b5d94a1b26497316c2c14ce1355d088578 100644 --- a/packages/kokkos/core/src/Kokkos_Future.hpp +++ b/packages/kokkos/core/src/Kokkos_Future.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_FUTURE_HPP #define KOKKOS_FUTURE_HPP @@ -155,13 +164,13 @@ class BasicFuture<ValueType, SimpleTaskScheduler<ExecutionSpace, QueueType>> { KOKKOS_INLINE_FUNCTION BasicFuture( BasicFuture<T, S>&& rhs) noexcept // NOLINT(google-explicit-constructor) : m_task(std::move(rhs.m_task)) { - static_assert(std::is_same<scheduler_type, void>::value || + static_assert(std::is_void<scheduler_type>::value || std::is_same<scheduler_type, S>::value, "Moved Futures must have the same scheduler"); - static_assert(std::is_same<value_type, void>::value || - std::is_same<value_type, T>::value, - "Moved Futures must have the same value_type"); + static_assert( + std::is_void<value_type>::value || std::is_same<value_type, T>::value, + "Moved Futures must have the same value_type"); // reference counts are unchanged, since this is a move rhs.m_task = nullptr; @@ -172,13 +181,13 @@ class BasicFuture<ValueType, SimpleTaskScheduler<ExecutionSpace, QueueType>> { BasicFuture<T, S> const& rhs) // 
NOLINT(google-explicit-constructor) //: m_task(rhs.m_task) : m_task(nullptr) { - static_assert(std::is_same<scheduler_type, void>::value || + static_assert(std::is_void<scheduler_type>::value || std::is_same<scheduler_type, S>::value, "Copied Futures must have the same scheduler"); - static_assert(std::is_same<value_type, void>::value || - std::is_same<value_type, T>::value, - "Copied Futures must have the same value_type"); + static_assert( + std::is_void<value_type>::value || std::is_same<value_type, T>::value, + "Copied Futures must have the same value_type"); *static_cast<task_base_type* volatile*>(&m_task) = rhs.m_task; if (m_task) m_task->increment_reference_count(); @@ -186,13 +195,13 @@ class BasicFuture<ValueType, SimpleTaskScheduler<ExecutionSpace, QueueType>> { template <class T, class S> KOKKOS_INLINE_FUNCTION BasicFuture& operator=(BasicFuture<T, S> const& rhs) { - static_assert(std::is_same<scheduler_type, void>::value || + static_assert(std::is_void<scheduler_type>::value || std::is_same<scheduler_type, S>::value, "Assigned Futures must have the same scheduler"); - static_assert(std::is_same<value_type, void>::value || - std::is_same<value_type, T>::value, - "Assigned Futures must have the same value_type"); + static_assert( + std::is_void<value_type>::value || std::is_same<value_type, T>::value, + "Assigned Futures must have the same value_type"); if (m_task != rhs.m_task) { clear(); @@ -207,13 +216,13 @@ class BasicFuture<ValueType, SimpleTaskScheduler<ExecutionSpace, QueueType>> { template <class T, class S> KOKKOS_INLINE_FUNCTION BasicFuture& operator=(BasicFuture<T, S>&& rhs) { - static_assert(std::is_same<scheduler_type, void>::value || + static_assert(std::is_void<scheduler_type>::value || std::is_same<scheduler_type, S>::value, "Assigned Futures must have the same scheduler"); - static_assert(std::is_same<value_type, void>::value || - std::is_same<value_type, T>::value, - "Assigned Futures must have the same value_type"); + static_assert( + 
std::is_void<value_type>::value || std::is_same<value_type, T>::value, + "Assigned Futures must have the same value_type"); if (m_task != rhs.m_task) { clear(); @@ -361,13 +370,13 @@ class BasicFuture { KOKKOS_INLINE_FUNCTION BasicFuture( BasicFuture<T, S>&& rhs) noexcept // NOLINT(google-explicit-constructor) : m_task(rhs.m_task) { - static_assert(std::is_same<scheduler_type, void>::value || + static_assert(std::is_void<scheduler_type>::value || std::is_same<scheduler_type, S>::value, "Assigned Futures must have the same scheduler"); - static_assert(std::is_same<value_type, void>::value || - std::is_same<value_type, T>::value, - "Assigned Futures must have the same value_type"); + static_assert( + std::is_void<value_type>::value || std::is_same<value_type, T>::value, + "Assigned Futures must have the same value_type"); rhs.m_task = 0; } @@ -376,26 +385,26 @@ class BasicFuture { KOKKOS_INLINE_FUNCTION BasicFuture( BasicFuture<T, S> const& rhs) // NOLINT(google-explicit-constructor) : m_task(nullptr) { - static_assert(std::is_same<scheduler_type, void>::value || + static_assert(std::is_void<scheduler_type>::value || std::is_same<scheduler_type, S>::value, "Assigned Futures must have the same scheduler"); - static_assert(std::is_same<value_type, void>::value || - std::is_same<value_type, T>::value, - "Assigned Futures must have the same value_type"); + static_assert( + std::is_void<value_type>::value || std::is_same<value_type, T>::value, + "Assigned Futures must have the same value_type"); if (rhs.m_task) queue_type::assign(&m_task, rhs.m_task); } template <class T, class S> KOKKOS_INLINE_FUNCTION BasicFuture& operator=(BasicFuture<T, S> const& rhs) { - static_assert(std::is_same<scheduler_type, void>::value || + static_assert(std::is_void<scheduler_type>::value || std::is_same<scheduler_type, S>::value, "Assigned Futures must have the same scheduler"); - static_assert(std::is_same<value_type, void>::value || - std::is_same<value_type, T>::value, - "Assigned Futures 
must have the same value_type"); + static_assert( + std::is_void<value_type>::value || std::is_same<value_type, T>::value, + "Assigned Futures must have the same value_type"); if (m_task || rhs.m_task) queue_type::assign(&m_task, rhs.m_task); return *this; @@ -403,13 +412,13 @@ class BasicFuture { template <class T, class S> KOKKOS_INLINE_FUNCTION BasicFuture& operator=(BasicFuture<T, S>&& rhs) { - static_assert(std::is_same<scheduler_type, void>::value || + static_assert(std::is_void<scheduler_type>::value || std::is_same<scheduler_type, S>::value, "Assigned Futures must have the same scheduler"); - static_assert(std::is_same<value_type, void>::value || - std::is_same<value_type, T>::value, - "Assigned Futures must have the same value_type"); + static_assert( + std::is_void<value_type>::value || std::is_same<value_type, T>::value, + "Assigned Futures must have the same value_type"); clear(); m_task = rhs.m_task; @@ -422,7 +431,7 @@ class BasicFuture { KOKKOS_INLINE_FUNCTION int is_ready() const noexcept { return (nullptr == m_task) || - (((task_base*)task_base::LockTag) == m_task->m_wait); + (reinterpret_cast<task_base*>(task_base::LockTag) == m_task->m_wait); } KOKKOS_INLINE_FUNCTION @@ -456,8 +465,8 @@ class ResolveFutureArgOrder { private: enum { Arg1_is_space = Kokkos::is_space<Arg1>::value }; enum { Arg2_is_space = Kokkos::is_space<Arg2>::value }; - enum { Arg1_is_value = !Arg1_is_space && !std::is_same<Arg1, void>::value }; - enum { Arg2_is_value = !Arg2_is_space && !std::is_same<Arg2, void>::value }; + enum { Arg1_is_value = !Arg1_is_space && !std::is_void<Arg1>::value }; + enum { Arg2_is_value = !Arg2_is_space && !std::is_void<Arg2>::value }; static_assert(!(Arg1_is_space && Arg2_is_space), "Future cannot be given two spaces"); @@ -465,14 +474,13 @@ class ResolveFutureArgOrder { static_assert(!(Arg1_is_value && Arg2_is_value), "Future cannot be given two value types"); - using value_type = typename std::conditional< - Arg1_is_value, Arg1, - typename 
std::conditional<Arg2_is_value, Arg2, void>::type>::type; + using value_type = + std::conditional_t<Arg1_is_value, Arg1, + std::conditional_t<Arg2_is_value, Arg2, void>>; - using execution_space = typename std::conditional< + using execution_space = typename std::conditional_t< Arg1_is_space, Arg1, - typename std::conditional<Arg2_is_space, Arg2, - void>::type>::type::execution_space; + std::conditional_t<Arg2_is_space, Arg2, void>>::execution_space; public: using type = BasicFuture<value_type, TaskScheduler<execution_space>>; diff --git a/packages/kokkos/core/src/Kokkos_Graph.hpp b/packages/kokkos/core/src/Kokkos_Graph.hpp index ef6057ae8f00959e11783d6e382b64d76d487fd1..1f71665fbbe5668bfbf1bdae1f757a06886e2ac0 100644 --- a/packages/kokkos/core/src/Kokkos_Graph.hpp +++ b/packages/kokkos/core/src/Kokkos_Graph.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_GRAPH_HPP #define KOKKOS_GRAPH_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH +#endif #include <Kokkos_Macros.hpp> #include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS @@ -188,4 +192,8 @@ Graph<ExecutionSpace> create_graph(Closure&& arg_closure) { #include <impl/Kokkos_GraphNodeImpl.hpp> #include <impl/Kokkos_Default_Graph_Impl.hpp> #include <Cuda/Kokkos_Cuda_Graph_Impl.hpp> +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH +#endif #endif // KOKKOS_GRAPH_HPP diff --git a/packages/kokkos/core/src/Kokkos_GraphNode.hpp b/packages/kokkos/core/src/Kokkos_GraphNode.hpp index e34d1353e7cff7046301d238719800f63bc13230..6eab5ec8c7b285018619b0350bbb76d7623ea257 100644 --- a/packages/kokkos/core/src/Kokkos_GraphNode.hpp +++ b/packages/kokkos/core/src/Kokkos_GraphNode.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header 
files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_KOKKOS_GRAPHNODE_HPP #define KOKKOS_KOKKOS_GRAPHNODE_HPP @@ -225,7 +234,7 @@ class GraphNodeRef { template < class OtherKernel, class OtherPredecessor, - typename std::enable_if_t< + std::enable_if_t< // Not a copy/move constructor !std::is_same<GraphNodeRef, GraphNodeRef<execution_space, OtherKernel, OtherPredecessor>>::value && @@ -256,12 +265,12 @@ class GraphNodeRef { template < class Policy, class Functor, - typename std::enable_if< + std::enable_if_t< // equivalent to: // requires Kokkos::ExecutionPolicy<remove_cvref_t<Policy>> is_execution_policy<Kokkos::Impl::remove_cvref_t<Policy>>::value, // -------------------- - int>::type = 0> + int> = 0> auto then_parallel_for(std::string arg_name, Policy&& arg_policy, Functor&& functor) const { //---------------------------------------- @@ -298,12 +307,12 @@ class GraphNodeRef { template < class Policy, class Functor, - typename std::enable_if< + std::enable_if_t< // equivalent to: // requires Kokkos::ExecutionPolicy<remove_cvref_t<Policy>> is_execution_policy<Kokkos::Impl::remove_cvref_t<Policy>>::value, // -------------------- - int>::type = 0> + int> = 0> auto then_parallel_for(Policy&& policy, Functor&& functor) const { // needs to static assert constraint: DataParallelFunctor<Functor> return this->then_parallel_for("", (Policy &&) policy, @@ -333,12 +342,12 @@ class GraphNodeRef { template < class Policy, class Functor, class ReturnType, - typename std::enable_if< + std::enable_if_t< // equivalent to: // requires Kokkos::ExecutionPolicy<remove_cvref_t<Policy>> is_execution_policy<Kokkos::Impl::remove_cvref_t<Policy>>::value, // -------------------- - int>::type = 0> + int> = 0> auto then_parallel_reduce(std::string arg_name, Policy&& arg_policy, Functor&& functor, ReturnType&& return_value) const { @@ -353,8 +362,7 @@ class GraphNodeRef { // needs static assertion of 
constraint: // DataParallelReductionFunctor<Functor, ReturnType> - using policy_t = typename std::remove_cv< - typename std::remove_reference<Policy>::type>::type; + using policy_t = std::remove_cv_t<std::remove_reference_t<Policy>>; static_assert( std::is_same<typename policy_t::execution_space, execution_space>::value, @@ -380,8 +388,8 @@ class GraphNodeRef { //---------------------------------------- // This is a disaster, but I guess it's not a my disaster to fix right now - using return_type_remove_cvref = typename std::remove_cv< - typename std::remove_reference<ReturnType>::type>::type; + using return_type_remove_cvref = + std::remove_cv_t<std::remove_reference_t<ReturnType>>; static_assert(Kokkos::is_view<return_type_remove_cvref>::value || Kokkos::is_reducer<return_type_remove_cvref>::value, "Output argument to parallel reduce in a graph must be a " @@ -416,12 +424,12 @@ class GraphNodeRef { template < class Policy, class Functor, class ReturnType, - typename std::enable_if< + std::enable_if_t< // equivalent to: // requires Kokkos::ExecutionPolicy<remove_cvref_t<Policy>> is_execution_policy<Kokkos::Impl::remove_cvref_t<Policy>>::value, // -------------------- - int>::type = 0> + int> = 0> auto then_parallel_reduce(Policy&& arg_policy, Functor&& functor, ReturnType&& return_value) const { return this->then_parallel_reduce("", (Policy &&) arg_policy, diff --git a/packages/kokkos/core/src/Kokkos_Graph_fwd.hpp b/packages/kokkos/core/src/Kokkos_Graph_fwd.hpp index 1ba58e4c8c74ac38d49d883f5795591846ed8488..6f639658e1d72cd4d1166a3189428b05f6b082d7 100644 --- a/packages/kokkos/core/src/Kokkos_Graph_fwd.hpp +++ b/packages/kokkos/core/src/Kokkos_Graph_fwd.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_KOKKOS_GRAPH_FWD_HPP #define KOKKOS_KOKKOS_GRAPH_FWD_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH_FWD +#endif #include <Kokkos_Macros.hpp> @@ -62,4 +66,8 @@ class GraphNodeRef; } // end 
namespace Experimental } // end namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH_FWD +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH_FWD +#endif #endif // KOKKOS_KOKKOS_GRAPH_FWD_HPP diff --git a/packages/kokkos/core/src/Kokkos_HBWSpace.hpp b/packages/kokkos/core/src/Kokkos_HBWSpace.hpp index 47810f17aa77d87b34c5c30481334c9993bb3403..0c5dbbdc22b60fccbb159837a37b4e5a7229a290 100644 --- a/packages/kokkos/core/src/Kokkos_HBWSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_HBWSPACE_HPP #define KOKKOS_HBWSPACE_HPP diff --git a/packages/kokkos/core/src/Kokkos_HIP.hpp b/packages/kokkos/core/src/Kokkos_HIP.hpp index 7241bb6c359045468a4db1aef582dbb5aeb33d1c..c387b5945d77aff8922e49d1d8c9f369deac8667 100644 --- a/packages/kokkos/core/src/Kokkos_HIP.hpp +++ b/packages/kokkos/core/src/Kokkos_HIP.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_HIP_HPP #define KOKKOS_HIP_HPP diff --git a/packages/kokkos/core/src/Kokkos_HIP_Space.hpp b/packages/kokkos/core/src/Kokkos_HIP_Space.hpp index 68869a6074369e9e15b6c7743a5bbc572ad38a96..8c195a0f398f13107ffded35b14677e25db6e23d 100644 --- a/packages/kokkos/core/src/Kokkos_HIP_Space.hpp +++ b/packages/kokkos/core/src/Kokkos_HIP_Space.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef 
KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_HIPSPACE_HPP #define KOKKOS_HIPSPACE_HPP @@ -61,8 +70,8 @@ #include <HIP/Kokkos_HIP_Error.hpp> // HIP_SAFE_CALL #include <impl/Kokkos_Profiling_Interface.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> #include <impl/Kokkos_HostSharedPtr.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> #include <hip/hip_runtime_api.h> /*--------------------------------------------------------------------------*/ @@ -213,6 +222,75 @@ struct Impl::is_hip_type_space<Experimental::HIPHostPinnedSpace> /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ +namespace Kokkos { +namespace Experimental { +/** \brief Memory that is accessible to HIP execution space + * and host through HIP's memory page migration. + */ +class HIPManagedSpace { + public: + //! Tag this class as a kokkos memory space + /** \brief Memory is unified to both device and host via page migration + * and therefore able to be used by HostSpace::execution_space and + * DeviceSpace::execution_space. + */ + //! 
tag this class as a kokkos memory space + using memory_space = HIPManagedSpace; + using execution_space = Kokkos::Experimental::HIP; + using device_type = Kokkos::Device<execution_space, memory_space>; + using size_type = unsigned int; + + /*--------------------------------*/ + + HIPManagedSpace(); + HIPManagedSpace(HIPManagedSpace&& rhs) = default; + HIPManagedSpace(const HIPManagedSpace& rhs) = default; + HIPManagedSpace& operator=(HIPManagedSpace&& rhs) = default; + HIPManagedSpace& operator=(const HIPManagedSpace& rhs) = default; + ~HIPManagedSpace() = default; + + /**\brief Allocate untracked memory in the space */ + void* allocate(const size_t arg_alloc_size) const; + void* allocate(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; + + /**\brief Deallocate untracked memory in the space */ + void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const; + void deallocate(const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; + + private: + int m_device; ///< Which HIP device + template <class, class, class, class> + friend class LogicalMemorySpace; + void* impl_allocate(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; + void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; + + public: + /**\brief Return Name of the MemorySpace */ + static constexpr const char* name() { return "HIPManaged"; } + + /*--------------------------------*/ +}; +} // namespace Experimental + +template <> +struct Impl::is_hip_type_space<Experimental::HIPManagedSpace> + : public std::true_type {}; + +} // namespace Kokkos + 
+/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + namespace Kokkos { namespace Impl { @@ -239,6 +317,15 @@ struct MemorySpaceAccess<Kokkos::HostSpace, enum : bool { deepcopy = true }; }; +template <> +struct MemorySpaceAccess<Kokkos::HostSpace, + Kokkos::Experimental::HIPManagedSpace> { + // HostSpace::execution_space != HIPManagedSpace::execution_space + enum : bool { assignable = false }; + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; + //---------------------------------------- template <> @@ -257,6 +344,15 @@ struct MemorySpaceAccess<Kokkos::Experimental::HIPSpace, enum : bool { deepcopy = true }; }; +template <> +struct MemorySpaceAccess<Kokkos::Experimental::HIPSpace, + Kokkos::Experimental::HIPManagedSpace> { + // HIPSpace::execution_space == HIPManagedSpace::execution_space + enum : bool { assignable = true }; + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; + //---------------------------------------- // HIPHostPinnedSpace::execution_space == HostSpace::execution_space // HIPHostPinnedSpace accessible to both HIP and Host @@ -277,6 +373,42 @@ struct MemorySpaceAccess<Kokkos::Experimental::HIPHostPinnedSpace, enum : bool { deepcopy = true }; }; +template <> +struct MemorySpaceAccess<Kokkos::Experimental::HIPHostPinnedSpace, + Kokkos::Experimental::HIPManagedSpace> { + enum : bool { assignable = false }; // different exec_space + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; + +//---------------------------------------- +// HIPManagedSpace::execution_space != HostSpace::execution_space +// HIPManagedSpace accessible to both HIP and Host + +template <> +struct MemorySpaceAccess<Kokkos::Experimental::HIPManagedSpace, + Kokkos::HostSpace> { + enum : bool { assignable = false }; + enum : bool { accessible = false }; // HIPHostPinnedSpace::execution_space + enum : bool { 
deepcopy = true }; +}; + +template <> +struct MemorySpaceAccess<Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIPSpace> { + enum : bool { assignable = false }; + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; + +template <> +struct MemorySpaceAccess<Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIPHostPinnedSpace> { + enum : bool { assignable = false }; // different exec_space + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; + }; // namespace Impl //---------------------------------------- @@ -433,6 +565,21 @@ class SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void> protected: ~SharedAllocationRecord(); + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec*/, + const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : SharedAllocationRecord(arg_space, arg_label, arg_alloc_size, + arg_dealloc) {} + + SharedAllocationRecord( + const Kokkos::Experimental::HIP& exec_space, + const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate); + SharedAllocationRecord( const Kokkos::Experimental::HIPSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, @@ -463,11 +610,59 @@ class SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void> ~SharedAllocationRecord(); SharedAllocationRecord() = default; + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, + const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : SharedAllocationRecord(arg_space, arg_label, arg_alloc_size, + arg_dealloc) {} 
+ SharedAllocationRecord( const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, const RecordBase::function_type arg_dealloc = &base_t::deallocate); }; + +template <> +class SharedAllocationRecord<Kokkos::Experimental::HIPManagedSpace, void> + : public SharedAllocationRecordCommon< + Kokkos::Experimental::HIPManagedSpace> { + private: + friend class SharedAllocationRecordCommon< + Kokkos::Experimental::HIPManagedSpace>; + using base_t = + SharedAllocationRecordCommon<Kokkos::Experimental::HIPManagedSpace>; + using RecordBase = SharedAllocationRecord<void, void>; + + SharedAllocationRecord(const SharedAllocationRecord&) = delete; + SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; + +#ifdef KOKKOS_ENABLE_DEBUG + static RecordBase s_root_record; +#endif + + const Kokkos::Experimental::HIPManagedSpace m_space; + + protected: + ~SharedAllocationRecord(); + SharedAllocationRecord() = default; + + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, + const Kokkos::Experimental::HIPManagedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : SharedAllocationRecord(arg_space, arg_label, arg_alloc_size, + arg_dealloc) {} + + SharedAllocationRecord( + const Kokkos::Experimental::HIPManagedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate); +}; } // namespace Impl } // namespace Kokkos @@ -519,16 +714,15 @@ class HIP { * asynchronously, before the functor completes. This method does not return * until all dispatched functors on this device have completed. 
*/ - static void impl_static_fence(); - static void impl_static_fence(const std::string&); + static void impl_static_fence(const std::string& name); - void fence() const; - void fence(const std::string&) const; + void fence(const std::string& name = + "Kokkos::HIP::fence(): Unnamed Instance Fence") const; hipStream_t hip_stream() const; /// \brief Print configuration information to the given output stream. - static void print_configuration(std::ostream&, const bool detail = false); + void print_configuration(std::ostream& os, bool verbose = false) const; /// \brief Free any resources being consumed by the device. static void impl_finalize(); @@ -536,16 +730,10 @@ class HIP { /** \brief Initialize the device. * */ - struct SelectDevice { - int hip_device_id; - SelectDevice() : hip_device_id(0) {} - explicit SelectDevice(int id) : hip_device_id(id) {} - }; - int hip_device() const; static hipDeviceProp_t const& hip_device_prop(); - static void impl_initialize(const SelectDevice = SelectDevice()); + static void impl_initialize(InitializationSettings const&); static int impl_is_initialized(); @@ -579,18 +767,6 @@ struct DeviceTypeTraits<Kokkos::Experimental::HIP> { } // namespace Tools namespace Impl { - -class HIPSpaceInitializer : public Kokkos::Impl::ExecSpaceInitializerBase { - public: - HIPSpaceInitializer() = default; - ~HIPSpaceInitializer() = default; - void initialize(const InitArguments& args) final; - void finalize(const bool) final; - void fence() final; - void fence(const std::string&) final; - void print_configuration(std::ostream& msg, const bool detail) final; -}; - template <class DT, class... 
DP> struct ZeroMemset<Kokkos::Experimental::HIP, DT, DP...> { ZeroMemset(const Kokkos::Experimental::HIP& exec_space, diff --git a/packages/kokkos/core/src/Kokkos_HPX.hpp b/packages/kokkos/core/src/Kokkos_HPX.hpp index 9238ca30a7e4260a740c772b9736f0ba9621c19a..044e54fb29f6b1c9d96c84b55d1bfaaf79ea6f56 100644 --- a/packages/kokkos/core/src/Kokkos_HPX.hpp +++ b/packages/kokkos/core/src/Kokkos_HPX.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_HPX_HPP #define KOKKOS_HPX_HPP @@ -65,11 +74,10 @@ #include <Kokkos_ScratchSpace.hpp> #include <Kokkos_TaskScheduler.hpp> #include <impl/Kokkos_ConcurrentBitset.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_FunctorAnalysis.hpp> #include <impl/Kokkos_Tools.hpp> #include <impl/Kokkos_TaskQueue.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> #include <KokkosExp_MDRangePolicy.hpp> @@ -89,7 +97,6 @@ #include <iostream> #include <memory> #include <sstream> -#include <stdexcept> #include <type_traits> #include <vector> @@ -210,8 +217,6 @@ class HPX { enum class instance_mode { default_, independent }; private: - instance_mode m_mode; - static uint32_t m_active_parallel_region_count; static hpx::spinlock m_active_parallel_region_count_mutex; static hpx::condition_variable_any m_active_parallel_region_count_cond; @@ -246,7 +251,6 @@ class HPX { HPX() noexcept : m_instance_id(impl_default_instance_id()), - m_mode(instance_mode::default_), m_buffer(m_default_instance_data.m_buffer), m_future(m_default_instance_data.m_future), m_future_mutex(m_default_instance_data.m_future_mutex) {} @@ -255,7 +259,6 @@ class HPX { : m_instance_id(mode == instance_mode::independent ? 
m_next_instance_id++ : impl_default_instance_id()), - m_mode(mode), m_independent_instance_data(mode == instance_mode::independent ? (new instance_data()) : nullptr), @@ -271,7 +274,6 @@ class HPX { HPX(hpx::shared_future<void> future) : m_instance_id(m_next_instance_id++), - m_mode(instance_mode::independent), m_independent_instance_data(new instance_data(future)), m_buffer(m_independent_instance_data->m_buffer), @@ -286,9 +288,11 @@ class HPX { HPX() noexcept {} #endif - static void print_configuration(std::ostream &, - const bool /* verbose */ = false) { - std::cout << "HPX backend" << std::endl; + void print_configuration(std::ostream &os, bool /*verbose*/ = false) const { + os << "HPX backend\n"; + os << "HPX Execution Space:\n"; + os << " KOKKOS_ENABLE_HPX: yes\n"; + os << "\nHPX Runtime Configuration:\n"; } uint32_t impl_instance_id() const noexcept { return m_instance_id; } @@ -315,9 +319,9 @@ class HPX { } #endif - void impl_fence_instance(const std::string &name = - "Kokkos::Experimental::HPX::impl_fence_instance:" - " Unnamed Instance Fence") const { + void fence( + const std::string &name = + "Kokkos::Experimental::HPX::fence: Unnamed Instance Fence") const { Kokkos::Tools::Experimental::Impl::profile_fence_event< Kokkos::Experimental::HPX>( name, @@ -333,9 +337,7 @@ class HPX { }); } - static void impl_fence_global(const std::string &name = - "Kokkos::Experimental::HPX::impl_fence_" - "global: Unnamed Global Fence") { + static void impl_static_fence(const std::string &name) { Kokkos::Tools::Experimental::Impl::profile_fence_event< Kokkos::Experimental::HPX>( name, @@ -350,7 +352,7 @@ class HPX { // Reset the future to free variables that may have been captured in // parallel regions (however, we don't have access to futures from // instances other than the default instances, they will only be - // released by impl_fence_instance). + // released by fence). 
HPX().impl_get_future() = hpx::make_ready_future<void>(); #endif }); @@ -360,9 +362,6 @@ class HPX { return hpx::execution::parallel_executor(); } - void fence() const { impl_fence_instance(); } - void fence(const std::string &name) const { impl_fence_instance(name); } - static bool is_asynchronous(HPX const & = HPX()) noexcept { #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) return true; @@ -371,6 +370,7 @@ class HPX { #endif } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 static std::vector<HPX> partition(...) { Kokkos::abort( "Kokkos::Experimental::HPX::partition_master: can't partition an HPX " @@ -378,7 +378,6 @@ class HPX { return std::vector<HPX>(); } -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 template <typename F> KOKKOS_DEPRECATED static void partition_master( F const &, int requested_num_partitions = 0, int = 0) { @@ -391,8 +390,7 @@ class HPX { #endif static int concurrency(); - static void impl_initialize(int thread_count); - static void impl_initialize(); + static void impl_initialize(InitializationSettings const &); static bool impl_is_initialized() noexcept; static void impl_finalize(); @@ -507,17 +505,6 @@ struct DeviceTypeTraits<Kokkos::Experimental::HPX> { namespace Impl { -class HPXSpaceInitializer : public ExecSpaceInitializerBase { - public: - HPXSpaceInitializer() = default; - ~HPXSpaceInitializer() = default; - void initialize(const InitArguments &args) final; - void finalize(const bool) final; - void fence() final; - void fence(const std::string &) final; - void print_configuration(std::ostream &msg, const bool detail) final; -}; - #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) template <typename Closure> inline void dispatch_execute_task(Closure *closure, @@ -706,7 +693,7 @@ struct HPXTeamMember { const TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...> &policy, const int team_rank, const int league_rank, void *scratch, - int scratch_size) noexcept + size_t scratch_size) noexcept : m_team_shared(scratch, scratch_size, scratch, scratch_size), 
m_league_size(policy.league_size()), m_league_rank(league_rank), @@ -733,9 +720,8 @@ struct HPXTeamMember { } template <class ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(const ReducerType &) const {} + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(const ReducerType &) const {} template <typename Type> KOKKOS_INLINE_FUNCTION Type @@ -860,7 +846,7 @@ class TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...> inline int team_size() const { return m_team_size; } inline int league_size() const { return m_league_size; } - inline size_t scratch_size(const int &level, int team_size_ = -1) const { + size_t scratch_size(const int &level, int team_size_ = -1) const { if (team_size_ < 0) { team_size_ = m_team_size; } @@ -1026,31 +1012,29 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, const Policy m_policy; template <class TagType> - static typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor(const FunctorType &functor, const Member i) { + static std::enable_if_t<std::is_void<TagType>::value> execute_functor( + const FunctorType &functor, const Member i) { functor(i); } template <class TagType> - static typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor(const FunctorType &functor, const Member i) { + static std::enable_if_t<!std::is_void<TagType>::value> execute_functor( + const FunctorType &functor, const Member i) { const TagType t{}; functor(t, i); } template <class TagType> - static typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Member i_begin, - const Member i_end) { + static std::enable_if_t<std::is_void<TagType>::value> execute_functor_range( + const FunctorType &functor, const Member i_begin, const Member i_end) { for (Member i = i_begin; i < i_end; ++i) { functor(i); } } template <class 
TagType> - static typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Member i_begin, - const Member i_end) { + static std::enable_if_t<!std::is_void<TagType>::value> execute_functor_range( + const FunctorType &functor, const Member i_begin, const Member i_end) { const TagType t{}; for (Member i = i_begin; i < i_end; ++i) { functor(t, i); @@ -1156,7 +1140,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, inline ParallelFor(const FunctorType &arg_functor, MDRangePolicy arg_policy) : m_functor(arg_functor), m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} + m_policy(Policy(0, arg_policy.m_num_tiles).set_chunk_size(1)) {} template <typename Policy, typename Functor> static int max_tile_size_product(const Policy &, const Functor &) { /** @@ -1180,20 +1164,13 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, using WorkTag = typename Policy::work_tag; using WorkRange = typename Policy::WorkRange; using Member = typename Policy::member_type; - using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; using ReducerConditional = Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueFinal = Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - using ValueOps = Kokkos::Impl::FunctorValueOps<ReducerTypeFwd, WorkTagFwd>; - using value_type = typename Analysis::value_type; + using Analysis = + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>; + using value_type = 
typename Analysis::value_type; using pointer_type = typename Analysis::pointer_type; using reference_type = typename Analysis::reference_type; @@ -1205,35 +1182,29 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, bool m_force_synchronous; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor(const FunctorType &functor, const Member i, - reference_type update) { + inline static std::enable_if_t<std::is_void<TagType>::value> execute_functor( + const FunctorType &functor, const Member i, reference_type update) { functor(i, update); } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor(const FunctorType &functor, const Member i, - reference_type update) { + inline static std::enable_if_t<!std::is_void<TagType>::value> execute_functor( + const FunctorType &functor, const Member i, reference_type update) { const TagType t{}; functor(t, i, update); } template <class TagType> - inline typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor_range(reference_type update, const Member i_begin, - const Member i_end) const { + inline std::enable_if_t<std::is_void<TagType>::value> execute_functor_range( + reference_type update, const Member i_begin, const Member i_end) const { for (Member i = i_begin; i < i_end; ++i) { m_functor(i, update); } } template <class TagType> - inline typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor_range(reference_type update, const Member i_begin, - const Member i_end) const { + inline std::enable_if_t<!std::is_void<TagType>::value> execute_functor_range( + reference_type update, const Member i_begin, const Member i_end) const { const TagType t{}; for (Member i = i_begin; i < i_end; ++i) { @@ -1307,7 +1278,7 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, } reference_type 
reference() const { - return ValueOps::reference( + return Analysis::Reducer::reference( reinterpret_cast<pointer_type>(m_value_buffer)); } }; @@ -1316,10 +1287,11 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, void execute() const { if (m_policy.end() <= m_policy.begin()) { if (m_result_ptr) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - ValueFinal::final(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + final_reducer.init(m_result_ptr); + final_reducer.final(m_result_ptr); } return; } @@ -1331,6 +1303,9 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, Kokkos::Experimental::HPX::reset_on_exit_parallel reset_on_exit( m_policy.space()); + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + const std::size_t value_size = Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); @@ -1350,20 +1325,17 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, value_type_wrapper final_value(value_size); value_type_wrapper identity(value_size); - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - final_value.pointer()); - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - identity.pointer()); + final_reducer.init(final_value.pointer()); + final_reducer.init(identity.pointer()); for_loop(par.on(exec).with( static_chunk_size(get_hpx_adjusted_chunk_size(m_policy))), m_policy.begin(), m_policy.end(), reduction(final_value, identity, - [this](value_type_wrapper &a, - value_type_wrapper &b) -> value_type_wrapper & { - ValueJoin::join( - ReducerConditional::select(m_functor, m_reducer), - a.pointer(), b.pointer()); + [final_reducer]( + value_type_wrapper &a, + value_type_wrapper &b) -> value_type_wrapper & { + 
final_reducer.join(a.pointer(), b.pointer()); return a; }), [this](Member i, value_type_wrapper &update) { @@ -1382,9 +1354,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, for_loop( par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, - [ this, &buffer ](const int t) noexcept { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<pointer_type>(buffer.get(t))); + [&buffer, final_reducer ](const int t) noexcept { + final_reducer.init(reinterpret_cast<pointer_type>(buffer.get(t))); }); const Member chunk_size = get_hpx_adjusted_chunk_size(m_policy); @@ -1392,25 +1363,23 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, for_loop_strided( par.on(exec), m_policy.begin(), m_policy.end(), chunk_size, [this, &buffer, chunk_size](const Member i_begin) { - reference_type update = - ValueOps::reference(reinterpret_cast<pointer_type>(buffer.get( + reference_type update = Analysis::Reducer::reference( + reinterpret_cast<pointer_type>(buffer.get( Kokkos::Experimental::HPX::impl_hardware_thread_id()))); const Member i_end = (std::min)(i_begin + chunk_size, m_policy.end()); execute_functor_range<WorkTag>(update, i_begin, i_end); }); for (int i = 1; i < num_worker_threads; ++i) { - ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<pointer_type>(buffer.get(0)), - reinterpret_cast<pointer_type>(buffer.get(i))); + final_reducer.join(reinterpret_cast<pointer_type>(buffer.get(0)), + reinterpret_cast<pointer_type>(buffer.get(i))); } pointer_type final_value_ptr = reinterpret_cast<pointer_type>(buffer.get(0)); #endif - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), final_value_ptr); + final_reducer.final(final_value_ptr); if (m_result_ptr != nullptr) { const int n = Analysis::value_count( @@ -1426,9 +1395,9 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, 
ReducerType, inline ParallelReduce( const FunctorType &arg_functor, Policy arg_policy, const ViewType &arg_view, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void *>::type = nullptr) + std::enable_if_t<Kokkos::is_view<ViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void *> = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -1453,19 +1422,13 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, using WorkTag = typename MDRangePolicy::work_tag; using WorkRange = typename Policy::WorkRange; using Member = typename Policy::member_type; - using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE, - MDRangePolicy, FunctorType>; using ReducerConditional = Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueFinal = Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - using ValueOps = Kokkos::Impl::FunctorValueOps<ReducerTypeFwd, WorkTagFwd>; + using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE, + MDRangePolicy, ReducerTypeFwd>; + using pointer_type = typename Analysis::pointer_type; using value_type = typename Analysis::value_type; using reference_type = typename Analysis::reference_type; @@ -1504,18 +1467,21 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, auto exec = Kokkos::Experimental::HPX::impl_get_executor(); + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + #if KOKKOS_HPX_IMPLEMENTATION == 0 - 
for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, - [this, &buffer](std::size_t t) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<pointer_type>(buffer.get(t))); - }); + for_loop( + par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, + [&buffer, final_reducer](std::size_t t) { + final_reducer.init(reinterpret_cast<pointer_type>(buffer.get(t))); + }); for_loop(par.on(exec).with( static_chunk_size(get_hpx_adjusted_chunk_size(m_policy))), m_policy.begin(), m_policy.end(), [this, &buffer](const Member i) { - reference_type update = ValueOps::reference( + reference_type update = Analysis::Reducer::reference( reinterpret_cast<pointer_type>(buffer.get( Kokkos::Experimental::HPX::impl_hardware_thread_id()))); iterate_type(m_mdr_policy, m_functor, update)(i); @@ -1524,19 +1490,19 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, #elif KOKKOS_HPX_IMPLEMENTATION == 1 using hpx::for_loop_strided; - for_loop(par.on(exec).with(static_chunk_size(1)), std::size_t(0), - num_worker_threads, [this, &buffer](const std::size_t t) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<pointer_type>(buffer.get(t))); - }); + for_loop( + par.on(exec).with(static_chunk_size(1)), std::size_t(0), + num_worker_threads, [&buffer, final_reducer](const std::size_t t) { + final_reducer.init(reinterpret_cast<pointer_type>(buffer.get(t))); + }); const Member chunk_size = get_hpx_adjusted_chunk_size(m_policy); for_loop_strided( par.on(exec), m_policy.begin(), m_policy.end(), chunk_size, [this, &buffer, chunk_size](const Member i_begin) { - reference_type update = - ValueOps::reference(reinterpret_cast<pointer_type>(buffer.get( + reference_type update = Analysis::Reducer::reference( + reinterpret_cast<pointer_type>(buffer.get( Kokkos::Experimental::HPX::impl_hardware_thread_id()))); const Member i_end = (std::min)(i_begin + chunk_size, m_policy.end()); @@ 
-1547,14 +1513,11 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, #endif for (int i = 1; i < num_worker_threads; ++i) { - ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<pointer_type>(buffer.get(0)), - reinterpret_cast<pointer_type>(buffer.get(i))); + final_reducer.join(reinterpret_cast<pointer_type>(buffer.get(0)), + reinterpret_cast<pointer_type>(buffer.get(i))); } - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<pointer_type>(buffer.get(0))); + final_reducer.final(reinterpret_cast<pointer_type>(buffer.get(0))); if (m_result_ptr != nullptr) { const int n = Analysis::value_count( @@ -1570,12 +1533,12 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, inline ParallelReduce( const FunctorType &arg_functor, MDRangePolicy arg_policy, const ViewType &arg_view, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void *>::type = nullptr) + std::enable_if_t<Kokkos::is_view<ViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void *> = nullptr) : m_functor(arg_functor), m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), + m_policy(Policy(0, arg_policy.m_num_tiles).set_chunk_size(1)), m_reducer(InvalidType()), m_result_ptr(arg_view.data()), m_force_synchronous(!arg_view.impl_track().has_record()) {} @@ -1614,9 +1577,6 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, using Member = typename Policy::member_type; using Analysis = FunctorAnalysis<FunctorPatternInterface::SCAN, Policy, FunctorType>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<FunctorType, WorkTag>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; using pointer_type = typename 
Analysis::pointer_type; using reference_type = typename Analysis::reference_type; using value_type = typename Analysis::value_type; @@ -1625,22 +1585,20 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, const Policy m_policy; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Member i_begin, - const Member i_end, reference_type update, - const bool final) { + inline static std::enable_if_t<std::is_void<TagType>::value> + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { for (Member i = i_begin; i < i_end; ++i) { functor(i, update, final); } } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Member i_begin, - const Member i_end, reference_type update, - const bool final) { + inline static std::enable_if_t<!std::is_void<TagType>::value> + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { const TagType t{}; for (Member i = i_begin; i < i_end; ++i) { functor(t, i, update, final); @@ -1670,46 +1628,49 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, barrier<> bar(num_worker_threads); auto exec = Kokkos::Experimental::HPX::impl_get_executor(); - for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, - [this, &bar, &buffer, num_worker_threads, value_count, - value_size](int t) { - reference_type update_sum = ValueInit::init( - m_functor, reinterpret_cast<pointer_type>(buffer.get(t))); - - const WorkRange range(m_policy, t, num_worker_threads); - execute_functor_range<WorkTag>(m_functor, range.begin(), - range.end(), update_sum, false); - - bar.arrive_and_wait(); - - if (t == 0) { - ValueInit::init(m_functor, 
reinterpret_cast<pointer_type>( - buffer.get(0) + value_size)); - - for (int i = 1; i < num_worker_threads; ++i) { - pointer_type ptr_1_prev = - reinterpret_cast<pointer_type>(buffer.get(i - 1)); - pointer_type ptr_2_prev = reinterpret_cast<pointer_type>( - buffer.get(i - 1) + value_size); - pointer_type ptr_2 = reinterpret_cast<pointer_type>( - buffer.get(i) + value_size); + typename Analysis::Reducer final_reducer(&m_functor); - for (int j = 0; j < value_count; ++j) { - ptr_2[j] = ptr_2_prev[j]; - } - - ValueJoin::join(m_functor, ptr_2, ptr_1_prev); - } - } + for_loop( + par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, + [this, &bar, &buffer, num_worker_threads, value_count, value_size, + final_reducer](int t) { + reference_type update_sum = + final_reducer.init(reinterpret_cast<pointer_type>(buffer.get(t))); + + const WorkRange range(m_policy, t, num_worker_threads); + execute_functor_range<WorkTag>(m_functor, range.begin(), range.end(), + update_sum, false); + + bar.arrive_and_wait(); + + if (t == 0) { + final_reducer.init( + reinterpret_cast<pointer_type>(buffer.get(0) + value_size)); + + for (int i = 1; i < num_worker_threads; ++i) { + pointer_type ptr_1_prev = + reinterpret_cast<pointer_type>(buffer.get(i - 1)); + pointer_type ptr_2_prev = reinterpret_cast<pointer_type>( + buffer.get(i - 1) + value_size); + pointer_type ptr_2 = + reinterpret_cast<pointer_type>(buffer.get(i) + value_size); + + for (int j = 0; j < value_count; ++j) { + ptr_2[j] = ptr_2_prev[j]; + } + + final_reducer.join(ptr_2, ptr_1_prev); + } + } - bar.arrive_and_wait(); + bar.arrive_and_wait(); - reference_type update_base = ValueOps::reference( - reinterpret_cast<pointer_type>(buffer.get(t) + value_size)); + reference_type update_base = Analysis::Reducer::reference( + reinterpret_cast<pointer_type>(buffer.get(t) + value_size)); - execute_functor_range<WorkTag>(m_functor, range.begin(), - range.end(), update_base, true); - }); + execute_functor_range<WorkTag>(m_functor, 
range.begin(), range.end(), + update_base, true); + }); } inline ParallelScan(const FunctorType &arg_functor, const Policy &arg_policy) @@ -1726,9 +1687,6 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, using Member = typename Policy::member_type; using Analysis = FunctorAnalysis<FunctorPatternInterface::SCAN, Policy, FunctorType>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<FunctorType, WorkTag>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; using pointer_type = typename Analysis::pointer_type; using reference_type = typename Analysis::reference_type; using value_type = typename Analysis::value_type; @@ -1738,22 +1696,20 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, ReturnType &m_returnvalue; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Member i_begin, - const Member i_end, reference_type update, - const bool final) { + inline static std::enable_if_t<std::is_void<TagType>::value> + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { for (Member i = i_begin; i < i_end; ++i) { functor(i, update, final); } } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Member i_begin, - const Member i_end, reference_type update, - const bool final) { + inline static std::enable_if_t<!std::is_void<TagType>::value> + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { const TagType t{}; for (Member i = i_begin; i < i_end; ++i) { functor(t, i, update, final); @@ -1783,50 +1739,53 @@ class 
ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, barrier<> bar(num_worker_threads); auto exec = Kokkos::Experimental::HPX::impl_get_executor(); - for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, - [this, &bar, &buffer, num_worker_threads, value_count, - value_size](int t) { - reference_type update_sum = ValueInit::init( - m_functor, reinterpret_cast<pointer_type>(buffer.get(t))); - - const WorkRange range(m_policy, t, num_worker_threads); - execute_functor_range<WorkTag>(m_functor, range.begin(), - range.end(), update_sum, false); + typename Analysis::Reducer final_reducer(&m_functor); - bar.arrive_and_wait(); - - if (t == 0) { - ValueInit::init(m_functor, reinterpret_cast<pointer_type>( - buffer.get(0) + value_size)); - - for (int i = 1; i < num_worker_threads; ++i) { - pointer_type ptr_1_prev = - reinterpret_cast<pointer_type>(buffer.get(i - 1)); - pointer_type ptr_2_prev = reinterpret_cast<pointer_type>( - buffer.get(i - 1) + value_size); - pointer_type ptr_2 = reinterpret_cast<pointer_type>( - buffer.get(i) + value_size); - - for (int j = 0; j < value_count; ++j) { - ptr_2[j] = ptr_2_prev[j]; - } - - ValueJoin::join(m_functor, ptr_2, ptr_1_prev); - } - } + for_loop( + par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, + [this, &bar, &buffer, num_worker_threads, value_count, value_size, + final_reducer](int t) { + reference_type update_sum = + final_reducer.init(reinterpret_cast<pointer_type>(buffer.get(t))); + + const WorkRange range(m_policy, t, num_worker_threads); + execute_functor_range<WorkTag>(m_functor, range.begin(), range.end(), + update_sum, false); + + bar.arrive_and_wait(); + + if (t == 0) { + final_reducer.init( + reinterpret_cast<pointer_type>(buffer.get(0) + value_size)); + + for (int i = 1; i < num_worker_threads; ++i) { + pointer_type ptr_1_prev = + reinterpret_cast<pointer_type>(buffer.get(i - 1)); + pointer_type ptr_2_prev = reinterpret_cast<pointer_type>( + buffer.get(i - 1) + 
value_size); + pointer_type ptr_2 = + reinterpret_cast<pointer_type>(buffer.get(i) + value_size); + + for (int j = 0; j < value_count; ++j) { + ptr_2[j] = ptr_2_prev[j]; + } + + final_reducer.join(ptr_2, ptr_1_prev); + } + } - bar.arrive_and_wait(); + bar.arrive_and_wait(); - reference_type update_base = ValueOps::reference( - reinterpret_cast<pointer_type>(buffer.get(t) + value_size)); + reference_type update_base = Analysis::Reducer::reference( + reinterpret_cast<pointer_type>(buffer.get(t) + value_size)); - execute_functor_range<WorkTag>(m_functor, range.begin(), - range.end(), update_base, true); + execute_functor_range<WorkTag>(m_functor, range.begin(), range.end(), + update_base, true); - if (t == num_worker_threads - 1) { - m_returnvalue = update_base; - } - }); + if (t == num_worker_threads - 1) { + m_returnvalue = update_base; + } + }); } inline ParallelScanWithTotal(const FunctorType &arg_functor, @@ -1856,31 +1815,26 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, const std::size_t m_shared; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor(const FunctorType &functor, const Policy &policy, - const int league_rank, char *local_buffer, - const std::size_t local_buffer_size) { + inline static std::enable_if_t<std::is_void<TagType>::value> execute_functor( + const FunctorType &functor, const Policy &policy, const int league_rank, + char *local_buffer, const std::size_t local_buffer_size) { functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size)); } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor(const FunctorType &functor, const Policy &policy, - const int league_rank, char *local_buffer, - const std::size_t local_buffer_size) { + inline static std::enable_if_t<!std::is_void<TagType>::value> execute_functor( + const FunctorType &functor, const Policy &policy, const 
int league_rank, + char *local_buffer, const std::size_t local_buffer_size) { const TagType t{}; functor(t, Member(policy, 0, league_rank, local_buffer, local_buffer_size)); } template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Policy &policy, - const int league_rank_begin, - const int league_rank_end, char *local_buffer, - const std::size_t local_buffer_size) { + inline static std::enable_if_t<std::is_void<TagType>::value> + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, const int league_rank_end, + char *local_buffer, + const std::size_t local_buffer_size) { for (int league_rank = league_rank_begin; league_rank < league_rank_end; ++league_rank) { functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size)); @@ -1888,12 +1842,11 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Policy &policy, - const int league_rank_begin, - const int league_rank_end, char *local_buffer, - const std::size_t local_buffer_size) { + inline static std::enable_if_t<!std::is_void<TagType>::value> + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, const int league_rank_end, + char *local_buffer, + const std::size_t local_buffer_size) { const TagType t{}; for (int league_rank = league_rank_begin; league_rank < league_rank_end; ++league_rank) { @@ -1962,22 +1915,15 @@ template <class FunctorType, class ReducerType, class... 
Properties> class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, ReducerType, Kokkos::Experimental::HPX> { private: - using Policy = TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...>; - using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; + using Policy = TeamPolicyInternal<Kokkos::Experimental::HPX, Properties...>; using Member = typename Policy::member_type; using WorkTag = typename Policy::work_tag; using ReducerConditional = Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueFinal = Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - using ValueOps = Kokkos::Impl::FunctorValueOps<ReducerTypeFwd, WorkTagFwd>; + using Analysis = + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>; using pointer_type = typename Analysis::pointer_type; using reference_type = typename Analysis::reference_type; using value_type = typename Analysis::value_type; @@ -1992,36 +1938,30 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, bool m_force_synchronous; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor(const FunctorType &functor, const Policy &policy, - const int league_rank, char *local_buffer, - const std::size_t local_buffer_size, - reference_type update) { + inline static std::enable_if_t<std::is_void<TagType>::value> execute_functor( + const FunctorType &functor, const Policy &policy, const int league_rank, + char *local_buffer, const std::size_t local_buffer_size, + reference_type update) { 
functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size), update); } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor(const FunctorType &functor, const Policy &policy, - const int league_rank, char *local_buffer, - const std::size_t local_buffer_size, - reference_type update) { + inline static std::enable_if_t<!std::is_void<TagType>::value> execute_functor( + const FunctorType &functor, const Policy &policy, const int league_rank, + char *local_buffer, const std::size_t local_buffer_size, + reference_type update) { const TagType t{}; functor(t, Member(policy, 0, league_rank, local_buffer, local_buffer_size), update); } template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Policy &policy, - const int league_rank_begin, - const int league_rank_end, char *local_buffer, - const std::size_t local_buffer_size, - reference_type update) { + inline static std::enable_if_t<std::is_void<TagType>::value> + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, const int league_rank_end, + char *local_buffer, const std::size_t local_buffer_size, + reference_type update) { for (int league_rank = league_rank_begin; league_rank < league_rank_end; ++league_rank) { functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size), @@ -2030,13 +1970,11 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_functor_range(const FunctorType &functor, const Policy &policy, - const int league_rank_begin, - const int league_rank_end, char *local_buffer, - const std::size_t local_buffer_size, - reference_type update) { + inline static std::enable_if_t<!std::is_void<TagType>::value> + 
execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, const int league_rank_end, + char *local_buffer, const std::size_t local_buffer_size, + reference_type update) { const TagType t{}; for (int league_rank = league_rank_begin; league_rank < league_rank_end; ++league_rank) { @@ -2050,10 +1988,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, void execute() const { if (m_policy.league_size() * m_policy.team_size() == 0) { if (m_result_ptr) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - ValueFinal::final(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + final_reducer.init(m_result_ptr); + final_reducer.final(m_result_ptr); } return; } @@ -2078,20 +2016,23 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, using hpx::execution::par; using hpx::execution::static_chunk_size; + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + #if KOKKOS_HPX_IMPLEMENTATION == 0 - for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, - [this, &buffer](const std::size_t t) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<pointer_type>(buffer.get(t))); - }); + for_loop( + par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, + [&buffer, final_reducer](const std::size_t t) { + final_reducer.init(reinterpret_cast<pointer_type>(buffer.get(t))); + }); for_loop(par.on(exec).with(static_chunk_size(m_policy.chunk_size())), 0, m_policy.league_size(), [this, &buffer, value_size](const int league_rank) { std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); - reference_type update = ValueOps::reference( + reference_type update = Analysis::Reducer::reference( reinterpret_cast<pointer_type>(buffer.get(t))); 
execute_functor<WorkTag>(m_functor, m_policy, league_rank, @@ -2102,17 +2043,17 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, #elif KOKKOS_HPX_IMPLEMENTATION == 1 using hpx::for_loop_strided; - for_loop(par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, - [this, &buffer](std::size_t const t) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - reinterpret_cast<pointer_type>(buffer.get(t))); - }); + for_loop( + par.on(exec).with(static_chunk_size(1)), 0, num_worker_threads, + [&buffer, final_reducer](std::size_t const t) { + final_reducer.init(reinterpret_cast<pointer_type>(buffer.get(t))); + }); for_loop_strided( par.on(exec), 0, m_policy.league_size(), m_policy.chunk_size(), [this, &buffer, value_size](int const league_rank_begin) { std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); - reference_type update = ValueOps::reference( + reference_type update = Analysis::Reducer::reference( reinterpret_cast<pointer_type>(buffer.get(t))); const int league_rank_end = (std::min)(league_rank_begin + m_policy.chunk_size(), @@ -2125,12 +2066,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, const pointer_type ptr = reinterpret_cast<pointer_type>(buffer.get(0)); for (int t = 1; t < num_worker_threads; ++t) { - ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), ptr, - reinterpret_cast<pointer_type>(buffer.get(t))); + final_reducer.join(ptr, reinterpret_cast<pointer_type>(buffer.get(t))); } - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), ptr); + final_reducer.final(ptr); if (m_result_ptr) { const int n = Analysis::value_count( @@ -2143,12 +2082,11 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } template <class ViewType> - ParallelReduce( - const FunctorType &arg_functor, const Policy &arg_policy, - const ViewType &arg_result, - typename 
std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void *>::type = nullptr) + ParallelReduce(const FunctorType &arg_functor, const Policy &arg_policy, + const ViewType &arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void *> = nullptr) : m_functor(arg_functor), m_league(arg_policy.league_size()), m_policy(arg_policy), @@ -2186,10 +2124,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::HPXTeamMember> + std::common_type_t<iType1, iType2>, Impl::HPXTeamMember> TeamThreadRange(const Impl::HPXTeamMember &thread, const iType1 &i_begin, const iType2 &i_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::HPXTeamMember>( thread, iType(i_begin), iType(i_end)); } @@ -2204,10 +2142,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::HPXTeamMember> + std::common_type_t<iType1, iType2>, Impl::HPXTeamMember> TeamVectorRange(const Impl::HPXTeamMember &thread, const iType1 &i_begin, const iType2 &i_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::HPXTeamMember>( thread, iType(i_begin), iType(i_end)); } @@ -2222,10 +2160,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::HPXTeamMember> + std::common_type_t<iType1, iType2>, Impl::HPXTeamMember> ThreadVectorRange(const Impl::HPXTeamMember &thread, 
const iType1 &i_begin, const iType2 &i_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HPXTeamMember>( thread, iType(i_begin), iType(i_end)); } @@ -2384,8 +2322,10 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( const Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HPXTeamMember> &loop_boundaries, const FunctorType &lambda) { - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = typename ValueTraits::value_type; + using value_type = + typename Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN, + TeamPolicy<Experimental::HPX>, + FunctorType>::value_type; value_type scan_val = value_type(); @@ -2402,11 +2342,11 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( * */ template <typename iType, class FunctorType, typename ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::HPXTeamMember> &loop_boundaries, - const FunctorType &lambda, const ReducerType &reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_scan( + const Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HPXTeamMember> + &loop_boundaries, + const FunctorType &lambda, const ReducerType &reducer) { typename ReducerType::value_type scan_val; reducer.init(scan_val); diff --git a/packages/kokkos/core/src/Kokkos_Half.hpp b/packages/kokkos/core/src/Kokkos_Half.hpp index 7382ffbd47e1d38a17050bbb86c764489bb293bc..c1085876c51f2ed5ee54da1caa81a88bce27195a 100644 --- a/packages/kokkos/core/src/Kokkos_Half.hpp +++ b/packages/kokkos/core/src/Kokkos_Half.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_HALF_HPP_ #define KOKKOS_HALF_HPP_ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_HALF +#endif 
#include <type_traits> #include <Kokkos_Macros.hpp> @@ -1029,4 +1033,8 @@ cast_from_bhalf(bhalf_t val) { #else #define KOKKOS_BHALF_T_IS_FLOAT false #endif // KOKKOS_IMPL_BHALF_TYPE_DEFINED +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_HALF +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_HALF +#endif #endif // KOKKOS_HALF_HPP_ diff --git a/packages/kokkos/core/src/Kokkos_HostSpace.hpp b/packages/kokkos/core/src/Kokkos_HostSpace.hpp index 034d31fca0978e23420895d83b72cb576d6ba75e..1d67e2f9cd2081e701ef44a893739cc739cf0823 100644 --- a/packages/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_HostSpace.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_HOSTSPACE_HPP #define KOKKOS_HOSTSPACE_HPP @@ -60,6 +69,7 @@ #include <impl/Kokkos_Tools.hpp> #include "impl/Kokkos_HostSpace_deepcopy.hpp" +#include <impl/Kokkos_MemorySpace.hpp> /*--------------------------------------------------------------------------*/ @@ -204,13 +214,12 @@ struct HostMirror { }; public: - using Space = typename std::conditional< + using Space = std::conditional_t< keep_exe && keep_mem, S, - typename std::conditional< - keep_mem, - Kokkos::Device<Kokkos::HostSpace::execution_space, - typename S::memory_space>, - Kokkos::HostSpace>::type>::type; + std::conditional_t<keep_mem, + Kokkos::Device<Kokkos::HostSpace::execution_space, + typename S::memory_space>, + Kokkos::HostSpace>>; }; } // namespace Impl @@ -252,6 +261,28 @@ class SharedAllocationRecord<Kokkos::HostSpace, void> ; SharedAllocationRecord() = default; + // This constructor does not forward to the one without exec_space arg + // in order to work around 
https://github.com/kokkos/kokkos/issues/5258 + // This constructor is templated so I can't just put it into the cpp file + // like the other constructor. + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /* exec_space*/, const Kokkos::HostSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &deallocate) + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::HostSpace, void>::s_root_record, +#endif + Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_space(arg_space) { + this->base_t::_fill_host_accessible_header_info(*RecordBase::m_alloc_ptr, + arg_label); + } + SharedAllocationRecord( const Kokkos::HostSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, @@ -280,10 +311,17 @@ namespace Impl { template <class DT, class... DP> struct ZeroMemset<typename HostSpace::execution_space, DT, DP...> { - ZeroMemset(const typename HostSpace::execution_space&, + ZeroMemset(const typename HostSpace::execution_space& exec, const View<DT, DP...>& dst, - typename View<DT, DP...>::const_value_type& value) - : ZeroMemset(dst, value) {} + typename View<DT, DP...>::const_value_type&) { + // Host spaces, except for HPX, are synchronous and we need to fence for HPX + // since we can't properly enqueue a std::memset otherwise. + // We can't use exec.fence() directly since we don't have a full definition + // of HostSpace here. 
+ hostspace_fence(exec); + using ValueType = typename View<DT, DP...>::value_type; + std::memset(dst.data(), 0, sizeof(ValueType) * dst.size()); + } ZeroMemset(const View<DT, DP...>& dst, typename View<DT, DP...>::const_value_type&) { diff --git a/packages/kokkos/core/src/Kokkos_Layout.hpp b/packages/kokkos/core/src/Kokkos_Layout.hpp index cfd77ea50fedcb5766ace9feb488c4c0f6238e89..78173c083e6300e3b3aa533038bef54f2ca8947f 100644 --- a/packages/kokkos/core/src/Kokkos_Layout.hpp +++ b/packages/kokkos/core/src/Kokkos_Layout.hpp @@ -45,6 +45,15 @@ /// \file Kokkos_Layout.hpp /// \brief Declaration of various \c MemoryLayout options. +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_LAYOUT_HPP #define KOKKOS_LAYOUT_HPP @@ -84,9 +93,14 @@ struct LayoutLeft { LayoutLeft& operator=(LayoutLeft&&) = default; KOKKOS_INLINE_FUNCTION - explicit constexpr LayoutLeft(size_t N0 = 0, size_t N1 = 0, size_t N2 = 0, - size_t N3 = 0, size_t N4 = 0, size_t N5 = 0, - size_t N6 = 0, size_t N7 = 0) + explicit constexpr LayoutLeft(size_t N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) : dimension{N0, N1, N2, N3, N4, N5, N6, N7} {} friend bool operator==(const LayoutLeft& left, const LayoutLeft& right) { @@ -128,9 +142,14 @@ struct LayoutRight { LayoutRight& operator=(LayoutRight&&) = default; KOKKOS_INLINE_FUNCTION - explicit constexpr LayoutRight(size_t N0 = 0, size_t N1 = 0, size_t N2 = 0, - size_t N3 = 0, size_t N4 = 0, size_t N5 = 0, - size_t 
N6 = 0, size_t N7 = 0) + explicit constexpr LayoutRight(size_t N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + size_t N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) : dimension{N0, N1, N2, N3, N4, N5, N6, N7} {} friend bool operator==(const LayoutRight& left, const LayoutRight& right) { @@ -177,7 +196,7 @@ struct LayoutStride { // Verify valid rank order: int check_input = ARRAY_LAYOUT_MAX_RANK < rank ? 0 : int(1 << rank) - 1; for (int r = 0; r < ARRAY_LAYOUT_MAX_RANK; ++r) { - tmp.dimension[r] = 0; + tmp.dimension[r] = KOKKOS_IMPL_CTOR_DEFAULT_ARG; tmp.stride[r] = 0; } for (int r = 0; r < rank; ++r) { @@ -195,12 +214,15 @@ struct LayoutStride { } KOKKOS_INLINE_FUNCTION - explicit constexpr LayoutStride(size_t N0 = 0, size_t S0 = 0, size_t N1 = 0, - size_t S1 = 0, size_t N2 = 0, size_t S2 = 0, - size_t N3 = 0, size_t S3 = 0, size_t N4 = 0, - size_t S4 = 0, size_t N5 = 0, size_t S5 = 0, - size_t N6 = 0, size_t S6 = 0, size_t N7 = 0, - size_t S7 = 0) + explicit constexpr LayoutStride( + size_t N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, size_t S0 = 0, + size_t N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, size_t S1 = 0, + size_t N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, size_t S2 = 0, + size_t N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, size_t S3 = 0, + size_t N4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, size_t S4 = 0, + size_t N5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, size_t S5 = 0, + size_t N6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, size_t S6 = 0, + size_t N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, size_t S7 = 0) : dimension{N0, N1, N2, N3, N4, N5, N6, N7}, stride{S0, S1, S2, S3, S4, S5, S6, S7} {} @@ -234,9 +256,8 @@ template <typename LayoutTiledCheck, class Enable = void> struct is_layouttiled : std::false_type {}; template <typename LayoutTiledCheck> -struct is_layouttiled< - 
LayoutTiledCheck, - typename std::enable_if<LayoutTiledCheck::is_array_layout_tiled>::type> +struct is_layouttiled<LayoutTiledCheck, + std::enable_if_t<LayoutTiledCheck::is_array_layout_tiled>> : std::true_type {}; namespace Experimental { @@ -336,7 +357,7 @@ template <unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3, unsigned ArgN4, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7> struct layout_iterate_type_selector<Kokkos::Experimental::LayoutTiled< Kokkos::Iterate::Left, Kokkos::Iterate::Left, ArgN0, ArgN1, ArgN2, ArgN3, - ArgN4, ArgN5, ArgN6, ArgN7, true> > { + ArgN4, ArgN5, ArgN6, ArgN7, true>> { static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left; static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left; }; @@ -345,7 +366,7 @@ template <unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3, unsigned ArgN4, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7> struct layout_iterate_type_selector<Kokkos::Experimental::LayoutTiled< Kokkos::Iterate::Right, Kokkos::Iterate::Left, ArgN0, ArgN1, ArgN2, ArgN3, - ArgN4, ArgN5, ArgN6, ArgN7, true> > { + ArgN4, ArgN5, ArgN6, ArgN7, true>> { static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right; static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left; }; @@ -354,7 +375,7 @@ template <unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3, unsigned ArgN4, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7> struct layout_iterate_type_selector<Kokkos::Experimental::LayoutTiled< Kokkos::Iterate::Left, Kokkos::Iterate::Right, ArgN0, ArgN1, ArgN2, ArgN3, - ArgN4, ArgN5, ArgN6, ArgN7, true> > { + ArgN4, ArgN5, ArgN6, ArgN7, true>> { static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left; static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right; }; @@ -363,7 +384,7 @@ template <unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3, unsigned ArgN4, unsigned ArgN5, 
unsigned ArgN6, unsigned ArgN7> struct layout_iterate_type_selector<Kokkos::Experimental::LayoutTiled< Kokkos::Iterate::Right, Kokkos::Iterate::Right, ArgN0, ArgN1, ArgN2, ArgN3, - ArgN4, ArgN5, ArgN6, ArgN7, true> > { + ArgN4, ArgN5, ArgN6, ArgN7, true>> { static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right; static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right; }; diff --git a/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp b/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp index 6dcbe27900c8905b6810dec67e22b9c55b22544e..d3ce354c2a61a20fd511ab0d569533feb2ea5565 100644 --- a/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp +++ b/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_LOGICALSPACES_HPP #define KOKKOS_LOGICALSPACES_HPP @@ -98,9 +107,9 @@ class LogicalMemorySpace { /// parallel using the View's default execution space). 
using execution_space = - typename std::conditional<std::is_void<DefaultBaseExecutionSpace>::value, - typename BaseSpace::execution_space, - DefaultBaseExecutionSpace>::type; + std::conditional_t<std::is_void<DefaultBaseExecutionSpace>::value, + typename BaseSpace::execution_space, + DefaultBaseExecutionSpace>; using device_type = Kokkos::Device<execution_space, memory_space>; @@ -247,6 +256,14 @@ class SharedAllocationRecord<Kokkos::Experimental::LogicalMemorySpace< } SharedAllocationRecord() = default; + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, const SpaceType& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &deallocate) + : SharedAllocationRecord(arg_space, arg_label, arg_alloc_size, + arg_dealloc) {} + SharedAllocationRecord( const SpaceType& arg_space, const std::string& arg_label, const size_t arg_alloc_size, diff --git a/packages/kokkos/core/src/Kokkos_Macros.hpp b/packages/kokkos/core/src/Kokkos_Macros.hpp index 8c2d414a99603730b10e13c8d6b94c8e2392c543..9dbd2de0c80b56eb67ac4c23eda2796d60a1d954 100644 --- a/packages/kokkos/core/src/Kokkos_Macros.hpp +++ b/packages/kokkos/core/src/Kokkos_Macros.hpp @@ -228,8 +228,8 @@ #define KOKKOS_ENABLE_PRAGMA_SIMD 1 #endif -// FIXME Workaround for ICE with intel 17,18,19 in Trilinos -#if (KOKKOS_COMPILER_INTEL <= 1900) +// FIXME Workaround for ICE with intel 17,18,19,20,21 in Trilinos +#if (KOKKOS_COMPILER_INTEL <= 2100) #define KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS #endif @@ -264,12 +264,13 @@ #define KOKKOS_ENABLE_ASM 1 #endif -#if !defined(KOKKOS_IMPL_FORCEINLINE_FUNCTION) +#if !defined(KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION) #if !defined(_WIN32) -#define KOKKOS_IMPL_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) -#define KOKKOS_IMPL_FORCEINLINE __attribute__((always_inline)) +#define KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION \ + inline 
__attribute__((always_inline)) +#define KOKKOS_IMPL_HOST_FORCEINLINE __attribute__((always_inline)) #else -#define KOKKOS_IMPL_FORCEINLINE_FUNCTION inline +#define KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION inline #endif #endif @@ -320,9 +321,10 @@ //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 -#if !defined(KOKKOS_IMPL_FORCEINLINE_FUNCTION) -#define KOKKOS_IMPL_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) -#define KOKKOS_IMPL_FORCEINLINE __attribute__((always_inline)) +#if !defined(KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION) +#define KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION \ + inline __attribute__((always_inline)) +#define KOKKOS_IMPL_HOST_FORCEINLINE __attribute__((always_inline)) #endif #if !defined(KOKKOS_IMPL_ALIGN_PTR) @@ -345,9 +347,10 @@ #define KOKKOS_ENABLE_RFO_PREFETCH 1 #endif -#if !defined(KOKKOS_IMPL_FORCEINLINE_FUNCTION) -#define KOKKOS_IMPL_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) -#define KOKKOS_IMPL_FORCEINLINE __attribute__((always_inline)) +#if !defined(KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION) +#define KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION \ + inline __attribute__((always_inline)) +#define KOKKOS_IMPL_HOST_FORCEINLINE __attribute__((always_inline)) #endif #define KOKKOS_RESTRICT __restrict__ @@ -380,12 +383,20 @@ //---------------------------------------------------------------------------- // Define function marking macros if compiler specific macros are undefined: +#if !defined(KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION) +#define KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION inline +#endif + +#if !defined(KOKKOS_IMPL_HOST_FORCEINLINE) +#define KOKKOS_IMPL_HOST_FORCEINLINE inline +#endif + #if !defined(KOKKOS_IMPL_FORCEINLINE_FUNCTION) -#define KOKKOS_IMPL_FORCEINLINE_FUNCTION inline +#define KOKKOS_IMPL_FORCEINLINE_FUNCTION KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION #endif #if !defined(KOKKOS_IMPL_FORCEINLINE) -#define KOKKOS_IMPL_FORCEINLINE inline +#define KOKKOS_IMPL_FORCEINLINE KOKKOS_IMPL_HOST_FORCEINLINE 
#endif #if !defined(KOKKOS_IMPL_INLINE_FUNCTION) @@ -564,8 +575,9 @@ static constexpr bool kokkos_omp_on_host() { return false; } #endif #if !defined(KOKKOS_IF_ON_HOST) && !defined(KOKKOS_IF_ON_DEVICE) -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || \ - defined(__SYCL_DEVICE_ONLY__) +#if (defined(KOKKOS_ENABLE_CUDA) && defined(__CUDA_ARCH__)) || \ + (defined(KOKKOS_ENABLE_HIP) && defined(__HIP_DEVICE_COMPILE__)) || \ + (defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__)) #define KOKKOS_IF_ON_DEVICE(CODE) \ { KOKKOS_IMPL_STRIP_PARENS(CODE) } #define KOKKOS_IF_ON_HOST(CODE) \ @@ -578,15 +590,6 @@ static constexpr bool kokkos_omp_on_host() { return false; } #endif #endif -//---------------------------------------------------------------------------- - -#if (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) || \ - (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE >= 600) -#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN) -#define KOKKOS_ENABLE_POSIX_MEMALIGN 1 -#endif -#endif - //---------------------------------------------------------------------------- // If compiling with CUDA, we must use relocatable device code to enable the // task policy. @@ -609,11 +612,11 @@ static constexpr bool kokkos_omp_on_host() { return false; } #define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE #endif -// Guard intel compiler version <= 1900 +// Guard intel compiler version 19 and older // intel error #2651: attribute does not apply to any entity // using <deprecated_type> KOKKOS_DEPRECATED = ... 
#if defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS) && !defined(__NVCC__) && \ - (!defined(KOKKOS_COMPILER_INTEL) || KOKKOS_COMPILER_INTEL > 1900) + (!defined(KOKKOS_COMPILER_INTEL) || KOKKOS_COMPILER_INTEL >= 2021) #define KOKKOS_DEPRECATED [[deprecated]] #define KOKKOS_DEPRECATED_WITH_COMMENT(comment) [[deprecated(comment)]] #else @@ -661,11 +664,9 @@ static constexpr bool kokkos_omp_on_host() { return false; } #undef __CUDA_ARCH__ #endif -#if (defined(KOKKOS_COMPILER_MSVC) && !defined(KOKKOS_COMPILER_CLANG)) || \ - (defined(KOKKOS_COMPILER_INTEL) && defined(_WIN32)) -#define KOKKOS_THREAD_LOCAL __declspec(thread) -#else -#define KOKKOS_THREAD_LOCAL __thread +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +#define KOKKOS_THREAD_LOCAL \ + KOKKOS_DEPRECATED_WITH_COMMENT("Use thread_local instead!") thread_local #endif #if (defined(KOKKOS_IMPL_WINDOWS_CUDA) || defined(KOKKOS_COMPILER_MSVC)) && \ diff --git a/packages/kokkos/core/src/Kokkos_MasterLock.hpp b/packages/kokkos/core/src/Kokkos_MasterLock.hpp index cbfbb92660ba9d75f4aadb67196d969902524a4f..5e48595bf0da7d0b1ad3de9d58f714b050961606 100644 --- a/packages/kokkos/core/src/Kokkos_MasterLock.hpp +++ b/packages/kokkos/core/src/Kokkos_MasterLock.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_MASTER_LOCK_HPP #define KOKKOS_MASTER_LOCK_HPP diff --git a/packages/kokkos/core/src/Kokkos_MathematicalConstants.hpp b/packages/kokkos/core/src/Kokkos_MathematicalConstants.hpp index c6b8c08dc962bc1c1f848be5ec0149c29736771d..8f7b559e786ecdd1841407835c02ffb31e16cdee 100644 --- a/packages/kokkos/core/src/Kokkos_MathematicalConstants.hpp +++ b/packages/kokkos/core/src/Kokkos_MathematicalConstants.hpp @@ -43,6 +43,10 @@ */ #ifndef 
KOKKOS_MATHEMATICAL_CONSTANTS_HPP #define KOKKOS_MATHEMATICAL_CONSTANTS_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHCONSTANTS +#endif #include <Kokkos_Macros.hpp> #include <type_traits> @@ -82,4 +86,8 @@ KOKKOS_IMPL_MATH_CONSTANT(phi, 1.618033988749894848204586834365638118L); } // namespace Experimental } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHCONSTANTS +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHCONSTANTS +#endif #endif diff --git a/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp b/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp index 6ee8d7745711141373a6ed74999b41bb2798ecef..6942f8495877368727fe9d55a063844260cd63d5 100644 --- a/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp +++ b/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp @@ -44,9 +44,14 @@ #ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_HPP #define KOKKOS_MATHEMATICAL_FUNCTIONS_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHFUNCTIONS +#endif #include <Kokkos_Macros.hpp> #include <cmath> +#include <cstdlib> #include <type_traits> #ifdef KOKKOS_ENABLE_SYCL @@ -76,29 +81,40 @@ struct promote<float> { }; template <class T> using promote_t = typename promote<T>::type; -template <class T, class U> +template <class T, class U, + bool = std::is_arithmetic<T>::value&& std::is_arithmetic<U>::value> struct promote_2 { using type = decltype(promote_t<T>() + promote_t<U>()); }; template <class T, class U> +struct promote_2<T, U, false> {}; +template <class T, class U> using promote_2_t = typename promote_2<T, U>::type; } // namespace Impl -namespace Experimental { - // NOTE long double overloads are not available on the device #if defined(KOKKOS_ENABLE_SYCL) #define KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE sycl #else -#if defined(KOKKOS_COMPILER_NVCC) && 
defined(__GNUC__) && (__GNUC__ < 6) && \ - !defined(__clang__) +#if (defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_COMPILER_NVHPC)) && \ + defined(__GNUC__) && (__GNUC__ < 6) && !defined(__clang__) #define KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE #else #define KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE std #endif #endif +#if defined(KOKKOS_ENABLE_DEPRECATED_CODE_3) +#define KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED( \ + USING_DECLARATIONS_IN_EXPERIMENTAL_NAMESPACE) \ + USING_DECLARATIONS_IN_EXPERIMENTAL_NAMESPACE +#else +#define KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED( \ + USING_DECLARATIONS_IN_EXPERIMENTAL_NAMESPACE) \ + /* nothing */ +#endif + #define KOKKOS_IMPL_MATH_UNARY_FUNCTION(FUNC) \ KOKKOS_INLINE_FUNCTION float FUNC(float x) { \ using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC; \ @@ -125,7 +141,13 @@ namespace Experimental { FUNC(T x) { \ using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC; \ return FUNC(static_cast<double>(x)); \ - } + } \ + KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED( \ + namespace Experimental { \ + using ::Kokkos::FUNC; \ + using ::Kokkos::FUNC##f; \ + using ::Kokkos::FUNC##l; \ + }) // isinf, isnan, and isinfinite do not work on Windows with CUDA with std:: // getting warnings about calling host function in device function then @@ -142,7 +164,9 @@ namespace Experimental { KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_integral<T>::value, bool> \ FUNC(T x) { \ return ::FUNC(static_cast<double>(x)); \ - } + } \ + KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED( \ + namespace Experimental { using ::Kokkos::FUNC; }) #else #define KOKKOS_IMPL_MATH_UNARY_PREDICATE(FUNC) \ KOKKOS_INLINE_FUNCTION bool FUNC(float x) { \ @@ -162,7 +186,9 @@ namespace Experimental { FUNC(T x) { \ using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::FUNC; \ return FUNC(static_cast<double>(x)); \ - } + } \ + KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED( \ + namespace Experimental { using 
::Kokkos::FUNC; }) #endif #define KOKKOS_IMPL_MATH_BINARY_FUNCTION(FUNC) \ @@ -208,20 +234,35 @@ namespace Experimental { static_assert(std::is_same<Promoted, long double>::value, ""); \ using std::FUNC; \ return FUNC(static_cast<Promoted>(x), static_cast<Promoted>(y)); \ - } - + } \ + KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED( \ + namespace Experimental { \ + using ::Kokkos::FUNC; \ + using ::Kokkos::FUNC##f; \ + using ::Kokkos::FUNC##l; \ + }) // Basic operations KOKKOS_INLINE_FUNCTION int abs(int n) { using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs; return abs(n); } KOKKOS_INLINE_FUNCTION long abs(long n) { +// FIXME_NVHPC ptxas fatal : unresolved extern function 'labs' +#ifdef KOKKOS_COMPILER_NVHPC + return n > 0 ? n : -n; +#else using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs; return abs(n); +#endif } KOKKOS_INLINE_FUNCTION long long abs(long long n) { +// FIXME_NVHPC ptxas fatal : unresolved extern function 'labs' +#ifdef KOKKOS_COMPILER_NVHPC + return n > 0 ? n : -n; +#else using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs; return abs(n); +#endif } KOKKOS_INLINE_FUNCTION float abs(float x) { using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs; @@ -235,11 +276,15 @@ inline long double abs(long double x) { using std::abs; return abs(x); } +KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED( + namespace Experimental { using ::Kokkos::abs; }) KOKKOS_IMPL_MATH_UNARY_FUNCTION(fabs) KOKKOS_IMPL_MATH_BINARY_FUNCTION(fmod) KOKKOS_IMPL_MATH_BINARY_FUNCTION(remainder) -KOKKOS_IMPL_MATH_BINARY_FUNCTION(fmin) +// remquo +// fma KOKKOS_IMPL_MATH_BINARY_FUNCTION(fmax) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(fmin) KOKKOS_IMPL_MATH_BINARY_FUNCTION(fdim) #ifndef KOKKOS_ENABLE_SYCL KOKKOS_INLINE_FUNCTION float nanf(char const* arg) { return ::nanf(arg); } @@ -253,19 +298,46 @@ KOKKOS_INLINE_FUNCTION float nanf(char const*) { return sycl::nan(0u); } KOKKOS_INLINE_FUNCTION double nan(char const*) { return sycl::nan(0ul); } #endif inline long double nanl(char 
const* arg) { return ::nanl(arg); } -// Power functions -KOKKOS_IMPL_MATH_BINARY_FUNCTION(pow) -KOKKOS_IMPL_MATH_UNARY_FUNCTION(sqrt) -KOKKOS_IMPL_MATH_UNARY_FUNCTION(cbrt) -KOKKOS_IMPL_MATH_BINARY_FUNCTION(hypot) +KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED( + namespace Experimental { + using ::Kokkos::nan; + using ::Kokkos::nanf; + using ::Kokkos::nanl; + }) // Exponential functions KOKKOS_IMPL_MATH_UNARY_FUNCTION(exp) +// FIXME_NVHPC nvc++ has issues with exp2 +#ifndef KOKKOS_COMPILER_NVHPC KOKKOS_IMPL_MATH_UNARY_FUNCTION(exp2) +#else +KOKKOS_INLINE_FUNCTION float exp2(float val) { + constexpr float ln2 = 0.693147180559945309417232121458176568L; + return exp(ln2 * val); +} +KOKKOS_INLINE_FUNCTION double exp2(double val) { + constexpr double ln2 = 0.693147180559945309417232121458176568L; + return exp(ln2 * val); +} +inline long double exp2(long double val) { + constexpr long double ln2 = 0.693147180559945309417232121458176568L; + return exp(ln2 * val); +} +template <class T> +KOKKOS_INLINE_FUNCTION double exp2(T val) { + constexpr double ln2 = 0.693147180559945309417232121458176568L; + return exp(ln2 * static_cast<double>(val)); +} +#endif KOKKOS_IMPL_MATH_UNARY_FUNCTION(expm1) KOKKOS_IMPL_MATH_UNARY_FUNCTION(log) KOKKOS_IMPL_MATH_UNARY_FUNCTION(log10) KOKKOS_IMPL_MATH_UNARY_FUNCTION(log2) KOKKOS_IMPL_MATH_UNARY_FUNCTION(log1p) +// Power functions +KOKKOS_IMPL_MATH_BINARY_FUNCTION(pow) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(sqrt) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(cbrt) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(hypot) // Trigonometric functions KOKKOS_IMPL_MATH_UNARY_FUNCTION(sin) KOKKOS_IMPL_MATH_UNARY_FUNCTION(cos) @@ -290,21 +362,51 @@ KOKKOS_IMPL_MATH_UNARY_FUNCTION(lgamma) KOKKOS_IMPL_MATH_UNARY_FUNCTION(ceil) KOKKOS_IMPL_MATH_UNARY_FUNCTION(floor) KOKKOS_IMPL_MATH_UNARY_FUNCTION(trunc) -// FIXME_SYCL not available as of current SYCL specification v1.2.1 -#ifndef KOKKOS_ENABLE_SYCL +KOKKOS_IMPL_MATH_UNARY_FUNCTION(round) +// lround +// llround +// 
FIXME_SYCL not available as of current SYCL 2020 specification (revision 4) +#ifndef KOKKOS_ENABLE_SYCL // FIXME_SYCL KOKKOS_IMPL_MATH_UNARY_FUNCTION(nearbyint) #endif +// rint +// lrint +// llrint +// Floating point manipulation functions +// frexp +// ldexp +// modf +// scalbn +// scalbln +// ilog +KOKKOS_IMPL_MATH_UNARY_FUNCTION(logb) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(nextafter) +// nexttoward +KOKKOS_IMPL_MATH_BINARY_FUNCTION(copysign) // Classification and comparison +// fpclassify KOKKOS_IMPL_MATH_UNARY_PREDICATE(isfinite) KOKKOS_IMPL_MATH_UNARY_PREDICATE(isinf) KOKKOS_IMPL_MATH_UNARY_PREDICATE(isnan) +// isnormal +KOKKOS_IMPL_MATH_UNARY_PREDICATE(signbit) +// isgreater +// isgreaterequal +// isless +// islessequal +// islessgreater +// isunordered +#undef KOKKOS_IMPL_MATH_FUNCTIONS_DEFINED_IF_DEPRECATED_CODE_ENABLED #undef KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE #undef KOKKOS_IMPL_MATH_UNARY_FUNCTION #undef KOKKOS_IMPL_MATH_UNARY_PREDICATE #undef KOKKOS_IMPL_MATH_BINARY_FUNCTION -} // namespace Experimental } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHFUNCTIONS +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHFUNCTIONS +#endif #endif diff --git a/packages/kokkos/core/src/Kokkos_MathematicalSpecialFunctions.hpp b/packages/kokkos/core/src/Kokkos_MathematicalSpecialFunctions.hpp index 03c491c36d8c6843b24a784983a9401b4ec80dae..07da1dbd7e4965b231242a0359d8baa05ade98da 100644 --- a/packages/kokkos/core/src/Kokkos_MathematicalSpecialFunctions.hpp +++ b/packages/kokkos/core/src/Kokkos_MathematicalSpecialFunctions.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_MATHEMATICAL_SPECIAL_FUNCTIONS_HPP #define KOKKOS_MATHEMATICAL_SPECIAL_FUNCTIONS_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHSPECFUNCTIONS +#endif #include <Kokkos_Macros.hpp> #include <cmath> @@ -62,12 +66,12 @@ template <class RealType> KOKKOS_INLINE_FUNCTION RealType 
expint1(RealType x) { // This function is a conversion of the corresponding Fortran program in // S. Zhang & J. Jin "Computation of Special Functions" (Wiley, 1996). + using Kokkos::exp; + using Kokkos::fabs; + using Kokkos::log; + using Kokkos::pow; using Kokkos::Experimental::epsilon; - using Kokkos::Experimental::exp; - using Kokkos::Experimental::fabs; using Kokkos::Experimental::infinity; - using Kokkos::Experimental::log; - using Kokkos::Experimental::pow; RealType e1; @@ -114,14 +118,14 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> erf( // (1) abs(z)<=2 - Power series, NBS Handbook, p. 298 // (2) abs(z)>2 and x>1 - continued fraction, NBS Handbook, p. 298 // (3) abs(z)>2 and 0<=x<=1 and abs(y)<6 - series, NBS Handbook, p. 299 - // (4) abs(z)>2 and 0<=x<=1 and abs(y)>=6 - asymtotic expansion + // (4) abs(z)>2 and 0<=x<=1 and abs(y)>=6 - asymptotic expansion // Error condition: abs(z^2) > 670 is a fatal overflow error - using Kokkos::Experimental::cos; + using Kokkos::cos; + using Kokkos::exp; + using Kokkos::fabs; + using Kokkos::sin; using Kokkos::Experimental::epsilon; - using Kokkos::Experimental::exp; - using Kokkos::Experimental::fabs; using Kokkos::Experimental::infinity; - using Kokkos::Experimental::sin; using CmplxType = Kokkos::complex<RealType>; @@ -248,7 +252,7 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> erf( if (z.real() < 0.0) cans = -cans; } // end (abs(yp) < 6.0) else { //(abs(YP)>=6.0) - // Asymtotic expansion for 0<=xp<=1 and abs(yp)>=6 + // Asymptotic expansion for 0<=xp<=1 and abs(yp)>=6 CmplxType rcz = 0.5 / cz; CmplxType accum = CmplxType(1.0, 0.0); CmplxType term = accum; @@ -291,15 +295,15 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> erfcx( // (1) abs(z)<=2 - Power series, NBS Handbook, p. 298 // (2) abs(z)>2 and x>1 - continued fraction, NBS Handbook, p. 298 // (3) abs(z)>2 and 0<=x<=1 and abs(y)<6 - series, NBS Handbook, p. 
299 - // (4) abs(z)>2 and 0<=x<=1 and abs(y)>=6 - asymtotic expansion + // (4) abs(z)>2 and 0<=x<=1 and abs(y)>=6 - asymptotic expansion // Error condition: abs(z^2) > 670 is a fatal overflow error when x<0 - using Kokkos::Experimental::cos; + using Kokkos::cos; + using Kokkos::exp; + using Kokkos::fabs; + using Kokkos::isinf; + using Kokkos::sin; using Kokkos::Experimental::epsilon; - using Kokkos::Experimental::exp; - using Kokkos::Experimental::fabs; using Kokkos::Experimental::infinity; - using Kokkos::Experimental::isinf; - using Kokkos::Experimental::sin; using CmplxType = Kokkos::complex<RealType>; @@ -440,7 +444,7 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> erfcx( cans = cz * (1.0 + w) + rcz * CmplxType(s1, s2) / pi; } // end (abs(yp) < 6.0) else { //(abs(YP)>=6.0) - // Asymtotic expansion for 0<=xp<=1 and abs(yp)>=6 + // Asymptotic expansion for 0<=xp<=1 and abs(yp)>=6 CmplxType rcz = 0.5 / cz; CmplxType accum = CmplxType(1.0, 0.0); CmplxType term = accum; @@ -486,8 +490,8 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_j0(const CmplxType& z, // argument regions // bw_start --- Starting point for backward recurrence // Output: cbj0 --- J0(z) - using Kokkos::Experimental::fabs; - using Kokkos::Experimental::pow; + using Kokkos::fabs; + using Kokkos::pow; CmplxType cbj0; constexpr auto pi = Kokkos::Experimental::pi_v<RealType>; @@ -574,9 +578,9 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_y0(const CmplxType& z, // argument regions // bw_start --- Starting point for backward recurrence // Output: cby0 --- Y0(z) - using Kokkos::Experimental::fabs; + using Kokkos::fabs; + using Kokkos::pow; using Kokkos::Experimental::infinity; - using Kokkos::Experimental::pow; constexpr auto inf = infinity<RealType>::value; @@ -675,8 +679,8 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_j1(const CmplxType& z, // argument regions // bw_start --- Starting point for backward recurrence // Output: cbj1 --- J1(z) - using Kokkos::Experimental::fabs; - using 
Kokkos::Experimental::pow; + using Kokkos::fabs; + using Kokkos::pow; CmplxType cbj1; constexpr auto pi = Kokkos::Experimental::pi_v<RealType>; @@ -767,9 +771,9 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_y1(const CmplxType& z, // argument regions // bw_start --- Starting point for backward recurrence // Output: cby1 --- Y1(z) - using Kokkos::Experimental::fabs; + using Kokkos::fabs; + using Kokkos::pow; using Kokkos::Experimental::infinity; - using Kokkos::Experimental::pow; constexpr auto inf = infinity<RealType>::value; @@ -943,8 +947,8 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_k0(const CmplxType& z, // argument regions // bw_start --- Starting point for backward recurrence // Output: cbk0 --- K0(z) + using Kokkos::pow; using Kokkos::Experimental::infinity; - using Kokkos::Experimental::pow; constexpr auto inf = infinity<RealType>::value; @@ -1089,8 +1093,8 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_k1(const CmplxType& z, // argument regions // bw_start --- Starting point for backward recurrence // Output: cbk1 --- K1(z) + using Kokkos::pow; using Kokkos::Experimental::infinity; - using Kokkos::Experimental::pow; constexpr auto inf = infinity<RealType>::value; @@ -1274,4 +1278,8 @@ KOKKOS_INLINE_FUNCTION CmplxType cyl_bessel_h21(const CmplxType& z) { } // namespace Experimental } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHSPECFUNCTIONS +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_MATHSPECFUNCTIONS +#endif #endif diff --git a/packages/kokkos/core/src/Kokkos_MemoryPool.hpp b/packages/kokkos/core/src/Kokkos_MemoryPool.hpp index 7dce3f4780352c34c40b72e7e591acda84e07f9d..368d4901806792258618bcb77c5fdf7759ed1888 100644 --- a/packages/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/packages/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including 
non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_MEMORYPOOL_HPP #define KOKKOS_MEMORYPOOL_HPP diff --git a/packages/kokkos/core/src/Kokkos_MemoryTraits.hpp b/packages/kokkos/core/src/Kokkos_MemoryTraits.hpp index e3cee93e257b154b73df1d0c40514040d7083f22..079384f153c743b9ec96f5bdc1cef42f3f547ef4 100644 --- a/packages/kokkos/core/src/Kokkos_MemoryTraits.hpp +++ b/packages/kokkos/core/src/Kokkos_MemoryTraits.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_MEMORYTRAITS_HPP #define KOKKOS_MEMORYTRAITS_HPP diff --git a/packages/kokkos/core/src/Kokkos_MinMaxClamp.hpp b/packages/kokkos/core/src/Kokkos_MinMaxClamp.hpp index a82e13df7fa222271247016999a7e206c8ecfd25..6cb8d1669b96c3ad215e3fdc3cde9563f76036e7 100644 --- a/packages/kokkos/core/src/Kokkos_MinMaxClamp.hpp +++ b/packages/kokkos/core/src/Kokkos_MinMaxClamp.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_MIN_MAX_CLAMP_HPP #define KOKKOS_MIN_MAX_CLAMP_HPP @@ -51,7 +60,6 @@ #include <initializer_list> namespace Kokkos { -namespace Experimental { // clamp template <class T> @@ -223,7 +231,15 @@ KOKKOS_INLINE_FUNCTION constexpr Kokkos::pair<T, T> minmax( return result; } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +namespace Experimental { +using ::Kokkos::clamp; +using ::Kokkos::max; +using ::Kokkos::min; +using 
::Kokkos::minmax; } // namespace Experimental +#endif + } // namespace Kokkos #endif diff --git a/packages/kokkos/core/src/Kokkos_NumericTraits.hpp b/packages/kokkos/core/src/Kokkos_NumericTraits.hpp index 67f017c69146dcae1eab1d19e8bbb5a7f1610c31..e529aba8533229c42fe95f884b115a291945bcd0 100644 --- a/packages/kokkos/core/src/Kokkos_NumericTraits.hpp +++ b/packages/kokkos/core/src/Kokkos_NumericTraits.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_NUMERIC_TRAITS_HPP #define KOKKOS_NUMERIC_TRAITS_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NUMERICTRAITS +#endif #include <Kokkos_Macros.hpp> #include <cfloat> @@ -650,4 +654,8 @@ struct reduction_identity<long double> { } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NUMERICTRAITS +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NUMERICTRAITS +#endif #endif diff --git a/packages/kokkos/core/src/Kokkos_OpenMP.hpp b/packages/kokkos/core/src/Kokkos_OpenMP.hpp index 767e5b9324487e2e050828201c1eae6fcbb296d2..775b4704489f79a03c1ffecb8d37de6925dd3acc 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMP.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_OPENMP_HPP #define KOKKOS_OPENMP_HPP @@ -62,8 +71,9 @@ #include <Kokkos_Parallel.hpp> #include <Kokkos_TaskScheduler.hpp> #include <Kokkos_Layout.hpp> +#include <impl/Kokkos_HostSharedPtr.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> #include <vector> @@ -72,7 +82,7 @@ namespace Kokkos { namespace Impl { -class 
OpenMPExec; +class OpenMPInternal; } /// \class OpenMP @@ -95,8 +105,10 @@ class OpenMP { using size_type = memory_space::size_type; using scratch_memory_space = ScratchMemorySpace<OpenMP>; + OpenMP(); + /// \brief Print configuration information to the given output stream. - static void print_configuration(std::ostream&, const bool verbose = false); + void print_configuration(std::ostream& os, bool verbose = false) const; /// \brief is the instance running a parallel algorithm inline static bool in_parallel(OpenMP const& = OpenMP()) noexcept; @@ -104,11 +116,10 @@ class OpenMP { /// \brief Wait until all dispatched functors complete on the given instance /// /// This is a no-op on OpenMP - static void impl_static_fence(OpenMP const& = OpenMP(), - const std::string& name = "") noexcept; + static void impl_static_fence(std::string const& name); - void fence() const; - void fence(const std::string& name) const; + void fence(std::string const& name = + "Kokkos::OpenMP::fence: Unnamed Instance Fence") const; /// \brief Does the given instance return immediately after launching /// a parallel algorithm @@ -116,6 +127,7 @@ class OpenMP { /// This always returns false on OpenMP inline static bool is_asynchronous(OpenMP const& = OpenMP()) noexcept; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 /// \brief Partition the default instance into new instances without creating /// new masters /// @@ -129,7 +141,6 @@ class OpenMP { /// This is a no-op on OpenMP since a non default instance cannot be created static OpenMP create_instance(...); -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 /// \brief Partition the default instance and call 'f' on each new 'master' /// thread /// @@ -144,7 +155,7 @@ class OpenMP { // use UniqueToken static int concurrency(); - static void impl_initialize(int thread_count = -1); + static void impl_initialize(InitializationSettings const&); /// \brief is the default execution space initialized for current 'master' /// thread @@ -170,8 +181,23 @@ class OpenMP { 
static int impl_get_current_max_threads() noexcept; + Impl::OpenMPInternal* impl_internal_space_instance() const { +#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS + return m_space_instance; +#else + return m_space_instance.get(); +#endif + } + static constexpr const char* name() noexcept { return "OpenMP"; } uint32_t impl_instance_id() const noexcept { return 1; } + + private: +#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS + Impl::OpenMPInternal* m_space_instance; +#else + Kokkos::Impl::HostSharedPtr<Impl::OpenMPInternal> m_space_instance; +#endif }; namespace Tools { @@ -183,21 +209,6 @@ struct DeviceTypeTraits<OpenMP> { }; } // namespace Experimental } // namespace Tools - -namespace Impl { - -class OpenMPSpaceInitializer : public ExecSpaceInitializerBase { - public: - OpenMPSpaceInitializer() = default; - ~OpenMPSpaceInitializer() = default; - void initialize(const InitArguments& args) final; - void finalize(const bool) final; - void fence() final; - void fence(const std::string&) final; - void print_configuration(std::ostream& msg, const bool detail) final; -}; - -} // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ @@ -220,7 +231,7 @@ struct MemorySpaceAccess<Kokkos::OpenMP::memory_space, /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ -#include <OpenMP/Kokkos_OpenMP_Exec.hpp> +#include <OpenMP/Kokkos_OpenMP_Instance.hpp> #include <OpenMP/Kokkos_OpenMP_Team.hpp> #include <OpenMP/Kokkos_OpenMP_Parallel.hpp> #include <OpenMP/Kokkos_OpenMP_Task.hpp> diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp index 373dc3d9c7bc22fb6e3b539c232e8ebfad7bb9bc..637b4c08f9088508f53d2bf13cce6e840b4006f1 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp +++ 
b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_OPENMPTARGET_HPP #define KOKKOS_OPENMPTARGET_HPP @@ -59,8 +68,8 @@ #include <Kokkos_TaskScheduler.hpp> #include <Kokkos_Layout.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> #include <KokkosExp_MDRangePolicy.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> /*--------------------------------------------------------------------------*/ namespace Kokkos { @@ -90,27 +99,27 @@ class OpenMPTarget { inline static bool in_parallel() { return omp_in_parallel(); } - static void fence(); - static void fence(const std::string&); + static void fence(const std::string& name = + "Kokkos::OpenMPTarget::fence: Unnamed Instance Fence"); + + static void impl_static_fence(const std::string& name); - static void impl_static_fence(); - static void impl_static_fence(const std::string&); /** \brief Return the maximum amount of concurrency. */ static int concurrency(); //! Print configuration information to the given output stream. - void print_configuration(std::ostream&, const bool detail = false); + void print_configuration(std::ostream& os, bool verbose = false) const; static const char* name(); //! Free any resources being consumed by the device. - void impl_finalize(); + static void impl_finalize(); //! Has been initialized static int impl_is_initialized(); //! Initialize, telling the CUDA run-time library which device to use. 
- void impl_initialize(); + static void impl_initialize(InitializationSettings const&); inline Impl::OpenMPTargetInternal* impl_internal_space_instance() const { return m_space_instance; @@ -124,6 +133,17 @@ class OpenMPTarget { }; } // namespace Experimental +namespace Impl { +template <> +struct MemorySpaceAccess< + Kokkos::Experimental::OpenMPTargetSpace, + Kokkos::Experimental::OpenMPTarget::scratch_memory_space> { + enum : bool { assignable = false }; + enum : bool { accessible = true }; + enum : bool { deepcopy = false }; +}; +} // namespace Impl + namespace Tools { namespace Experimental { template <> @@ -137,20 +157,6 @@ struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> { } // namespace Experimental } // namespace Tools -namespace Impl { - -class OpenMPTargetSpaceInitializer : public ExecSpaceInitializerBase { - public: - OpenMPTargetSpaceInitializer() = default; - ~OpenMPTargetSpaceInitializer() = default; - void initialize(const InitArguments& args) final; - void finalize(const bool) final; - void fence() final; - void fence(const std::string&) final; - void print_configuration(std::ostream& msg, const bool detail) final; -}; - -} // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp index 25c852717c23da4e25c5573e59f9cd21c03a06e0..b4897449cb9e769438e3d5a0c48ad39a55cd8bad 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_OPENMPTARGETSPACE_HPP 
#define KOKKOS_OPENMPTARGETSPACE_HPP @@ -113,14 +122,6 @@ struct MemorySpaceAccess<Kokkos::Experimental::OpenMPTargetSpace, }; //---------------------------------------- - -template <> -struct MemorySpaceAccess<Kokkos::Experimental::OpenMPTargetSpace, - Kokkos::Experimental::OpenMPTargetSpace> { - enum : bool { assignable = true }; - enum : bool { accessible = true }; - enum : bool { deepcopy = false }; -}; } // namespace Impl } // namespace Kokkos @@ -161,13 +162,29 @@ class OpenMPTargetSpace { /**\brief Allocate untracked memory in the space */ void* allocate(const size_t arg_alloc_size) const; + void* allocate(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; /**\brief Deallocate untracked memory in the space */ - void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const; + void deallocate(void* const arg_alloc_ptr, + const std::size_t arg_alloc_size) const; + void deallocate(const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; static constexpr const char* name() { return "OpenMPTargetSpace"; } private: + void* impl_allocate(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; + void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace, void>; }; @@ -208,6 +225,15 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void> ~SharedAllocationRecord(); SharedAllocationRecord() = default; + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, + const 
Kokkos::Experimental::OpenMPTargetSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &deallocate) + : SharedAllocationRecord(arg_space, arg_label, arg_alloc_size, + arg_dealloc) {} + SharedAllocationRecord( const Kokkos::Experimental::OpenMPTargetSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, diff --git a/packages/kokkos/core/src/Kokkos_Pair.hpp b/packages/kokkos/core/src/Kokkos_Pair.hpp index 6045737aa936b078e02b54fa821fcdff6154734e..7e5b7ce250021b089cba6b97b972a78dc6633104 100644 --- a/packages/kokkos/core/src/Kokkos_Pair.hpp +++ b/packages/kokkos/core/src/Kokkos_Pair.hpp @@ -48,6 +48,10 @@ #ifndef KOKKOS_PAIR_HPP #define KOKKOS_PAIR_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PAIR +#endif #include <Kokkos_Macros.hpp> #include <utility> @@ -84,17 +88,28 @@ struct pair { /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type const& f, - second_type const& s) - : first(f), second(s) {} +#ifdef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC bug in NVHPC regarding constexpr + // constructors used in device code + KOKKOS_FORCEINLINE_FUNCTION +#else + KOKKOS_FORCEINLINE_FUNCTION constexpr +#endif + pair(first_type const& f, second_type const& s) : first(f), second(s) {} /// \brief Copy constructor. /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. 
template <class U, class V> - KOKKOS_FORCEINLINE_FUNCTION constexpr pair(const pair<U, V>& p) - : first(p.first), second(p.second) {} +#ifdef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC bug in NVHPC regarding constexpr + // constructors used in device code + KOKKOS_FORCEINLINE_FUNCTION +#else + KOKKOS_FORCEINLINE_FUNCTION constexpr +#endif + pair(const pair<U, V>& p) + : first(p.first), second(p.second) { + } /// \brief Copy constructor. /// @@ -504,4 +519,8 @@ struct is_pair_like<std::pair<T, U>> : std::true_type {}; } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PAIR +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PAIR +#endif #endif // KOKKOS_PAIR_HPP diff --git a/packages/kokkos/core/src/Kokkos_Parallel.hpp b/packages/kokkos/core/src/Kokkos_Parallel.hpp index c12cd77d38bc93d666037e9fd3a30194494e177b..2b5e39d24d808abcddf36f4840c2d47082a7a905 100644 --- a/packages/kokkos/core/src/Kokkos_Parallel.hpp +++ b/packages/kokkos/core/src/Kokkos_Parallel.hpp @@ -45,6 +45,15 @@ /// \file Kokkos_Parallel.hpp /// \brief Declaration of parallel operators +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_PARALLEL_HPP #define KOKKOS_PARALLEL_HPP @@ -58,7 +67,6 @@ #include <impl/Kokkos_Traits.hpp> #include <impl/Kokkos_FunctorAnalysis.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> #include <cstddef> #include <type_traits> @@ -151,12 +159,11 @@ namespace Kokkos { * This compares to a single iteration \c iwork of a \c for loop. * If \c execution_space is not defined DefaultExecutionSpace will be used. 
*/ -template <class ExecPolicy, class FunctorType> -inline void parallel_for( - const ExecPolicy& policy, const FunctorType& functor, - const std::string& str = "", - typename std::enable_if< - Kokkos::is_execution_policy<ExecPolicy>::value>::type* = nullptr) { +template < + class ExecPolicy, class FunctorType, + class Enable = std::enable_if_t<is_execution_policy<ExecPolicy>::value>> +inline void parallel_for(const std::string& str, const ExecPolicy& policy, + const FunctorType& functor) { uint64_t kpID = 0; ExecPolicy inner_policy = policy; @@ -171,34 +178,51 @@ inline void parallel_for( Kokkos::Tools::Impl::end_parallel_for(inner_policy, functor, str, kpID); } +template <class ExecPolicy, class FunctorType> +inline void parallel_for( + const ExecPolicy& policy, const FunctorType& functor, + std::enable_if_t<is_execution_policy<ExecPolicy>::value>* = nullptr) { + Kokkos::parallel_for("", policy, functor); +} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template <class ExecPolicy, class FunctorType> +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload taking the label as first argument instead!") +inline void parallel_for( + const ExecPolicy& policy, const FunctorType& functor, + const std::string& str, + std::enable_if_t<is_execution_policy<ExecPolicy>::value>* = nullptr) { + Kokkos::parallel_for(str, policy, functor); +} +#endif + template <class FunctorType> -inline void parallel_for(const size_t work_count, const FunctorType& functor, - const std::string& str = "") { +inline void parallel_for(const std::string& str, const size_t work_count, + const FunctorType& functor) { using execution_space = typename Impl::FunctorPolicyExecutionSpace<FunctorType, void>::execution_space; using policy = RangePolicy<execution_space>; - uint64_t kpID = 0; - policy execution_policy = policy(0, work_count); + ::Kokkos::parallel_for(str, execution_policy, functor); +} - Kokkos::Tools::Impl::begin_parallel_for(execution_policy, functor, str, kpID); - - 
Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelFor<FunctorType, policy> closure(functor, execution_policy); - Kokkos::Impl::shared_allocation_tracking_enable(); - - closure.execute(); - - Kokkos::Tools::Impl::end_parallel_for(execution_policy, functor, str, kpID); +template <class FunctorType> +inline void parallel_for(const size_t work_count, const FunctorType& functor) { + ::Kokkos::parallel_for("", work_count, functor); } -template <class ExecPolicy, class FunctorType> -inline void parallel_for(const std::string& str, const ExecPolicy& policy, - const FunctorType& functor) { - ::Kokkos::parallel_for(policy, functor, str); +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template <class FunctorType> +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload taking the label as first argument instead!") +inline void parallel_for(const size_t work_count, const FunctorType& functor, + const std::string& str) { + ::Kokkos::parallel_for(str, work_count, functor); } +#endif } // namespace Kokkos @@ -245,8 +269,8 @@ namespace Kokkos { /// value_type& update, /// const bool final_pass) const; /// void init (value_type& update) const; -/// void join (volatile value_type& update, -// volatile const value_type& input) const +/// void join (value_type& update, +/// const value_type& input) const; /// }; /// \endcode /// @@ -276,7 +300,7 @@ namespace Kokkos { /// void init (value_type& update) const { /// update = 0; /// } -/// void join (volatile value_type& update, volatile const value_type& input) +/// void join (value_type& update, const value_type& input) /// const { /// update += input; /// } @@ -314,7 +338,7 @@ namespace Kokkos { /// void init (value_type& update) const { /// update = 0; /// } -/// void join (volatile value_type& update, volatile const value_type& input) +/// void join (value_type& update, const value_type& input) /// const { /// update += input; /// } @@ -361,7 +385,7 @@ namespace Kokkos { /// void init (value_type& update) const { /// update = 
0; /// } -/// void join (volatile value_type& update, volatile const value_type& input) +/// void join (value_type& update, const value_type& input) /// const { /// update += input; /// } @@ -373,12 +397,11 @@ namespace Kokkos { /// }; /// \endcode /// -template <class ExecutionPolicy, class FunctorType> -inline void parallel_scan( - const ExecutionPolicy& policy, const FunctorType& functor, - const std::string& str = "", - typename std::enable_if< - Kokkos::is_execution_policy<ExecutionPolicy>::value>::type* = nullptr) { +template <class ExecutionPolicy, class FunctorType, + class Enable = + std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>> +inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, + const FunctorType& functor) { uint64_t kpID = 0; ExecutionPolicy inner_policy = policy; Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID); @@ -393,40 +416,59 @@ inline void parallel_scan( Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID); } +template <class ExecutionPolicy, class FunctorType> +inline void parallel_scan( + const ExecutionPolicy& policy, const FunctorType& functor, + std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = nullptr) { + ::Kokkos::parallel_scan("", policy, functor); +} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template <class ExecutionPolicy, class FunctorType> +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload taking the label as first argument instead!") +inline void parallel_scan( + const ExecutionPolicy& policy, const FunctorType& functor, + const std::string& str, + std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = nullptr) { + ::Kokkos::parallel_scan(str, policy, functor); +} +#endif + template <class FunctorType> -inline void parallel_scan(const size_t work_count, const FunctorType& functor, - const std::string& str = "") { +inline void parallel_scan(const std::string& str, const size_t work_count, + const FunctorType& 
functor) { using execution_space = typename Kokkos::Impl::FunctorPolicyExecutionSpace<FunctorType, void>::execution_space; using policy = Kokkos::RangePolicy<execution_space>; - uint64_t kpID = 0; policy execution_policy(0, work_count); - Kokkos::Tools::Impl::begin_parallel_scan(execution_policy, functor, str, - kpID); - Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelScan<FunctorType, policy> closure(functor, execution_policy); - Kokkos::Impl::shared_allocation_tracking_enable(); - - closure.execute(); + parallel_scan(str, execution_policy, functor); +} - Kokkos::Tools::Impl::end_parallel_scan(execution_policy, functor, str, kpID); +template <class FunctorType> +inline void parallel_scan(const size_t work_count, const FunctorType& functor) { + ::Kokkos::parallel_scan("", work_count, functor); } -template <class ExecutionPolicy, class FunctorType> -inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, - const FunctorType& functor) { - ::Kokkos::parallel_scan(policy, functor, str); +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template <class FunctorType> +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload taking the label as first argument instead!") +inline void parallel_scan(const size_t work_count, const FunctorType& functor, + const std::string& str) { + ::Kokkos::parallel_scan(str, work_count, functor); } +#endif -template <class ExecutionPolicy, class FunctorType, class ReturnType> -inline void parallel_scan( - const ExecutionPolicy& policy, const FunctorType& functor, - ReturnType& return_value, const std::string& str = "", - typename std::enable_if< - Kokkos::is_execution_policy<ExecutionPolicy>::value>::type* = nullptr) { +template <class ExecutionPolicy, class FunctorType, class ReturnType, + class Enable = + std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>> +inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, + const FunctorType& functor, + ReturnType& return_value) { 
uint64_t kpID = 0; ExecutionPolicy inner_policy = policy; Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID); @@ -444,10 +486,30 @@ inline void parallel_scan( "Kokkos::parallel_scan: fence due to result being a value, not a view"); } +template <class ExecutionPolicy, class FunctorType, class ReturnType> +inline void parallel_scan( + const ExecutionPolicy& policy, const FunctorType& functor, + ReturnType& return_value, + std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = nullptr) { + ::Kokkos::parallel_scan("", policy, functor, return_value); +} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template <class ExecutionPolicy, class FunctorType, class ReturnType> +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload taking the label as first argument instead!") +inline void parallel_scan( + const ExecutionPolicy& policy, const FunctorType& functor, + ReturnType& return_value, const std::string& str, + std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = nullptr) { + ::Kokkos::parallel_scan(str, policy, functor, return_value); +} +#endif + template <class FunctorType, class ReturnType> -inline void parallel_scan(const size_t work_count, const FunctorType& functor, - ReturnType& return_value, - const std::string& str = "") { +inline void parallel_scan(const std::string& str, const size_t work_count, + const FunctorType& functor, + ReturnType& return_value) { using execution_space = typename Kokkos::Impl::FunctorPolicyExecutionSpace<FunctorType, void>::execution_space; @@ -455,29 +517,24 @@ inline void parallel_scan(const size_t work_count, const FunctorType& functor, using policy = Kokkos::RangePolicy<execution_space>; policy execution_policy(0, work_count); - uint64_t kpID = 0; - Kokkos::Tools::Impl::begin_parallel_scan(execution_policy, functor, str, - kpID); - - Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelScanWithTotal<FunctorType, policy, ReturnType> closure( - functor, execution_policy, 
return_value); - Kokkos::Impl::shared_allocation_tracking_enable(); - - closure.execute(); - - Kokkos::Tools::Impl::end_parallel_scan(execution_policy, functor, str, kpID); - - execution_space().fence( - "Kokkos::parallel_scan: fence after scan with return value"); + parallel_scan(str, execution_policy, functor, return_value); } -template <class ExecutionPolicy, class FunctorType, class ReturnType> -inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, - const FunctorType& functor, +template <class FunctorType, class ReturnType> +inline void parallel_scan(const size_t work_count, const FunctorType& functor, ReturnType& return_value) { - ::Kokkos::parallel_scan(policy, functor, return_value, str); + ::Kokkos::parallel_scan("", work_count, functor, return_value); +} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template <class FunctorType, class ReturnType> +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload taking the label as first argument instead!") +inline void parallel_scan(const size_t work_count, const FunctorType& functor, + ReturnType& return_value, const std::string& str) { + ::Kokkos::parallel_scan(str, work_count, functor, return_value); } +#endif } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index abd5c39bb65b302092696ab6f078ba1decaa0b41..9213383ac9736f01dc59c89d58154fa92fd5a9cf 100644 --- a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -42,38 +42,39 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_PARALLEL_REDUCE_HPP #define KOKKOS_PARALLEL_REDUCE_HPP #include <Kokkos_NumericTraits.hpp> #include 
<Kokkos_View.hpp> #include <impl/Kokkos_FunctorAnalysis.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_Tools_Generic.hpp> #include <type_traits> #include <iostream> namespace Kokkos { -template <class T, class Enable = void> -struct is_reducer_type { - enum { value = 0 }; -}; - +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 template <class T> -struct is_reducer_type< - T, typename std::enable_if<std::is_same< - typename std::remove_cv<T>::type, - typename std::remove_cv<typename T::reducer>::type>::value>::type> { - enum { value = 1 }; -}; +using is_reducer_type KOKKOS_DEPRECATED_WITH_COMMENT( + "Use Kokkos::is_reducer instead!") = Kokkos::is_reducer<T>; +#endif template <class Scalar, class Space> struct Sum { public: // Required using reducer = Sum<Scalar, Space>; - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -93,11 +94,6 @@ struct Sum { KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { dest += src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest += src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val = reduction_identity<value_type>::sum(); @@ -118,7 +114,7 @@ struct Prod { public: // Required using reducer = Prod<Scalar, Space>; - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -138,11 +134,6 @@ struct Prod { KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { dest *= src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest *= src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val = reduction_identity<value_type>::prod(); @@ -163,7 +154,7 @@ struct Min { public: // Required using reducer 
= Min<Scalar, Space>; - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -185,11 +176,6 @@ struct Min { if (src < dest) dest = src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (src < dest) dest = src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val = reduction_identity<value_type>::min(); @@ -210,7 +196,7 @@ struct Max { public: // Required using reducer = Max<Scalar, Space>; - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -232,11 +218,6 @@ struct Max { if (src > dest) dest = src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (src > dest) dest = src; - } - // Required KOKKOS_INLINE_FUNCTION void init(value_type& val) const { @@ -258,7 +239,7 @@ struct LAnd { public: // Required using reducer = LAnd<Scalar, Space>; - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -279,11 +260,6 @@ struct LAnd { dest = dest && src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest = dest && src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val = reduction_identity<value_type>::land(); @@ -304,7 +280,7 @@ struct LOr { public: // Required using reducer = LOr<Scalar, Space>; - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -326,11 +302,6 @@ struct LOr { dest = dest || src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest = dest 
|| src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val = reduction_identity<value_type>::lor(); @@ -351,7 +322,7 @@ struct BAnd { public: // Required using reducer = BAnd<Scalar, Space>; - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -373,11 +344,6 @@ struct BAnd { dest = dest & src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest = dest & src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val = reduction_identity<value_type>::band(); @@ -398,7 +364,7 @@ struct BOr { public: // Required using reducer = BOr<Scalar, Space>; - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -420,11 +386,6 @@ struct BOr { dest = dest | src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest = dest | src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val = reduction_identity<value_type>::bor(); @@ -450,19 +411,13 @@ struct ValLocScalar { val = rhs.val; loc = rhs.loc; } - - KOKKOS_INLINE_FUNCTION - void operator=(const volatile ValLocScalar& rhs) volatile { - val = rhs.val; - loc = rhs.loc; - } }; template <class Scalar, class Index, class Space> struct MinLoc { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -489,11 +444,6 @@ struct MinLoc { if (src.val < dest.val) dest = src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (src.val < dest.val) dest = src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) 
const { val.val = reduction_identity<scalar_type>::min(); @@ -513,8 +463,8 @@ struct MinLoc { template <class Scalar, class Index, class Space> struct MaxLoc { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -541,11 +491,6 @@ struct MaxLoc { if (src.val > dest.val) dest = src; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (src.val > dest.val) dest = src; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.val = reduction_identity<scalar_type>::max(); @@ -571,18 +516,12 @@ struct MinMaxScalar { min_val = rhs.min_val; max_val = rhs.max_val; } - - KOKKOS_INLINE_FUNCTION - void operator=(const volatile MinMaxScalar& rhs) volatile { - min_val = rhs.min_val; - max_val = rhs.max_val; - } }; template <class Scalar, class Space> struct MinMax { private: - using scalar_type = typename std::remove_cv<Scalar>::type; + using scalar_type = std::remove_cv_t<Scalar>; public: // Required @@ -614,16 +553,6 @@ struct MinMax { } } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (src.min_val < dest.min_val) { - dest.min_val = src.min_val; - } - if (src.max_val > dest.max_val) { - dest.max_val = src.max_val; - } - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.max_val = reduction_identity<scalar_type>::max(); @@ -652,21 +581,13 @@ struct MinMaxLocScalar { max_val = rhs.max_val; max_loc = rhs.max_loc; } - - KOKKOS_INLINE_FUNCTION - void operator=(const volatile MinMaxLocScalar& rhs) volatile { - min_val = rhs.min_val; - min_loc = rhs.min_loc; - max_val = rhs.max_val; - max_loc = rhs.max_loc; - } }; template <class Scalar, class Index, class Space> struct MinMaxLoc { private: - using scalar_type = typename std::remove_cv<Scalar>::type; 
- using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -700,18 +621,6 @@ struct MinMaxLoc { } } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (src.min_val < dest.min_val) { - dest.min_val = src.min_val; - dest.min_loc = src.min_loc; - } - if (src.max_val > dest.max_val) { - dest.max_val = src.max_val; - dest.max_loc = src.max_loc; - } - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.max_val = reduction_identity<scalar_type>::max(); @@ -740,8 +649,8 @@ struct MinMaxLoc { template <class Scalar, class Index, class Space> struct MaxFirstLoc { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -772,15 +681,6 @@ struct MaxFirstLoc { } } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (dest.val < src.val) { - dest = src; - } else if (!(src.val < dest.val)) { - dest.loc = (src.loc < dest.loc) ? 
src.loc : dest.loc; - } - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.val = reduction_identity<scalar_type>::max(); @@ -804,8 +704,8 @@ struct MaxFirstLoc { template <class Scalar, class Index, class ComparatorType, class Space> struct MaxFirstLocCustomComparator { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -840,15 +740,6 @@ struct MaxFirstLocCustomComparator { } } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (m_comp(dest.val, src.val)) { - dest = src; - } else if (!m_comp(src.val, dest.val)) { - dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc; - } - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.val = reduction_identity<scalar_type>::max(); @@ -871,8 +762,8 @@ struct MaxFirstLocCustomComparator { template <class Scalar, class Index, class Space> struct MinFirstLoc { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -903,15 +794,6 @@ struct MinFirstLoc { } } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (src.val < dest.val) { - dest = src; - } else if (!(dest.val < src.val)) { - dest.loc = (src.loc < dest.loc) ? 
src.loc : dest.loc; - } - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.val = reduction_identity<scalar_type>::min(); @@ -935,8 +817,8 @@ struct MinFirstLoc { template <class Scalar, class Index, class ComparatorType, class Space> struct MinFirstLocCustomComparator { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -971,15 +853,6 @@ struct MinFirstLocCustomComparator { } } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (m_comp(src.val, dest.val)) { - dest = src; - } else if (!m_comp(dest.val, src.val)) { - dest.loc = (src.loc < dest.loc) ? src.loc : dest.loc; - } - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.val = reduction_identity<scalar_type>::min(); @@ -1002,8 +875,8 @@ struct MinFirstLocCustomComparator { template <class Scalar, class Index, class Space> struct MinMaxFirstLastLoc { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -1043,23 +916,6 @@ struct MinMaxFirstLastLoc { } } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (src.min_val < dest.min_val) { - dest.min_val = src.min_val; - dest.min_loc = src.min_loc; - } else if (!(dest.min_val < src.min_val)) { - dest.min_loc = (src.min_loc < dest.min_loc) ? src.min_loc : dest.min_loc; - } - - if (dest.max_val < src.max_val) { - dest.max_val = src.max_val; - dest.max_loc = src.max_loc; - } else if (!(src.max_val < dest.max_val)) { - dest.max_loc = (src.max_loc > dest.max_loc) ? 
src.max_loc : dest.max_loc; - } - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.max_val = ::Kokkos::reduction_identity<scalar_type>::max(); @@ -1085,8 +941,8 @@ struct MinMaxFirstLastLoc { template <class Scalar, class Index, class ComparatorType, class Space> struct MinMaxFirstLastLocCustomComparator { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -1129,23 +985,6 @@ struct MinMaxFirstLastLocCustomComparator { } } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - if (m_comp(src.min_val, dest.min_val)) { - dest.min_val = src.min_val; - dest.min_loc = src.min_loc; - } else if (!m_comp(dest.min_val, src.min_val)) { - dest.min_loc = (src.min_loc < dest.min_loc) ? src.min_loc : dest.min_loc; - } - - if (m_comp(dest.max_val, src.max_val)) { - dest.max_val = src.max_val; - dest.max_loc = src.max_loc; - } else if (!m_comp(src.max_val, dest.max_val)) { - dest.max_loc = (src.max_loc > dest.max_loc) ? 
src.max_loc : dest.max_loc; - } - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.max_val = ::Kokkos::reduction_identity<scalar_type>::max(); @@ -1173,17 +1012,12 @@ struct FirstLocScalar { KOKKOS_INLINE_FUNCTION void operator=(const FirstLocScalar& rhs) { min_loc_true = rhs.min_loc_true; } - - KOKKOS_INLINE_FUNCTION - void operator=(const volatile FirstLocScalar& rhs) volatile { - min_loc_true = rhs.min_loc_true; - } }; template <class Index, class Space> struct FirstLoc { private: - using index_type = typename std::remove_cv<Index>::type; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -1212,13 +1046,6 @@ struct FirstLoc { : dest.min_loc_true; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest.min_loc_true = (src.min_loc_true < dest.min_loc_true) - ? src.min_loc_true - : dest.min_loc_true; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.min_loc_true = ::Kokkos::reduction_identity<index_type>::min(); @@ -1243,17 +1070,12 @@ struct LastLocScalar { KOKKOS_INLINE_FUNCTION void operator=(const LastLocScalar& rhs) { max_loc_true = rhs.max_loc_true; } - - KOKKOS_INLINE_FUNCTION - void operator=(const volatile LastLocScalar& rhs) volatile { - max_loc_true = rhs.max_loc_true; - } }; template <class Index, class Space> struct LastLoc { private: - using index_type = typename std::remove_cv<Index>::type; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -1282,13 +1104,6 @@ struct LastLoc { : dest.max_loc_true; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest.max_loc_true = (src.max_loc_true > dest.max_loc_true) - ? 
src.max_loc_true - : dest.max_loc_true; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.max_loc_true = ::Kokkos::reduction_identity<index_type>::max(); @@ -1313,12 +1128,6 @@ struct StdIsPartScalar { min_loc_false = rhs.min_loc_false; max_loc_true = rhs.max_loc_true; } - - KOKKOS_INLINE_FUNCTION - void operator=(const volatile StdIsPartScalar& rhs) volatile { - min_loc_false = rhs.min_loc_false; - max_loc_true = rhs.max_loc_true; - } }; // @@ -1327,7 +1136,7 @@ struct StdIsPartScalar { template <class Index, class Space> struct StdIsPartitioned { private: - using index_type = typename std::remove_cv<Index>::type; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -1361,17 +1170,6 @@ struct StdIsPartitioned { : src.min_loc_false; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest.max_loc_true = (dest.max_loc_true < src.max_loc_true) - ? src.max_loc_true - : dest.max_loc_true; - - dest.min_loc_false = (dest.min_loc_false < src.min_loc_false) - ? 
dest.min_loc_false - : src.min_loc_false; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.max_loc_true = ::Kokkos::reduction_identity<index_type>::max(); @@ -1396,11 +1194,6 @@ struct StdPartPointScalar { void operator=(const StdPartPointScalar& rhs) { min_loc_false = rhs.min_loc_false; } - - KOKKOS_INLINE_FUNCTION - void operator=(const volatile StdPartPointScalar& rhs) volatile { - min_loc_false = rhs.min_loc_false; - } }; // @@ -1409,7 +1202,7 @@ struct StdPartPointScalar { template <class Index, class Space> struct StdPartitionPoint { private: - using index_type = typename std::remove_cv<Index>::type; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -1439,13 +1232,6 @@ struct StdPartitionPoint { : src.min_loc_false; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest.min_loc_false = (dest.min_loc_false < src.min_loc_false) - ? dest.min_loc_false - : src.min_loc_false; - } - KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.min_loc_false = ::Kokkos::reduction_identity<index_type>::min(); @@ -1470,8 +1256,8 @@ struct ParallelReduceReturnValue; template <class ReturnType, class FunctorType> struct ParallelReduceReturnValue< - typename std::enable_if<Kokkos::is_view<ReturnType>::value>::type, - ReturnType, FunctorType> { + std::enable_if_t<Kokkos::is_view<ReturnType>::value>, ReturnType, + FunctorType> { using return_type = ReturnType; using reducer_type = InvalidType; @@ -1488,10 +1274,10 @@ struct ParallelReduceReturnValue< template <class ReturnType, class FunctorType> struct ParallelReduceReturnValue< - typename std::enable_if<!Kokkos::is_view<ReturnType>::value && - (!std::is_array<ReturnType>::value && - !std::is_pointer<ReturnType>::value) && - !Kokkos::is_reducer_type<ReturnType>::value>::type, + std::enable_if_t<!Kokkos::is_view<ReturnType>::value && + (!std::is_array<ReturnType>::value && + !std::is_pointer<ReturnType>::value) && + 
!Kokkos::is_reducer<ReturnType>::value>, ReturnType, FunctorType> { using return_type = Kokkos::View<ReturnType, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>; @@ -1507,10 +1293,10 @@ struct ParallelReduceReturnValue< template <class ReturnType, class FunctorType> struct ParallelReduceReturnValue< - typename std::enable_if<(std::is_array<ReturnType>::value || - std::is_pointer<ReturnType>::value)>::type, + std::enable_if_t<(std::is_array<ReturnType>::value || + std::is_pointer<ReturnType>::value)>, ReturnType, FunctorType> { - using return_type = Kokkos::View<typename std::remove_const<ReturnType>::type, + using return_type = Kokkos::View<std::remove_const_t<ReturnType>, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>; using reducer_type = InvalidType; @@ -1528,8 +1314,8 @@ struct ParallelReduceReturnValue< template <class ReturnType, class FunctorType> struct ParallelReduceReturnValue< - typename std::enable_if<Kokkos::is_reducer_type<ReturnType>::value>::type, - ReturnType, FunctorType> { + std::enable_if_t<Kokkos::is_reducer<ReturnType>::value>, ReturnType, + FunctorType> { using return_type = ReturnType; using reducer_type = ReturnType; using value_type = typename return_type::value_type; @@ -1544,8 +1330,7 @@ struct ParallelReducePolicyType; template <class PolicyType, class FunctorType> struct ParallelReducePolicyType< - typename std::enable_if< - Kokkos::is_execution_policy<PolicyType>::value>::type, + std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value>, PolicyType, FunctorType> { using policy_type = PolicyType; static PolicyType policy(const PolicyType& policy_) { return policy_; } @@ -1553,8 +1338,8 @@ struct ParallelReducePolicyType< template <class PolicyType, class FunctorType> struct ParallelReducePolicyType< - typename std::enable_if<std::is_integral<PolicyType>::value>::type, - PolicyType, FunctorType> { + std::enable_if_t<std::is_integral<PolicyType>::value>, PolicyType, + FunctorType> { using execution_space = typename 
Impl::FunctorPolicyExecutionSpace<FunctorType, void>::execution_space; @@ -1619,7 +1404,7 @@ struct ParallelReduceAdaptor { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 template <typename Dummy = ReturnType> KOKKOS_DEPRECATED_WITH_COMMENT( - "Array reductions with a raw pointer return type a deprecated. Use a " + "Array reductions with a raw pointer return type are deprecated. Use a " "Kokkos::View as return argument!") static inline std:: enable_if_t<is_array_reduction && std::is_pointer<Dummy>::value> execute( @@ -1720,8 +1505,8 @@ struct ParallelReduceFence { * using value_type = <podType>; * void operator()( <intType> iwork , <podType> & update ) const ; * void init( <podType> & update ) const ; - * void join( volatile <podType> & update , - * volatile const <podType> & input ) const ; + * void join( <podType> & update , + * const <podType> & input ) const ; * * void final( <podType> & update ) const ; * }; @@ -1736,8 +1521,8 @@ struct ParallelReduceFence { * using value_type = <podType>[]; * void operator()( <intType> , <podType> update[] ) const ; * void init( <podType> update[] ) const ; - * void join( volatile <podType> update[] , - * volatile const <podType> input[] ) const ; + * void join( <podType> update[] , + * const <podType> input[] ) const ; * * void final( <podType> update[] ) const ; * }; @@ -1915,16 +1700,17 @@ template <class PolicyType, class FunctorType> inline void parallel_reduce( const std::string& label, const PolicyType& policy, const FunctorType& functor, - typename std::enable_if< - Kokkos::is_execution_policy<PolicyType>::value>::type* = nullptr) { - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = std::conditional_t<(ValueTraits::StaticValueSize != 0), - typename ValueTraits::value_type, - typename ValueTraits::pointer_type>; + std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value>* = + nullptr) { + using FunctorAnalysis = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, 
PolicyType, + FunctorType>; + using value_type = std::conditional_t<(FunctorAnalysis::StaticValueSize != 0), + typename FunctorAnalysis::value_type, + typename FunctorAnalysis::pointer_type>; static_assert( - Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType, - FunctorType>::has_final_member_function, + FunctorAnalysis::has_final_member_function, "Calling parallel_reduce without either return value or final function."); using result_view_type = @@ -1939,16 +1725,17 @@ inline void parallel_reduce( template <class PolicyType, class FunctorType> inline void parallel_reduce( const PolicyType& policy, const FunctorType& functor, - typename std::enable_if< - Kokkos::is_execution_policy<PolicyType>::value>::type* = nullptr) { - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = std::conditional_t<(ValueTraits::StaticValueSize != 0), - typename ValueTraits::value_type, - typename ValueTraits::pointer_type>; + std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value>* = + nullptr) { + using FunctorAnalysis = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType, + FunctorType>; + using value_type = std::conditional_t<(FunctorAnalysis::StaticValueSize != 0), + typename FunctorAnalysis::value_type, + typename FunctorAnalysis::pointer_type>; static_assert( - Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType, - FunctorType>::has_final_member_function, + FunctorAnalysis::has_final_member_function, "Calling parallel_reduce without either return value or final function."); using result_view_type = @@ -1965,15 +1752,15 @@ inline void parallel_reduce(const size_t& policy, const FunctorType& functor) { using policy_type = typename Impl::ParallelReducePolicyType<void, size_t, FunctorType>::policy_type; - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = std::conditional_t<(ValueTraits::StaticValueSize != 0), - typename 
ValueTraits::value_type, - typename ValueTraits::pointer_type>; + using FunctorAnalysis = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, policy_type, + FunctorType>; + using value_type = std::conditional_t<(FunctorAnalysis::StaticValueSize != 0), + typename FunctorAnalysis::value_type, + typename FunctorAnalysis::pointer_type>; static_assert( - Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, - RangePolicy<>, - FunctorType>::has_final_member_function, + FunctorAnalysis::has_final_member_function, "Calling parallel_reduce without either return value or final function."); using result_view_type = @@ -1992,15 +1779,15 @@ inline void parallel_reduce(const std::string& label, const size_t& policy, using policy_type = typename Impl::ParallelReducePolicyType<void, size_t, FunctorType>::policy_type; - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = std::conditional_t<(ValueTraits::StaticValueSize != 0), - typename ValueTraits::value_type, - typename ValueTraits::pointer_type>; + using FunctorAnalysis = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, policy_type, + FunctorType>; + using value_type = std::conditional_t<(FunctorAnalysis::StaticValueSize != 0), + typename FunctorAnalysis::value_type, + typename FunctorAnalysis::pointer_type>; static_assert( - Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, - RangePolicy<>, - FunctorType>::has_final_member_function, + FunctorAnalysis::has_final_member_function, "Calling parallel_reduce without either return value or final function."); using result_view_type = diff --git a/packages/kokkos/core/src/Kokkos_PointerOwnership.hpp b/packages/kokkos/core/src/Kokkos_PointerOwnership.hpp index f1f168c38fea159835b34c1c25e0479f653cc76a..41b18a8d1441dcc0b7f8237c91c1420039a8526a 100644 --- a/packages/kokkos/core/src/Kokkos_PointerOwnership.hpp +++ b/packages/kokkos/core/src/Kokkos_PointerOwnership.hpp @@ -44,6 +44,15 @@ // Experimental 
unified task-data parallel manycore LDRD +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_IMPL_POINTEROWNERSHIP_HPP #define KOKKOS_IMPL_POINTEROWNERSHIP_HPP diff --git a/packages/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp b/packages/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp index 4556cddbabd748fdad211317822689749ff9defe..266605c0feca189bf6abbd5a2bc3078dd345cec2 100644 --- a/packages/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp +++ b/packages/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOSP_PROFILE_SECTION_HPP #define KOKKOSP_PROFILE_SECTION_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_PROFILING_PROFILESECTION +#endif #include <Kokkos_Macros.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> @@ -103,4 +107,8 @@ class ProfilingSection { } // namespace Profiling } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_CORE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PROFILING_PROFILESECTION +#endif #endif diff --git a/packages/kokkos/core/src/Kokkos_Rank.hpp b/packages/kokkos/core/src/Kokkos_Rank.hpp index 3603e2860891758e489471683d34438f219f84d5..025cf511fe90f4d21616449b226e4ef634ee9fe1 100644 --- a/packages/kokkos/core/src/Kokkos_Rank.hpp +++ b/packages/kokkos/core/src/Kokkos_Rank.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef 
KOKKOS_KOKKOS_RANK_HPP #define KOKKOS_KOKKOS_RANK_HPP diff --git a/packages/kokkos/core/src/Kokkos_SYCL.hpp b/packages/kokkos/core/src/Kokkos_SYCL.hpp index e29093db32953b09e5b98541ec3078a46313a885..a7f169606f087628260e502ccc5eebbaf67c9501 100644 --- a/packages/kokkos/core/src/Kokkos_SYCL.hpp +++ b/packages/kokkos/core/src/Kokkos_SYCL.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_SYCL_HPP #define KOKKOS_SYCL_HPP @@ -52,9 +61,9 @@ #include <Kokkos_SYCL_Space.hpp> #include <Kokkos_Layout.hpp> #include <Kokkos_ScratchSpace.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> #include <impl/Kokkos_HostSharedPtr.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> namespace Kokkos { namespace Experimental { @@ -87,9 +96,9 @@ class SYCL { return m_space_instance->impl_get_instance_id(); } - sycl::context sycl_context() const noexcept { - return m_space_instance->m_queue->get_context(); - }; + sycl::queue& sycl_queue() const noexcept { + return *m_space_instance->m_queue; + } //@} //------------------------------------ @@ -111,38 +120,19 @@ class SYCL { static bool wake(); /** \brief Wait until all dispatched functors complete. A noop for OpenMP. */ - static void impl_static_fence(); - static void impl_static_fence(const std::string&); - void fence() const; - void fence(const std::string&) const; + static void impl_static_fence(const std::string& name); + + void fence( + const std::string& name = + "Kokkos::Experimental::SYCL::fence: Unnamed Instance Fence") const; /// \brief Print configuration information to the given output stream. 
- void print_configuration(std::ostream&, const bool detail = false); + void print_configuration(std::ostream& os, bool verbose = false) const; /// \brief Free any resources being consumed by the device. static void impl_finalize(); - /** \brief Initialize the device. - * - */ - - struct SYCLDevice { - SYCLDevice() : SYCLDevice(sycl::default_selector()) {} - explicit SYCLDevice(sycl::device d); - explicit SYCLDevice(const sycl::device_selector& selector); - explicit SYCLDevice(size_t id); - - sycl::device get_device() const; - - friend std::ostream& operator<<(std::ostream& os, const SYCLDevice& that) { - return SYCL::impl_sycl_info(os, that.m_device); - } - - private: - sycl::device m_device; - }; - - static void impl_initialize(SYCLDevice = SYCLDevice()); + static void impl_initialize(InitializationSettings const&); int sycl_device() const; @@ -162,18 +152,6 @@ class SYCL { Kokkos::Impl::HostSharedPtr<Impl::SYCLInternal> m_space_instance; }; -namespace Impl { - -class SYCLSpaceInitializer : public Kokkos::Impl::ExecSpaceInitializerBase { - public: - void initialize(const InitArguments& args) final; - void finalize(const bool) final; - void fence() final; - void fence(const std::string&) final; - void print_configuration(std::ostream& msg, const bool detail) final; -}; - -} // namespace Impl } // namespace Experimental namespace Tools { @@ -198,12 +176,13 @@ std::vector<SYCL> partition_space(const SYCL& sycl_space, Args...) 
{ "Kokkos Error: partitioning arguments must be integers or floats"); #endif - sycl::context context = sycl_space.sycl_context(); - sycl::default_selector device_selector; + sycl::context context = sycl_space.sycl_queue().get_context(); + sycl::device device = + sycl_space.impl_internal_space_instance()->m_queue->get_device(); std::vector<SYCL> instances; instances.reserve(sizeof...(Args)); for (unsigned int i = 0; i < sizeof...(Args); ++i) - instances.emplace_back(sycl::queue(context, device_selector)); + instances.emplace_back(sycl::queue(context, device)); return instances; } @@ -214,12 +193,13 @@ std::vector<SYCL> partition_space(const SYCL& sycl_space, std::is_arithmetic<T>::value, "Kokkos Error: partitioning arguments must be integers or floats"); - sycl::context context = sycl_space.sycl_context(); - sycl::default_selector device_selector; + sycl::context context = sycl_space.sycl_queue().get_context(); + sycl::device device = + sycl_space.impl_internal_space_instance()->m_queue->get_device(); std::vector<SYCL> instances; instances.reserve(weights.size()); for (unsigned int i = 0; i < weights.size(); ++i) - instances.emplace_back(sycl::queue(context, device_selector)); + instances.emplace_back(sycl::queue(context, device)); return instances; } } // namespace Experimental diff --git a/packages/kokkos/core/src/Kokkos_SYCL_Space.hpp b/packages/kokkos/core/src/Kokkos_SYCL_Space.hpp index 15ef11024d53501356d8004c58fc94fbeca80227..e147d04dc86c4415276e192ece1eda20c01110c0 100644 --- a/packages/kokkos/core/src/Kokkos_SYCL_Space.hpp +++ b/packages/kokkos/core/src/Kokkos_SYCL_Space.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_SYCLSPACE_HPP #define KOKKOS_SYCLSPACE_HPP @@ 
-74,6 +83,11 @@ class SYCLDeviceUSMSpace { SYCLDeviceUSMSpace(); explicit SYCLDeviceUSMSpace(sycl::queue queue); + void* allocate(const SYCL& exec_space, + const std::size_t arg_alloc_size) const; + void* allocate(const SYCL& exec_space, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; void* allocate(const std::size_t arg_alloc_size) const; void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; @@ -105,6 +119,11 @@ class SYCLSharedUSMSpace { SYCLSharedUSMSpace(); explicit SYCLSharedUSMSpace(sycl::queue queue); + void* allocate(const SYCL& exec_space, + const std::size_t arg_alloc_size) const; + void* allocate(const SYCL& exec_space, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; void* allocate(const std::size_t arg_alloc_size) const; void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; @@ -136,6 +155,11 @@ class SYCLHostUSMSpace { SYCLHostUSMSpace(); explicit SYCLHostUSMSpace(sycl::queue queue); + void* allocate(const SYCL& exec_space, + const std::size_t arg_alloc_size) const; + void* allocate(const SYCL& exec_space, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; void* allocate(const std::size_t arg_alloc_size) const; void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; @@ -347,6 +371,21 @@ class SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void> protected: ~SharedAllocationRecord(); + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, + const Kokkos::Experimental::SYCLDeviceUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : SharedAllocationRecord(arg_space, arg_label, 
arg_alloc_size, + arg_dealloc) {} + + SharedAllocationRecord( + const Kokkos::Experimental::SYCL& exec_space, + const Kokkos::Experimental::SYCLDeviceUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate); + SharedAllocationRecord( const Kokkos::Experimental::SYCLDeviceUSMSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, @@ -378,6 +417,21 @@ class SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, void> SharedAllocationRecord() = default; + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, + const Kokkos::Experimental::SYCLSharedUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : SharedAllocationRecord(arg_space, arg_label, arg_alloc_size, + arg_dealloc) {} + + SharedAllocationRecord( + const Kokkos::Experimental::SYCL& exec_space, + const Kokkos::Experimental::SYCLSharedUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate); + SharedAllocationRecord( const Kokkos::Experimental::SYCLSharedUSMSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, @@ -409,6 +463,21 @@ class SharedAllocationRecord<Kokkos::Experimental::SYCLHostUSMSpace, void> SharedAllocationRecord() = default; + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, + const Kokkos::Experimental::SYCLHostUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate) + : SharedAllocationRecord(arg_space, arg_label, arg_alloc_size, + arg_dealloc) {} + + SharedAllocationRecord( + const Kokkos::Experimental::SYCL& exec_space, + const Kokkos::Experimental::SYCLHostUSMSpace& 
arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate); + SharedAllocationRecord( const Kokkos::Experimental::SYCLHostUSMSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, diff --git a/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp b/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp index bb740cfb86a966aefd4ac1ab6c9233ab81e0a97d..3e37eb61dcdc59a2ae2556aae0ce04128d0c4e04 100644 --- a/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_SCRATCHSPACE_HPP #define KOKKOS_SCRATCHSPACE_HPP diff --git a/packages/kokkos/core/src/Kokkos_Serial.hpp b/packages/kokkos/core/src/Kokkos_Serial.hpp index b2e524c3744556d2797d40bf0ad9becefc69a904..ffdd1e9fc840595bd73c272ac592a19940f1a000 100644 --- a/packages/kokkos/core/src/Kokkos_Serial.hpp +++ b/packages/kokkos/core/src/Kokkos_Serial.hpp @@ -45,6 +45,15 @@ /// \file Kokkos_Serial.hpp /// \brief Declaration and definition of Kokkos::Serial device. 
+#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_SERIAL_HPP #define KOKKOS_SERIAL_HPP @@ -56,7 +65,6 @@ #include <mutex> #include <thread> #include <Kokkos_Core_fwd.hpp> -#include <Kokkos_Parallel.hpp> #include <Kokkos_TaskScheduler.hpp> #include <Kokkos_Layout.hpp> #include <Kokkos_HostSpace.hpp> @@ -64,14 +72,9 @@ #include <Kokkos_MemoryTraits.hpp> #include <impl/Kokkos_HostThreadTeam.hpp> #include <impl/Kokkos_FunctorAnalysis.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_Tools.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> #include <impl/Kokkos_HostSharedPtr.hpp> - -#include <KokkosExp_MDRangePolicy.hpp> - -#include <Kokkos_UniqueToken.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> namespace Kokkos { @@ -151,10 +154,6 @@ class Serial { /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. 
- static void impl_static_fence() { - impl_static_fence( - "Kokkos::Serial::impl_static_fence: Unnamed Static Fence"); - } static void impl_static_fence(const std::string& name) { Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::Serial>( name, @@ -164,8 +163,8 @@ class Serial { Kokkos::memory_fence(); } - void fence() const { fence("Kokkos::Serial::fence: Unnamed Instance Fence"); } - void fence(const std::string& name) const { + void fence(const std::string& name = + "Kokkos::Serial::fence: Unnamed Instance Fence") const { Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::Serial>( name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, []() {}); // TODO: correct device ID @@ -176,10 +175,9 @@ class Serial { static int concurrency() { return 1; } //! Print configuration information to the given output stream. - static void print_configuration(std::ostream&, - const bool /* detail */ = false) {} + void print_configuration(std::ostream& os, bool verbose = false) const; - static void impl_initialize(); + static void impl_initialize(InitializationSettings const&); static bool impl_is_initialized(); @@ -230,21 +228,6 @@ struct DeviceTypeTraits<Serial> { }; } // namespace Experimental } // namespace Tools - -namespace Impl { - -class SerialSpaceInitializer : public ExecSpaceInitializerBase { - public: - SerialSpaceInitializer() = default; - ~SerialSpaceInitializer() = default; - void initialize(const InitArguments& args) final; - void finalize(const bool) final; - void fence() final; - void fence(const std::string&) final; - void print_configuration(std::ostream& msg, const bool detail) final; -}; - -} // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ @@ -253,6 +236,23 @@ class SerialSpaceInitializer : public ExecSpaceInitializerBase { namespace Kokkos { namespace Impl { +// We only need to provide a specialization for Serial if there is a host +// parallel execution 
space since the specialization for +// DefaultHostExecutionSpace is defined elsewhere. +struct DummyExecutionSpace; +template <class DT, class... DP> +struct ZeroMemset< + std::conditional_t<!std::is_same<Serial, DefaultHostExecutionSpace>::value, + Serial, DummyExecutionSpace>, + DT, DP...> : public ZeroMemset<DefaultHostExecutionSpace, DT, DP...> { + using Base = ZeroMemset<DefaultHostExecutionSpace, DT, DP...>; + using Base::Base; + + ZeroMemset(const Serial&, const View<DT, DP...>& dst, + typename View<DT, DP...>::const_value_type& value) + : Base(dst, value) {} +}; + template <> struct MemorySpaceAccess<Kokkos::Serial::memory_space, Kokkos::Serial::scratch_memory_space> { @@ -264,923 +264,11 @@ struct MemorySpaceAccess<Kokkos::Serial::memory_space, } // namespace Impl } // namespace Kokkos -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -/* - * < Kokkos::Serial , WorkArgTag > - * < WorkArgTag , Impl::enable_if< std::is_same< Kokkos::Serial , - * Kokkos::DefaultExecutionSpace >::value >::type > - * - */ -template <class... Properties> -class TeamPolicyInternal<Kokkos::Serial, Properties...> - : public PolicyTraits<Properties...> { - private: - size_t m_team_scratch_size[2]; - size_t m_thread_scratch_size[2]; - int m_league_size; - int m_chunk_size; - - public: - //! Tag this class as a kokkos execution policy - using execution_policy = TeamPolicyInternal; - - using traits = PolicyTraits<Properties...>; - - //! Execution space of this execution policy: - using execution_space = Kokkos::Serial; - - const typename traits::execution_space& space() const { - static typename traits::execution_space m_space; - return m_space; - } - - template <class ExecSpace, class... OtherProperties> - friend class TeamPolicyInternal; - - template <class... 
OtherProperties> - TeamPolicyInternal( - const TeamPolicyInternal<Kokkos::Serial, OtherProperties...>& p) { - m_league_size = p.m_league_size; - m_team_scratch_size[0] = p.m_team_scratch_size[0]; - m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; - m_team_scratch_size[1] = p.m_team_scratch_size[1]; - m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; - m_chunk_size = p.m_chunk_size; - } - - //---------------------------------------- - - template <class FunctorType> - int team_size_max(const FunctorType&, const ParallelForTag&) const { - return 1; - } - template <class FunctorType> - int team_size_max(const FunctorType&, const ParallelReduceTag&) const { - return 1; - } - template <class FunctorType, class ReducerType> - int team_size_max(const FunctorType&, const ReducerType&, - const ParallelReduceTag&) const { - return 1; - } - template <class FunctorType> - int team_size_recommended(const FunctorType&, const ParallelForTag&) const { - return 1; - } - template <class FunctorType> - int team_size_recommended(const FunctorType&, - const ParallelReduceTag&) const { - return 1; - } - template <class FunctorType, class ReducerType> - int team_size_recommended(const FunctorType&, const ReducerType&, - const ParallelReduceTag&) const { - return 1; - } - - //---------------------------------------- - - inline int team_size() const { return 1; } - inline bool impl_auto_team_size() const { return false; } - inline bool impl_auto_vector_length() const { return false; } - inline void impl_set_team_size(size_t) {} - inline void impl_set_vector_length(size_t) {} - inline int league_size() const { return m_league_size; } - inline size_t scratch_size(const int& level, int = 0) const { - return m_team_scratch_size[level] + m_thread_scratch_size[level]; - } - - inline int impl_vector_length() const { return 1; } - inline static int vector_length_max() { - return 1024; - } // Use arbitrary large number, is meant as a vectorizable length - - inline static int 
scratch_size_max(int level) { - return (level == 0 ? 1024 * 32 : 20 * 1024 * 1024); - } - /** \brief Specify league size, request team size */ - TeamPolicyInternal(const execution_space&, int league_size_request, - int team_size_request, int /* vector_length_request */ = 1) - : m_team_scratch_size{0, 0}, - m_thread_scratch_size{0, 0}, - m_league_size(league_size_request), - m_chunk_size(32) { - if (team_size_request > 1) - Kokkos::abort("Kokkos::abort: Requested Team Size is too large!"); - } - - TeamPolicyInternal(const execution_space& space, int league_size_request, - const Kokkos::AUTO_t& /**team_size_request*/, - int vector_length_request = 1) - : TeamPolicyInternal(space, league_size_request, -1, - vector_length_request) {} - - TeamPolicyInternal(const execution_space& space, int league_size_request, - const Kokkos::AUTO_t& /* team_size_request */ - , - const Kokkos::AUTO_t& /* vector_length_request */ - ) - : TeamPolicyInternal(space, league_size_request, -1, -1) {} - - TeamPolicyInternal(const execution_space& space, int league_size_request, - int team_size_request, - const Kokkos::AUTO_t& /* vector_length_request */ - ) - : TeamPolicyInternal(space, league_size_request, team_size_request, -1) {} - - TeamPolicyInternal(int league_size_request, - const Kokkos::AUTO_t& team_size_request, - int vector_length_request = 1) - : TeamPolicyInternal(typename traits::execution_space(), - league_size_request, team_size_request, - vector_length_request) {} - - TeamPolicyInternal(int league_size_request, - const Kokkos::AUTO_t& team_size_request, - const Kokkos::AUTO_t& vector_length_request) - : TeamPolicyInternal(typename traits::execution_space(), - league_size_request, team_size_request, - vector_length_request) {} - TeamPolicyInternal(int league_size_request, int team_size_request, - const Kokkos::AUTO_t& vector_length_request) - : TeamPolicyInternal(typename traits::execution_space(), - league_size_request, team_size_request, - vector_length_request) {} - - 
TeamPolicyInternal(int league_size_request, int team_size_request, - int vector_length_request = 1) - : TeamPolicyInternal(typename traits::execution_space(), - league_size_request, team_size_request, - vector_length_request) {} - - inline int chunk_size() const { return m_chunk_size; } - - /** \brief set chunk_size to a discrete value*/ - inline TeamPolicyInternal& set_chunk_size( - typename traits::index_type chunk_size_) { - m_chunk_size = chunk_size_; - return *this; - } - - /** \brief set per team scratch size for a specific level of the scratch - * hierarchy */ - inline TeamPolicyInternal& set_scratch_size(const int& level, - const PerTeamValue& per_team) { - m_team_scratch_size[level] = per_team.value; - return *this; - } - - /** \brief set per thread scratch size for a specific level of the scratch - * hierarchy */ - inline TeamPolicyInternal& set_scratch_size( - const int& level, const PerThreadValue& per_thread) { - m_thread_scratch_size[level] = per_thread.value; - return *this; - } - - /** \brief set per thread and per team scratch size for a specific level of - * the scratch hierarchy */ - inline TeamPolicyInternal& set_scratch_size( - const int& level, const PerTeamValue& per_team, - const PerThreadValue& per_thread) { - m_team_scratch_size[level] = per_team.value; - m_thread_scratch_size[level] = per_thread.value; - return *this; - } - - using member_type = Impl::HostThreadTeamMember<Kokkos::Serial>; -}; -} /* namespace Impl */ -} /* namespace Kokkos */ - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ -/* Parallel patterns for Kokkos::Serial with RangePolicy */ - -namespace Kokkos { -namespace Impl { - -template <class FunctorType, class... 
Traits> -class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Serial> { - private: - using Policy = Kokkos::RangePolicy<Traits...>; - - const FunctorType m_functor; - const Policy m_policy; - - template <class TagType> - typename std::enable_if<std::is_same<TagType, void>::value>::type exec() - const { - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - m_functor(i); - } - } - - template <class TagType> - typename std::enable_if<!std::is_same<TagType, void>::value>::type exec() - const { - const TagType t{}; - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - m_functor(t, i); - } - } - - public: - inline void execute() const { - this->template exec<typename Policy::work_tag>(); - } - - inline ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} -}; - -/*--------------------------------------------------------------------------*/ - -template <class FunctorType, class ReducerType, class... 
Traits> -class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, - Kokkos::Serial> { - private: - using Policy = Kokkos::RangePolicy<Traits...>; - using WorkTag = typename Policy::work_tag; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, - void>; - - using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; - - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename Analysis::pointer_type; - using reference_type = typename Analysis::reference_type; - - const FunctorType m_functor; - const Policy m_policy; - const ReducerType m_reducer; - const pointer_type m_result_ptr; - - template <class TagType> - inline typename std::enable_if<std::is_same<TagType, void>::value>::type exec( - reference_type update) const { - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - m_functor(i, update); - } - } - - template <class TagType> - inline typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec(reference_type update) const { - const TagType t{}; - - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - m_functor(t, i, update); - } - } - - public: - inline void execute() const { - const size_t pool_reduce_size = - Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); - const size_t team_reduce_size = 0; // Never shrinks - const size_t team_shared_size = 0; // Never shrinks - const size_t thread_local_size = 0; // Never shrinks - - auto* internal_instance = m_policy.space().impl_internal_space_instance(); - // Need to lock resize_thread_team_data - 
std::lock_guard<std::mutex> lock( - internal_instance->m_thread_team_data_mutex); - internal_instance->resize_thread_team_data( - pool_reduce_size, team_reduce_size, team_shared_size, - thread_local_size); - - pointer_type ptr = - m_result_ptr - ? m_result_ptr - : pointer_type( - internal_instance->m_thread_team_data.pool_reduce_local()); - - reference_type update = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), ptr); - - this->template exec<WorkTag>(update); - - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), ptr); - } - - template <class HostViewType> - ParallelReduce( - const FunctorType& arg_functor, const Policy& arg_policy, - const HostViewType& arg_result_view, - typename std::enable_if<Kokkos::is_view<HostViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = nullptr) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(InvalidType()), - m_result_ptr(arg_result_view.data()) { - static_assert(Kokkos::is_view<HostViewType>::value, - "Kokkos::Serial reduce result must be a View"); - - static_assert( - Kokkos::Impl::MemorySpaceAccess<typename HostViewType::memory_space, - Kokkos::HostSpace>::accessible, - "Kokkos::Serial reduce result must be a View in HostSpace"); - } - - inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, - const ReducerType& reducer) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(reducer), - m_result_ptr(reducer.view().data()) { - /*static_assert( std::is_same< typename ViewType::memory_space - , Kokkos::HostSpace >::value - , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" - );*/ - } -}; - -/*--------------------------------------------------------------------------*/ - -template <class FunctorType, class... 
Traits> -class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, - Kokkos::Serial> { - private: - using Policy = Kokkos::RangePolicy<Traits...>; - using WorkTag = typename Policy::work_tag; - - using Analysis = - FunctorAnalysis<FunctorPatternInterface::SCAN, Policy, FunctorType>; - - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - - using pointer_type = typename Analysis::pointer_type; - using reference_type = typename Analysis::reference_type; - - const FunctorType m_functor; - const Policy m_policy; - - template <class TagType> - inline typename std::enable_if<std::is_same<TagType, void>::value>::type exec( - reference_type update) const { - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - m_functor(i, update, true); - } - } - - template <class TagType> - inline typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec(reference_type update) const { - const TagType t{}; - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - m_functor(t, i, update, true); - } - } - - public: - inline void execute() const { - const size_t pool_reduce_size = Analysis::value_size(m_functor); - const size_t team_reduce_size = 0; // Never shrinks - const size_t team_shared_size = 0; // Never shrinks - const size_t thread_local_size = 0; // Never shrinks - - // Need to lock resize_thread_team_data - auto* internal_instance = m_policy.space().impl_internal_space_instance(); - std::lock_guard<std::mutex> lock( - internal_instance->m_thread_team_data_mutex); - internal_instance->resize_thread_team_data( - pool_reduce_size, team_reduce_size, team_shared_size, - thread_local_size); - - reference_type update = ValueInit::init( - m_functor, - pointer_type( - internal_instance->m_thread_team_data.pool_reduce_local())); - - this->template exec<WorkTag>(update); - } - - inline 
ParallelScan(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} -}; - -/*--------------------------------------------------------------------------*/ -template <class FunctorType, class ReturnType, class... Traits> -class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, - ReturnType, Kokkos::Serial> { - private: - using Policy = Kokkos::RangePolicy<Traits...>; - using WorkTag = typename Policy::work_tag; - - using Analysis = - FunctorAnalysis<FunctorPatternInterface::SCAN, Policy, FunctorType>; - - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - - using pointer_type = typename Analysis::pointer_type; - using reference_type = typename Analysis::reference_type; - - const FunctorType m_functor; - const Policy m_policy; - ReturnType& m_returnvalue; - - template <class TagType> - inline typename std::enable_if<std::is_same<TagType, void>::value>::type exec( - reference_type update) const { - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - m_functor(i, update, true); - } - } - - template <class TagType> - inline typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec(reference_type update) const { - const TagType t{}; - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - m_functor(t, i, update, true); - } - } - - public: - inline void execute() { - const size_t pool_reduce_size = Analysis::value_size(m_functor); - const size_t team_reduce_size = 0; // Never shrinks - const size_t team_shared_size = 0; // Never shrinks - const size_t thread_local_size = 0; // Never shrinks - - // Need to lock resize_thread_team_data - auto* internal_instance = m_policy.space().impl_internal_space_instance(); - std::lock_guard<std::mutex> lock( - internal_instance->m_thread_team_data_mutex); - 
internal_instance->resize_thread_team_data( - pool_reduce_size, team_reduce_size, team_shared_size, - thread_local_size); - - reference_type update = ValueInit::init( - m_functor, - pointer_type( - internal_instance->m_thread_team_data.pool_reduce_local())); - - this->template exec<WorkTag>(update); - - m_returnvalue = update; - } - - inline ParallelScanWithTotal(const FunctorType& arg_functor, - const Policy& arg_policy, - ReturnType& arg_returnvalue) - : m_functor(arg_functor), - m_policy(arg_policy), - m_returnvalue(arg_returnvalue) {} -}; - -} // namespace Impl -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ -/* Parallel patterns for Kokkos::Serial with MDRangePolicy */ - -namespace Kokkos { -namespace Impl { - -template <class FunctorType, class... Traits> -class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, - Kokkos::Serial> { - private: - using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>; - using Policy = typename MDRangePolicy::impl_range_policy; - - using iterate_type = typename Kokkos::Impl::HostIterateTile< - MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void>; - - const FunctorType m_functor; - const MDRangePolicy m_mdr_policy; - const Policy m_policy; - - void exec() const { - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - iterate_type(m_mdr_policy, m_functor)(i); - } - } - - public: - inline void execute() const { this->exec(); } - template <typename Policy, typename Functor> - static int max_tile_size_product(const Policy&, const Functor&) { - /** - * 1024 here is just our guess for a reasonable max tile size, - * it isn't a hardware constraint. If people see a use for larger - * tile size products, we're happy to change this. 
- */ - return 1024; - } - inline ParallelFor(const FunctorType& arg_functor, - const MDRangePolicy& arg_policy) - : m_functor(arg_functor), - m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} -}; - -template <class FunctorType, class ReducerType, class... Traits> -class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, - Kokkos::Serial> { - private: - using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>; - using Policy = typename MDRangePolicy::impl_range_policy; - - using WorkTag = typename MDRangePolicy::work_tag; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, - void>; - - using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE, - MDRangePolicy, FunctorType>; - - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename Analysis::pointer_type; - using value_type = typename Analysis::value_type; - using reference_type = typename Analysis::reference_type; - - using iterate_type = - typename Kokkos::Impl::HostIterateTile<MDRangePolicy, FunctorType, - WorkTag, reference_type>; - - const FunctorType m_functor; - const MDRangePolicy m_mdr_policy; - const Policy m_policy; - const ReducerType m_reducer; - const pointer_type m_result_ptr; - - inline void exec(reference_type update) const { - const typename Policy::member_type e = m_policy.end(); - for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { - iterate_type(m_mdr_policy, m_functor, update)(i); - } - } - - public: - template <typename Policy, typename Functor> - static int max_tile_size_product(const Policy&, const Functor&) { - /** - * 1024 here is just our guess for a reasonable max tile size, - * it isn't a hardware constraint. 
If people see a use for larger - * tile size products, we're happy to change this. - */ - return 1024; - } - inline void execute() const { - const size_t pool_reduce_size = - Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); - const size_t team_reduce_size = 0; // Never shrinks - const size_t team_shared_size = 0; // Never shrinks - const size_t thread_local_size = 0; // Never shrinks - - auto* internal_instance = m_policy.space().impl_internal_space_instance(); - // Need to lock resize_thread_team_data - std::lock_guard<std::mutex> lock( - internal_instance->m_thread_team_data_mutex); - internal_instance->resize_thread_team_data( - pool_reduce_size, team_reduce_size, team_shared_size, - thread_local_size); - - pointer_type ptr = - m_result_ptr - ? m_result_ptr - : pointer_type( - internal_instance->m_thread_team_data.pool_reduce_local()); - - reference_type update = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), ptr); - - this->exec(update); - - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), ptr); - } - - template <class HostViewType> - ParallelReduce( - const FunctorType& arg_functor, const MDRangePolicy& arg_policy, - const HostViewType& arg_result_view, - typename std::enable_if<Kokkos::is_view<HostViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = nullptr) - : m_functor(arg_functor), - m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), - m_reducer(InvalidType()), - m_result_ptr(arg_result_view.data()) { - static_assert(Kokkos::is_view<HostViewType>::value, - "Kokkos::Serial reduce result must be a View"); - - static_assert( - Kokkos::Impl::MemorySpaceAccess<typename HostViewType::memory_space, - Kokkos::HostSpace>::accessible, - "Kokkos::Serial reduce result must be a View in HostSpace"); - } - - inline ParallelReduce(const FunctorType& arg_functor, - MDRangePolicy arg_policy, 
const ReducerType& reducer) - : m_functor(arg_functor), - m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), - m_reducer(reducer), - m_result_ptr(reducer.view().data()) { - /*static_assert( std::is_same< typename ViewType::memory_space - , Kokkos::HostSpace >::value - , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" - );*/ - } -}; - -} // namespace Impl -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ -/* Parallel patterns for Kokkos::Serial with TeamPolicy */ - -namespace Kokkos { -namespace Impl { - -template <class FunctorType, class... Properties> -class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, - Kokkos::Serial> { - private: - enum { TEAM_REDUCE_SIZE = 512 }; - - using Policy = TeamPolicyInternal<Kokkos::Serial, Properties...>; - using Member = typename Policy::member_type; - - const FunctorType m_functor; - const Policy m_policy; - const int m_league; - const int m_shared; - - template <class TagType> - inline typename std::enable_if<std::is_same<TagType, void>::value>::type exec( - HostThreadTeamData& data) const { - for (int ileague = 0; ileague < m_league; ++ileague) { - m_functor(Member(data, ileague, m_league)); - } - } - - template <class TagType> - inline typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec(HostThreadTeamData& data) const { - const TagType t{}; - for (int ileague = 0; ileague < m_league; ++ileague) { - m_functor(t, Member(data, ileague, m_league)); - } - } - - public: - inline void execute() const { - const size_t pool_reduce_size = 0; // Never shrinks - const size_t team_reduce_size = TEAM_REDUCE_SIZE; - const size_t team_shared_size = m_shared; - const size_t thread_local_size = 0; // Never shrinks - - auto* internal_instance = m_policy.space().impl_internal_space_instance(); - // Need to lock 
resize_thread_team_data - std::lock_guard<std::mutex> lock( - internal_instance->m_thread_team_data_mutex); - internal_instance->resize_thread_team_data( - pool_reduce_size, team_reduce_size, team_shared_size, - thread_local_size); - - this->template exec<typename Policy::work_tag>( - internal_instance->m_thread_team_data); - } - - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), - m_policy(arg_policy), - m_league(arg_policy.league_size()), - m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + - FunctorTeamShmemSize<FunctorType>::value(arg_functor, 1)) {} -}; - -/*--------------------------------------------------------------------------*/ - -template <class FunctorType, class ReducerType, class... Properties> -class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, - ReducerType, Kokkos::Serial> { - private: - enum { TEAM_REDUCE_SIZE = 512 }; - - using Policy = TeamPolicyInternal<Kokkos::Serial, Properties...>; - - using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; - - using Member = typename Policy::member_type; - using WorkTag = typename Policy::work_tag; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, - void>; - - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename Analysis::pointer_type; - using reference_type = typename Analysis::reference_type; - - const FunctorType m_functor; - const Policy m_policy; - const int m_league; - const ReducerType m_reducer; - pointer_type m_result_ptr; - const int m_shared; - - template <class TagType> - inline typename std::enable_if<std::is_same<TagType, void>::value>::type exec( - HostThreadTeamData& data, 
reference_type update) const { - for (int ileague = 0; ileague < m_league; ++ileague) { - m_functor(Member(data, ileague, m_league), update); - } - } - - template <class TagType> - inline typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec(HostThreadTeamData& data, reference_type update) const { - const TagType t{}; - - for (int ileague = 0; ileague < m_league; ++ileague) { - m_functor(t, Member(data, ileague, m_league), update); - } - } - - public: - inline void execute() const { - const size_t pool_reduce_size = - Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); - - const size_t team_reduce_size = TEAM_REDUCE_SIZE; - const size_t team_shared_size = m_shared; - const size_t thread_local_size = 0; // Never shrinks - - auto* internal_instance = m_policy.space().impl_internal_space_instance(); - // Need to lock resize_thread_team_data - std::lock_guard<std::mutex> lock( - internal_instance->m_thread_team_data_mutex); - internal_instance->resize_thread_team_data( - pool_reduce_size, team_reduce_size, team_shared_size, - thread_local_size); - - pointer_type ptr = - m_result_ptr - ? 
m_result_ptr - : pointer_type( - internal_instance->m_thread_team_data.pool_reduce_local()); - - reference_type update = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), ptr); - - this->template exec<WorkTag>(internal_instance->m_thread_team_data, update); - - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), ptr); - } - - template <class ViewType> - ParallelReduce( - const FunctorType& arg_functor, const Policy& arg_policy, - const ViewType& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = nullptr) - : m_functor(arg_functor), - m_policy(arg_policy), - m_league(arg_policy.league_size()), - m_reducer(InvalidType()), - m_result_ptr(arg_result.data()), - m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + - FunctorTeamShmemSize<FunctorType>::value(m_functor, 1)) { - static_assert(Kokkos::is_view<ViewType>::value, - "Reduction result on Kokkos::Serial must be a Kokkos::View"); - - static_assert( - Kokkos::Impl::MemorySpaceAccess<typename ViewType::memory_space, - Kokkos::HostSpace>::accessible, - "Reduction result on Kokkos::Serial must be a Kokkos::View in " - "HostSpace"); - } - - inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, - const ReducerType& reducer) - : m_functor(arg_functor), - m_policy(arg_policy), - m_league(arg_policy.league_size()), - m_reducer(reducer), - m_result_ptr(reducer.view().data()), - m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + - FunctorTeamShmemSize<FunctorType>::value(arg_functor, 1)) { - /*static_assert( std::is_same< typename ViewType::memory_space - , Kokkos::HostSpace >::value - , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" - );*/ - } -}; - -} // namespace Impl -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ 
-/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Experimental { - -template <> -class UniqueToken<Serial, UniqueTokenScope::Instance> { - public: - using execution_space = Serial; - using size_type = int; - - /// \brief create object size for concurrency on the given instance - /// - /// This object should not be shared between instances - UniqueToken(execution_space const& = execution_space()) noexcept {} - - /// \brief create object size for requested size on given instance - /// - /// It is the users responsibility to only acquire size tokens concurrently - UniqueToken(size_type, execution_space const& = execution_space()) {} - - /// \brief upper bound for acquired values, i.e. 0 <= value < size() - KOKKOS_INLINE_FUNCTION - int size() const noexcept { return 1; } - - /// \brief acquire value such that 0 <= value < size() - KOKKOS_INLINE_FUNCTION - int acquire() const noexcept { return 0; } - - /// \brief release a value acquired by generate - KOKKOS_INLINE_FUNCTION - void release(int) const noexcept {} -}; - -template <> -class UniqueToken<Serial, UniqueTokenScope::Global> { - public: - using execution_space = Serial; - using size_type = int; - - /// \brief create object size for concurrency on the given instance - /// - /// This object should not be shared between instances - UniqueToken(execution_space const& = execution_space()) noexcept {} - - /// \brief upper bound for acquired values, i.e. 
0 <= value < size() - KOKKOS_INLINE_FUNCTION - int size() const noexcept { return 1; } - - /// \brief acquire value such that 0 <= value < size() - KOKKOS_INLINE_FUNCTION - int acquire() const noexcept { return 0; } - - /// \brief release a value acquired by generate - KOKKOS_INLINE_FUNCTION - void release(int) const noexcept {} -}; - -} // namespace Experimental -} // namespace Kokkos - -#include <impl/Kokkos_Serial_Task.hpp> +#include <Serial/Kokkos_Serial_Parallel_Range.hpp> +#include <Serial/Kokkos_Serial_Parallel_MDRange.hpp> +#include <Serial/Kokkos_Serial_Parallel_Team.hpp> +#include <Serial/Kokkos_Serial_Task.hpp> +#include <Serial/Kokkos_Serial_UniqueToken.hpp> #endif // defined( KOKKOS_ENABLE_SERIAL ) #endif /* #define KOKKOS_SERIAL_HPP */ diff --git a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp index e45feb8554dd9a0e594d0d4a373669801f8ba055..c3453b79e7d38dd696cffb485686b84db601a990 100644 --- a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_TASKSCHEDULER_HPP #define KOKKOS_TASKSCHEDULER_HPP @@ -145,7 +154,7 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { typename task_base::destroy_type /*arg_destroy*/, FunctorType&& arg_functor) { using functor_future_type = - future_type_for_functor<typename std::decay<FunctorType>::type>; + future_type_for_functor<std::decay_t<FunctorType>>; using task_type = Impl::Task<BasicTaskScheduler, typename functor_future_type::value_type, FunctorType>; @@ -301,11 +310,9 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { } template <int TaskEnum, 
typename DepFutureType, typename FunctorType> - KOKKOS_FUNCTION - future_type_for_functor<typename std::decay<FunctorType>::type> - spawn( - Impl::TaskPolicyWithPredecessor<TaskEnum, DepFutureType>&& arg_policy, - FunctorType&& arg_functor) { + KOKKOS_FUNCTION future_type_for_functor<std::decay_t<FunctorType>> spawn( + Impl::TaskPolicyWithPredecessor<TaskEnum, DepFutureType>&& arg_policy, + FunctorType&& arg_functor) { using task_type = runnable_task_type<FunctorType>; typename task_type::function_type const ptr = task_type::apply; typename task_type::destroy_type const dtor = task_type::destroy; @@ -521,7 +528,7 @@ namespace Kokkos { template <class T, class Scheduler> Impl::TaskPolicyWithPredecessor<Impl::TaskType::TaskTeam, - Kokkos::BasicFuture<T, Scheduler> > + Kokkos::BasicFuture<T, Scheduler>> KOKKOS_INLINE_FUNCTION TaskTeam(Kokkos::BasicFuture<T, Scheduler> arg_future, TaskPriority arg_priority = TaskPriority::Regular) { @@ -530,23 +537,22 @@ Impl::TaskPolicyWithPredecessor<Impl::TaskType::TaskTeam, template <class Scheduler> Impl::TaskPolicyWithScheduler<Impl::TaskType::TaskTeam, Scheduler> - KOKKOS_INLINE_FUNCTION - TaskTeam(Scheduler arg_scheduler, - typename std::enable_if<Kokkos::is_scheduler<Scheduler>::value, - TaskPriority>::type arg_priority = - TaskPriority::Regular) { + KOKKOS_INLINE_FUNCTION TaskTeam( + Scheduler arg_scheduler, + std::enable_if_t<Kokkos::is_scheduler<Scheduler>::value, TaskPriority> + arg_priority = TaskPriority::Regular) { return {std::move(arg_scheduler), arg_priority}; } template <class Scheduler, class PredecessorFuture> Impl::TaskPolicyWithScheduler<Kokkos::Impl::TaskType::TaskTeam, Scheduler, PredecessorFuture> - KOKKOS_INLINE_FUNCTION TaskTeam( - Scheduler arg_scheduler, PredecessorFuture arg_future, - typename std::enable_if<Kokkos::is_scheduler<Scheduler>::value && - Kokkos::is_future<PredecessorFuture>::value, - TaskPriority>::type arg_priority = - TaskPriority::Regular) { + KOKKOS_INLINE_FUNCTION + TaskTeam(Scheduler 
arg_scheduler, PredecessorFuture arg_future, + std::enable_if_t<Kokkos::is_scheduler<Scheduler>::value && + Kokkos::is_future<PredecessorFuture>::value, + TaskPriority> + arg_priority = TaskPriority::Regular) { static_assert(std::is_same<typename PredecessorFuture::scheduler_type, Scheduler>::value, "Can't create a task policy from a scheduler and a future from " @@ -559,7 +565,7 @@ Impl::TaskPolicyWithScheduler<Kokkos::Impl::TaskType::TaskTeam, Scheduler, template <class T, class Scheduler> Impl::TaskPolicyWithPredecessor<Impl::TaskType::TaskSingle, - Kokkos::BasicFuture<T, Scheduler> > + Kokkos::BasicFuture<T, Scheduler>> KOKKOS_INLINE_FUNCTION TaskSingle(Kokkos::BasicFuture<T, Scheduler> arg_future, TaskPriority arg_priority = TaskPriority::Regular) { @@ -568,23 +574,22 @@ Impl::TaskPolicyWithPredecessor<Impl::TaskType::TaskSingle, template <class Scheduler> Impl::TaskPolicyWithScheduler<Impl::TaskType::TaskSingle, Scheduler> - KOKKOS_INLINE_FUNCTION - TaskSingle(Scheduler arg_scheduler, - typename std::enable_if<Kokkos::is_scheduler<Scheduler>::value, - TaskPriority>::type arg_priority = - TaskPriority::Regular) { + KOKKOS_INLINE_FUNCTION TaskSingle( + Scheduler arg_scheduler, + std::enable_if_t<Kokkos::is_scheduler<Scheduler>::value, TaskPriority> + arg_priority = TaskPriority::Regular) { return {std::move(arg_scheduler), arg_priority}; } template <class Scheduler, class PredecessorFuture> Impl::TaskPolicyWithScheduler<Kokkos::Impl::TaskType::TaskSingle, Scheduler, PredecessorFuture> - KOKKOS_INLINE_FUNCTION TaskSingle( - Scheduler arg_scheduler, PredecessorFuture arg_future, - typename std::enable_if<Kokkos::is_scheduler<Scheduler>::value && + KOKKOS_INLINE_FUNCTION + TaskSingle(Scheduler arg_scheduler, PredecessorFuture arg_future, + std::enable_if_t<Kokkos::is_scheduler<Scheduler>::value && Kokkos::is_future<PredecessorFuture>::value, - TaskPriority>::type arg_priority = - TaskPriority::Regular) { + TaskPriority> + arg_priority = TaskPriority::Regular) { 
static_assert(std::is_same<typename PredecessorFuture::scheduler_type, Scheduler>::value, "Can't create a task policy from a scheduler and a future from " @@ -603,8 +608,7 @@ Impl::TaskPolicyWithScheduler<Kokkos::Impl::TaskType::TaskSingle, Scheduler, */ template <int TaskEnum, typename Scheduler, typename DepFutureType, typename FunctorType> -typename Scheduler::template future_type_for_functor< - typename std::decay<FunctorType>::type> +typename Scheduler::template future_type_for_functor<std::decay_t<FunctorType>> host_spawn(Impl::TaskPolicyWithScheduler<TaskEnum, Scheduler, DepFutureType> arg_policy, FunctorType&& arg_functor) { @@ -635,8 +639,7 @@ host_spawn(Impl::TaskPolicyWithScheduler<TaskEnum, Scheduler, DepFutureType> */ template <int TaskEnum, typename Scheduler, typename DepFutureType, typename FunctorType> -typename Scheduler::template future_type_for_functor< - typename std::decay<FunctorType>::type> +typename Scheduler::template future_type_for_functor<std::decay_t<FunctorType>> KOKKOS_INLINE_FUNCTION task_spawn(Impl::TaskPolicyWithScheduler<TaskEnum, Scheduler, DepFutureType> arg_policy, diff --git a/packages/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp b/packages/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp index 28af6345d1da49ee92b5da8cd7739f0a8cb80967..075a9bae2c1fa60e9615b603692ca5e870972af3 100644 --- a/packages/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp +++ b/packages/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_TASKSCHEDULER_FWD_HPP #define KOKKOS_TASKSCHEDULER_FWD_HPP diff --git a/packages/kokkos/core/src/Kokkos_Threads.hpp b/packages/kokkos/core/src/Kokkos_Threads.hpp index 
5879209f12ab8b2abd265878f2d4c276d6d69087..e6dcad54c1409fc81df6679efa7527230705a5e6 100644 --- a/packages/kokkos/core/src/Kokkos_Threads.hpp +++ b/packages/kokkos/core/src/Kokkos_Threads.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_THREADS_HPP #define KOKKOS_THREADS_HPP @@ -57,7 +66,7 @@ #include <Kokkos_Layout.hpp> #include <Kokkos_MemoryTraits.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> /*--------------------------------------------------------------------------*/ @@ -99,7 +108,7 @@ class Threads { static int in_parallel(); /// \brief Print configuration information to the given output stream. - static void print_configuration(std::ostream&, const bool detail = false); + void print_configuration(std::ostream& os, bool verbose = false) const; /// \brief Wait until all dispatched functors complete. /// @@ -107,11 +116,10 @@ class Threads { /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. - static void impl_static_fence(); static void impl_static_fence(const std::string& name); - void fence() const; - void fence(const std::string&) const; + void fence(const std::string& name = + "Kokkos::Threads::fence: Unnamed Instance Fence") const; /** \brief Return the maximum amount of concurrency. */ static int concurrency(); @@ -127,18 +135,7 @@ class Threads { //! \name Space-specific functions //@{ - /** - * Teams of threads are distributed as evenly as possible across - * the requested number of numa regions and cores per numa region. 
- * A team will not be split across a numa region. - * - * If the 'use_' arguments are not supplied, the hwloc is queried - * to use all available cores. - */ - static void impl_initialize(unsigned threads_count = 0, - unsigned use_numa_count = 0, - unsigned use_cores_per_numa = 0, - bool allow_asynchronous_threadpool = false); + static void impl_initialize(InitializationSettings const&); static int impl_is_initialized(); @@ -179,21 +176,6 @@ struct DeviceTypeTraits<Threads> { }; } // namespace Experimental } // namespace Tools - -namespace Impl { - -class ThreadsSpaceInitializer : public ExecSpaceInitializerBase { - public: - ThreadsSpaceInitializer() = default; - ~ThreadsSpaceInitializer() = default; - void initialize(const InitArguments& args) final; - void finalize(const bool) final; - void fence() final; - void fence(const std::string&) final; - void print_configuration(std::ostream& msg, const bool detail) final; -}; - -} // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ @@ -218,7 +200,10 @@ struct MemorySpaceAccess<Kokkos::Threads::memory_space, #include <Kokkos_Parallel.hpp> #include <Threads/Kokkos_ThreadsExec.hpp> #include <Threads/Kokkos_ThreadsTeam.hpp> -#include <Threads/Kokkos_Threads_Parallel.hpp> +#include <Threads/Kokkos_Threads_Parallel_Range.hpp> +#include <Threads/Kokkos_Threads_Parallel_MDRange.hpp> +#include <Threads/Kokkos_Threads_Parallel_Team.hpp> +#include <Threads/Kokkos_Threads_UniqueToken.hpp> #include <KokkosExp_MDRangePolicy.hpp> diff --git a/packages/kokkos/core/src/Kokkos_Timer.hpp b/packages/kokkos/core/src/Kokkos_Timer.hpp index a3a0b32574ebd78dd89f72f426d5dca250e9c202..38309b0a3e88374abe65169a59369194007e800d 100644 --- a/packages/kokkos/core/src/Kokkos_Timer.hpp +++ b/packages/kokkos/core/src/Kokkos_Timer.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_TIMER_HPP #define KOKKOS_TIMER_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define 
KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_TIMER +#endif #include <Kokkos_Macros.hpp> // gcc 10.3.0 with CUDA doesn't support std::chrono, @@ -111,4 +115,8 @@ class Timer { } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_TIMER +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_TIMER +#endif #endif /* #ifndef KOKKOS_TIMER_HPP */ diff --git a/packages/kokkos/core/src/Kokkos_Tuners.hpp b/packages/kokkos/core/src/Kokkos_Tuners.hpp index 52edd82052f4cfb3919b4733e4acb167780eaf8e..dba602732c957098b5275adaba1c9bfda61a5efe 100644 --- a/packages/kokkos/core/src/Kokkos_Tuners.hpp +++ b/packages/kokkos/core/src/Kokkos_Tuners.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_KOKKOS_TUNERS_HPP #define KOKKOS_KOKKOS_TUNERS_HPP diff --git a/packages/kokkos/core/src/Kokkos_UniqueToken.hpp b/packages/kokkos/core/src/Kokkos_UniqueToken.hpp index c6c1e7cead68e9932205ef0646bd3305dd22864d..3c58423d37dcdce407a6975b701e75dad3d3f60a 100644 --- a/packages/kokkos/core/src/Kokkos_UniqueToken.hpp +++ b/packages/kokkos/core/src/Kokkos_UniqueToken.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_UNIQUE_TOKEN_HPP #define KOKKOS_UNIQUE_TOKEN_HPP diff --git a/packages/kokkos/core/src/Kokkos_Vectorization.hpp b/packages/kokkos/core/src/Kokkos_Vectorization.hpp index a232e5b3abc1191bc357940b30d9707613c10957..4314ea4417e0e61fac9a9cae8b891d099787754f 100644 --- 
a/packages/kokkos/core/src/Kokkos_Vectorization.hpp +++ b/packages/kokkos/core/src/Kokkos_Vectorization.hpp @@ -44,6 +44,15 @@ /// \file Kokkos_Vectorization.hpp /// \brief Declaration and definition of Kokkos::Vectorization interface. +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_VECTORIZATION_HPP #define KOKKOS_VECTORIZATION_HPP diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp index b8d33e30c02331f69382ee1ae024592a5d82b50b..e92ed7d2e91395aef45292b3a3b3a4f5c9cd5cf7 100644 --- a/packages/kokkos/core/src/Kokkos_View.hpp +++ b/packages/kokkos/core/src/Kokkos_View.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_VIEW_HPP #define KOKKOS_VIEW_HPP @@ -54,6 +63,7 @@ #include <Kokkos_HostSpace.hpp> #include <Kokkos_MemoryTraits.hpp> #include <Kokkos_ExecPolicy.hpp> +#include <View/Hooks/Kokkos_ViewHooks.hpp> #include <impl/Kokkos_Tools.hpp> @@ -79,7 +89,7 @@ class ViewMapping { }; template <typename IntType> -KOKKOS_INLINE_FUNCTION std::size_t count_valid_integers( +constexpr KOKKOS_INLINE_FUNCTION std::size_t count_valid_integers( const IntType i0, const IntType i1, const IntType i2, const IntType i3, const IntType i4, const IntType i5, const IntType i6, const IntType i7) { static_assert(std::is_integral<IntType>::value, @@ -92,40 +102,27 @@ KOKKOS_INLINE_FUNCTION std::size_t count_valid_integers( } KOKKOS_INLINE_FUNCTION -void runtime_check_rank_device(const size_t 
dyn_rank, const bool is_void_spec, - const size_t i0, const size_t i1, - const size_t i2, const size_t i3, - const size_t i4, const size_t i5, - const size_t i6, const size_t i7) { - if (is_void_spec) { - const size_t num_passed_args = - count_valid_integers(i0, i1, i2, i3, i4, i5, i6, i7); - - if (num_passed_args != dyn_rank && is_void_spec) { - Kokkos::abort( - "Number of arguments passed to Kokkos::View() constructor must match " - "the dynamic rank of the view."); - } - } -} +void runtime_check_rank(const size_t rank, const size_t dyn_rank, + const bool is_void_spec, const size_t i0, + const size_t i1, const size_t i2, const size_t i3, + const size_t i4, const size_t i5, const size_t i6, + const size_t i7, const std::string& label) { + (void)(label); -inline void runtime_check_rank_host(const size_t dyn_rank, - const bool is_void_spec, const size_t i0, - const size_t i1, const size_t i2, - const size_t i3, const size_t i4, - const size_t i5, const size_t i6, - const size_t i7, const std::string& label) { if (is_void_spec) { const size_t num_passed_args = count_valid_integers(i0, i1, i2, i3, i4, i5, i6, i7); - if (num_passed_args != dyn_rank) { - const std::string message = - "Constructor for Kokkos View '" + label + - "' has mismatched number of arguments. Number of arguments = " + - std::to_string(num_passed_args) + - " but dynamic rank = " + std::to_string(dyn_rank) + " \n"; - Kokkos::abort(message.c_str()); + if (num_passed_args != dyn_rank && num_passed_args != rank) { + KOKKOS_IF_ON_HOST( + const std::string message = + "Constructor for Kokkos View '" + label + + "' has mismatched number of arguments. 
Number of arguments = " + + std::to_string(num_passed_args) + + " but dynamic rank = " + std::to_string(dyn_rank) + " \n"; + Kokkos::abort(message.c_str());) + KOKKOS_IF_ON_DEVICE(Kokkos::abort("Constructor for Kokkos View has " + "mismatched number of arguments.");) } } } @@ -174,6 +171,7 @@ struct ViewTraits<void> { using array_layout = void; using memory_traits = void; using specialize = void; + using hooks_policy = void; }; template <class... Prop> @@ -185,12 +183,25 @@ struct ViewTraits<void, void, Prop...> { using array_layout = typename ViewTraits<void, Prop...>::array_layout; using memory_traits = typename ViewTraits<void, Prop...>::memory_traits; using specialize = typename ViewTraits<void, Prop...>::specialize; + using hooks_policy = typename ViewTraits<void, Prop...>::hooks_policy; }; -template <class ArrayLayout, class... Prop> +template <class HooksPolicy, class... Prop> struct ViewTraits< - typename std::enable_if<Kokkos::is_array_layout<ArrayLayout>::value>::type, - ArrayLayout, Prop...> { + std::enable_if_t<Kokkos::Experimental::is_hooks_policy<HooksPolicy>::value>, + HooksPolicy, Prop...> { + using execution_space = typename ViewTraits<void, Prop...>::execution_space; + using memory_space = typename ViewTraits<void, Prop...>::memory_space; + using HostMirrorSpace = typename ViewTraits<void, Prop...>::HostMirrorSpace; + using array_layout = typename ViewTraits<void, Prop...>::array_layout; + using memory_traits = typename ViewTraits<void, Prop...>::memory_traits; + using specialize = typename ViewTraits<void, Prop...>::specialize; + using hooks_policy = HooksPolicy; +}; + +template <class ArrayLayout, class... 
Prop> +struct ViewTraits<std::enable_if_t<Kokkos::is_array_layout<ArrayLayout>::value>, + ArrayLayout, Prop...> { // Specify layout, keep subsequent space and memory traits arguments using execution_space = typename ViewTraits<void, Prop...>::execution_space; @@ -199,11 +210,12 @@ struct ViewTraits< using array_layout = ArrayLayout; using memory_traits = typename ViewTraits<void, Prop...>::memory_traits; using specialize = typename ViewTraits<void, Prop...>::specialize; + using hooks_policy = typename ViewTraits<void, Prop...>::hooks_policy; }; template <class Space, class... Prop> -struct ViewTraits<typename std::enable_if<Kokkos::is_space<Space>::value>::type, - Space, Prop...> { +struct ViewTraits<std::enable_if_t<Kokkos::is_space<Space>::value>, Space, + Prop...> { // Specify Space, memory traits should be the only subsequent argument. static_assert( @@ -224,12 +236,13 @@ struct ViewTraits<typename std::enable_if<Kokkos::is_space<Space>::value>::type, using array_layout = typename execution_space::array_layout; using memory_traits = typename ViewTraits<void, Prop...>::memory_traits; using specialize = typename ViewTraits<void, Prop...>::specialize; + using hooks_policy = typename ViewTraits<void, Prop...>::hooks_policy; }; template <class MemoryTraits, class... 
Prop> -struct ViewTraits<typename std::enable_if< - Kokkos::is_memory_traits<MemoryTraits>::value>::type, - MemoryTraits, Prop...> { +struct ViewTraits< + std::enable_if_t<Kokkos::is_memory_traits<MemoryTraits>::value>, + MemoryTraits, Prop...> { // Specify memory trait, should not be any subsequent arguments static_assert( @@ -240,6 +253,8 @@ struct ViewTraits<typename std::enable_if< std::is_same<typename ViewTraits<void, Prop...>::array_layout, void>::value && std::is_same<typename ViewTraits<void, Prop...>::memory_traits, + void>::value && + std::is_same<typename ViewTraits<void, Prop...>::hooks_policy, void>::value, "MemoryTrait is the final optional template argument for a View"); @@ -249,6 +264,7 @@ struct ViewTraits<typename std::enable_if< using array_layout = void; using memory_traits = MemoryTraits; using specialize = void; + using hooks_policy = void; }; template <class DataType, class... Properties> @@ -257,26 +273,35 @@ struct ViewTraits { // Unpack the properties arguments using prop = ViewTraits<void, Properties...>; - using ExecutionSpace = typename std::conditional< - !std::is_same<typename prop::execution_space, void>::value, - typename prop::execution_space, Kokkos::DefaultExecutionSpace>::type; + using ExecutionSpace = + std::conditional_t<!std::is_void<typename prop::execution_space>::value, + typename prop::execution_space, + Kokkos::DefaultExecutionSpace>; - using MemorySpace = typename std::conditional< - !std::is_same<typename prop::memory_space, void>::value, - typename prop::memory_space, typename ExecutionSpace::memory_space>::type; + using MemorySpace = + std::conditional_t<!std::is_void<typename prop::memory_space>::value, + typename prop::memory_space, + typename ExecutionSpace::memory_space>; - using ArrayLayout = typename std::conditional< - !std::is_same<typename prop::array_layout, void>::value, - typename prop::array_layout, typename ExecutionSpace::array_layout>::type; + using ArrayLayout = + 
std::conditional_t<!std::is_void<typename prop::array_layout>::value, + typename prop::array_layout, + typename ExecutionSpace::array_layout>; - using HostMirrorSpace = typename std::conditional< - !std::is_same<typename prop::HostMirrorSpace, void>::value, + using HostMirrorSpace = std::conditional_t< + !std::is_void<typename prop::HostMirrorSpace>::value, typename prop::HostMirrorSpace, - typename Kokkos::Impl::HostMirror<ExecutionSpace>::Space>::type; + typename Kokkos::Impl::HostMirror<ExecutionSpace>::Space>; - using MemoryTraits = typename std::conditional< - !std::is_same<typename prop::memory_traits, void>::value, - typename prop::memory_traits, typename Kokkos::MemoryManaged>::type; + using MemoryTraits = + std::conditional_t<!std::is_void<typename prop::memory_traits>::value, + typename prop::memory_traits, + typename Kokkos::MemoryManaged>; + + using HooksPolicy = + std::conditional_t<!std::is_void<typename prop::hooks_policy>::value, + typename prop::hooks_policy, + Kokkos::Experimental::DefaultViewHooks>; // Analyze data type's properties, // May be specialized based upon the layout and value type @@ -312,10 +337,10 @@ struct ViewTraits { using array_layout = ArrayLayout; using dimension = typename data_analysis::dimension; - using specialize = typename std::conditional< - std::is_same<typename data_analysis::specialize, void>::value, - typename prop::specialize, typename data_analysis::specialize>:: - type; /* mapping specialization tag */ + using specialize = std::conditional_t< + std::is_void<typename data_analysis::specialize>::value, + typename prop::specialize, + typename data_analysis::specialize>; /* mapping specialization tag */ enum { rank = dimension::rank }; enum { rank_dynamic = dimension::rank_dynamic }; @@ -328,6 +353,7 @@ struct ViewTraits { using device_type = Kokkos::Device<ExecutionSpace, MemorySpace>; using memory_traits = MemoryTraits; using host_mirror_space = HostMirrorSpace; + using hooks_policy = HooksPolicy; using size_type = 
typename MemorySpace::size_type; @@ -445,9 +471,8 @@ struct is_always_assignable_impl<Kokkos::View<ViewTDst...>, template <class View1, class View2> using is_always_assignable = is_always_assignable_impl< - typename std::remove_reference<View1>::type, - typename std::remove_const< - typename std::remove_reference<View2>::type>::type>; + std::remove_reference_t<View1>, + std::remove_const_t<std::remove_reference_t<View2>>>; #ifdef KOKKOS_ENABLE_CXX17 template <class T1, class T2> @@ -589,6 +614,7 @@ class View : public ViewTraits<DataType, Properties...> { Kokkos::Impl::ViewMapping<traits, typename traits::specialize>; template <typename V> friend struct Kokkos::Impl::ViewTracker; + using hooks_policy = typename traits::hooks_policy; view_tracker_type m_track; map_type m_map; @@ -598,28 +624,32 @@ class View : public ViewTraits<DataType, Properties...> { /** \brief Compatible view of array of scalar types */ using array_type = View<typename traits::scalar_array_type, typename traits::array_layout, - typename traits::device_type, typename traits::memory_traits>; + typename traits::device_type, typename traits::hooks_policy, + typename traits::memory_traits>; /** \brief Compatible view of const data type */ using const_type = View<typename traits::const_data_type, typename traits::array_layout, - typename traits::device_type, typename traits::memory_traits>; + typename traits::device_type, typename traits::hooks_policy, + typename traits::memory_traits>; /** \brief Compatible view of non-const data type */ using non_const_type = View<typename traits::non_const_data_type, typename traits::array_layout, - typename traits::device_type, typename traits::memory_traits>; + typename traits::device_type, typename traits::hooks_policy, + typename traits::memory_traits>; /** \brief Compatible HostMirror view */ using HostMirror = View<typename traits::non_const_data_type, typename traits::array_layout, Device<DefaultHostExecutionSpace, - typename 
traits::host_mirror_space::memory_space>>; + typename traits::host_mirror_space::memory_space>, + typename traits::hooks_policy>; /** \brief Compatible HostMirror view */ using host_mirror_type = View<typename traits::non_const_data_type, typename traits::array_layout, - typename traits::host_mirror_space>; + typename traits::host_mirror_space, typename traits::hooks_policy>; /** \brief Unified types */ using uniform_type = typename Impl::ViewUniformType<View, 0>::type; @@ -650,9 +680,9 @@ class View : public ViewTraits<DataType, Properties...> { // constexpr unsigned rank() { return map_type::Rank; } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, size_t>::type - extent(const iType& r) const noexcept { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, size_t> + extent(const iType& r) const noexcept { return m_map.extent(r); } @@ -662,9 +692,9 @@ class View : public ViewTraits<DataType, Properties...> { } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, int>::type - extent_int(const iType& r) const noexcept { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, int> + extent_int(const iType& r) const noexcept { return static_cast<int>(m_map.extent(r)); } @@ -710,9 +740,9 @@ class View : public ViewTraits<DataType, Properties...> { } template <typename iType> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<std::is_integral<iType>::value, size_t>::type - stride(iType r) const { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + std::is_integral<iType>::value, size_t> + stride(iType r) const { return ( r == 0 ? 
m_map.stride_0() @@ -783,440 +813,302 @@ class View : public ViewTraits<DataType, Properties...> { std::is_same<typename traits::array_layout, Kokkos::LayoutStride>::value; static constexpr bool is_default_map = - std::is_same<typename traits::specialize, void>::value && + std::is_void<typename traits::specialize>::value && (is_layout_left || is_layout_right || is_layout_stride); #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) -#define KOKKOS_IMPL_SINK(ARG) ARG - -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ - Kokkos::Impl::runtime_check_memory_access_violation< \ - typename traits::memory_space>( \ - "Kokkos::View ERROR: attempt to access inaccessible memory space"); \ - Kokkos::Impl::view_verify_operator_bounds<typename traits::memory_space> ARG; +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(...) \ + Kokkos::Impl::runtime_check_memory_access_violation< \ + typename traits::memory_space>( \ + "Kokkos::View ERROR: attempt to access inaccessible memory space", \ + __VA_ARGS__); \ + Kokkos::Impl::view_verify_operator_bounds<typename traits::memory_space>( \ + __VA_ARGS__); #else -#define KOKKOS_IMPL_SINK(ARG) - -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ - Kokkos::Impl::runtime_check_memory_access_violation< \ - typename traits::memory_space>( \ - "Kokkos::View ERROR: attempt to access inaccessible memory space"); +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(...) \ + Kokkos::Impl::runtime_check_memory_access_violation< \ + typename traits::memory_space>( \ + "Kokkos::View ERROR: attempt to access inaccessible memory space", \ + __VA_ARGS__); #endif - public: - //------------------------------ - // Rank 0 operator() - - KOKKOS_FORCEINLINE_FUNCTION - reference_type operator()() const { return m_map.reference(); } - //------------------------------ - // Rank 1 operator() + template <typename... Is> + static KOKKOS_FUNCTION void check_access_member_function_valid_args(Is...) 
{ + static_assert(Rank <= sizeof...(Is), ""); + static_assert(sizeof...(Is) <= 8, ""); + static_assert(Kokkos::Impl::are_integral<Is...>::value, ""); + } - template <typename I0> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && !is_default_map), - reference_type>::type - operator()(const I0& i0) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0)) - return m_map.reference(i0); + template <typename... Is> + static KOKKOS_FUNCTION void check_operator_parens_valid_args(Is...) { + static_assert(Rank == sizeof...(Is), ""); + static_assert(Kokkos::Impl::are_integral<Is...>::value, ""); } + public: + //------------------------------ + // Rank 1 default map operator() + template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && is_default_map && - !is_layout_stride), - reference_type>::type - operator()(const I0& i0) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0)) + std::enable_if_t<(Kokkos::Impl::always_true<I0>::value && // + (1 == Rank) && is_default_map && !is_layout_stride), + reference_type> + operator()(I0 i0) const { + check_operator_parens_valid_args(i0); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0) return m_map.m_impl_handle[i0]; } template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && is_default_map && - is_layout_stride), - reference_type>::type - operator()(const I0& i0) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0)) + std::enable_if_t<(Kokkos::Impl::always_true<I0>::value && // + (1 == Rank) && is_default_map && is_layout_stride), + reference_type> + operator()(I0 i0) const { + check_operator_parens_valid_args(i0); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0) return m_map.m_impl_handle[m_map.m_impl_offset.m_stride.S0 * i0]; } + //------------------------------ // Rank 1 
operator[] template <typename I0> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && !is_default_map), - reference_type>::type - operator[](const I0& i0) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0)) + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + ((1 == Rank) && Kokkos::Impl::are_integral<I0>::value && !is_default_map), + reference_type> + operator[](I0 i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0) return m_map.reference(i0); } template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && is_default_map && - !is_layout_stride), - reference_type>::type - operator[](const I0& i0) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0)) + std::enable_if_t<((1 == Rank) && Kokkos::Impl::are_integral<I0>::value && + is_default_map && !is_layout_stride), + reference_type> + operator[](I0 i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0) return m_map.m_impl_handle[i0]; } template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0>::value && - (1 == Rank) && is_default_map && - is_layout_stride), - reference_type>::type - operator[](const I0& i0) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0)) + std::enable_if_t<((1 == Rank) && Kokkos::Impl::are_integral<I0>::value && + is_default_map && is_layout_stride), + reference_type> + operator[](I0 i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0) return m_map.m_impl_handle[m_map.m_impl_offset.m_stride.S0 * i0]; } //------------------------------ - // Rank 2 - - template <typename I0, typename I1> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && !is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, 
m_map, i0, i1)) - return m_map.reference(i0, i1); - } + // Rank 2 default map operator() template <typename I0, typename I1> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_left && (traits::rank_dynamic == 0)), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1)) + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1>::value && // + (2 == Rank) && is_default_map && is_layout_left && + (traits::rank_dynamic == 0)), + reference_type> + operator()(I0 i0, I1 i1) const { + check_operator_parens_valid_args(i0, i1); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; } template <typename I0, typename I1> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_left && (traits::rank_dynamic != 0)), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1)) + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1>::value && // + (2 == Rank) && is_default_map && is_layout_left && + (traits::rank_dynamic != 0)), + reference_type> + operator()(I0 i0, I1 i1) const { + check_operator_parens_valid_args(i0, i1); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; } template <typename I0, typename I1> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_right && (traits::rank_dynamic == 0)), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1)) + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1>::value && // + (2 == Rank) && 
is_default_map && is_layout_right && + (traits::rank_dynamic == 0)), + reference_type> + operator()(I0 i0, I1 i1) const { + check_operator_parens_valid_args(i0, i1); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; } template <typename I0, typename I1> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_right && (traits::rank_dynamic != 0)), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1)) + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1>::value && // + (2 == Rank) && is_default_map && is_layout_right && + (traits::rank_dynamic != 0)), + reference_type> + operator()(I0 i0, I1 i1) const { + check_operator_parens_valid_args(i0, i1); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; } template <typename I0, typename I1> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1>::value && - (2 == Rank) && is_default_map && - is_layout_stride), - reference_type>::type - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1)) + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1>::value && // + (2 == Rank) && is_default_map && is_layout_stride), + reference_type> + operator()(I0 i0, I1 i1) const { + check_operator_parens_valid_args(i0, i1); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + i1 * m_map.m_impl_offset.m_stride.S1]; } - //------------------------------ - // Rank 3 + // Rank 0 -> 8 operator() except for rank-1 and rank-2 with default map which + // have "inlined" versions above - template <typename I0, typename I1, typename I2> - KOKKOS_FORCEINLINE_FUNCTION - typename 
std::enable_if<(Kokkos::Impl::are_integral<I0, I1, I2>::value && - (3 == Rank) && is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2)) - return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2)]; + template <typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<Is...>::value && // + (2 != Rank) && (1 != Rank) && (0 != Rank) && is_default_map), + reference_type> + operator()(Is... indices) const { + check_operator_parens_valid_args(indices...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, indices...) + return m_map.m_impl_handle[m_map.m_impl_offset(indices...)]; } - template <typename I0, typename I1, typename I2> + template <typename... Is> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, I1, I2>::value && - (3 == Rank) && !is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2)) - return m_map.reference(i0, i1, i2); - } - - //------------------------------ - // Rank 4 - - template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3>::value && (4 == Rank) && - is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, i3)) - return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3)]; - } - - template <typename I0, typename I1, typename I2, typename I3> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3>::value && (4 == Rank) && - !is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const { - 
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, i3)) - return m_map.reference(i0, i1, i2, i3); - } - - //------------------------------ - // Rank 5 - - template <typename I0, typename I1, typename I2, typename I3, typename I4> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4>::value && (5 == Rank) && - is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, i3, i4)) - return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4)]; - } - - template <typename I0, typename I1, typename I2, typename I3, typename I4> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4>::value && (5 == Rank) && - !is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, i3, i4)) - return m_map.reference(i0, i1, i2, i3, i4); - } - - //------------------------------ - // Rank 6 - - template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5>::value && - (6 == Rank) && is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4, const I5& i5) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, i3, i4, i5)) - return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5)]; - } - - template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5>::value && - (6 == Rank) && !is_default_map), - reference_type>::type - operator()(const 
I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4, const I5& i5) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, i3, i4, i5)) - return m_map.reference(i0, i1, i2, i3, i4, i5); + std::enable_if_t<(Kokkos::Impl::always_true<Is...>::value && // + ((0 == Rank) || !is_default_map)), + reference_type> + operator()(Is... indices) const { + check_operator_parens_valid_args(indices...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, indices...) + return m_map.reference(indices...); } //------------------------------ - // Rank 7 - - template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, typename I6> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6>::value && - (7 == Rank) && is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4, const I5& i5, const I6& i6) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6)) - return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5, i6)]; - } - - template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, typename I6> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6>::value && - (7 == Rank) && !is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4, const I5& i5, const I6& i6) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6)) - return m_map.reference(i0, i1, i2, i3, i4, i5, i6); + // Rank 0 + + template <typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<Is...>::value && (0 == Rank)), reference_type> + access(Is... 
extra) const { + check_access_member_function_valid_args(extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, extra...) + return m_map.reference(); } //------------------------------ - // Rank 8 + // Rank 1 - template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, typename I6, typename I7> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6, I7>::value && - (8 == Rank) && is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4, const I5& i5, const I6& i6, const I7& i7) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, i7)) - return m_map - .m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5, i6, i7)]; - } - - template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, typename I6, typename I7> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6, I7>::value && - (8 == Rank) && !is_default_map), - reference_type>::type - operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - const I4& i4, const I5& i5, const I6& i6, const I7& i7) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, i7)) - return m_map.reference(i0, i1, i2, i3, i4, i5, i6, i7); - } - - template <class... Args> + template <typename I0, typename... Is> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<Args...>::value && - (0 == Rank)), - reference_type>::type - access(Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, args...))) - return m_map.reference(); - } - - template <typename I0, class... 
Args> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, Args...>::value && - (1 == Rank) && !is_default_map), - reference_type>::type - access(const I0& i0, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, args...))) + std::enable_if_t<(Kokkos::Impl::always_true<I0, Is...>::value && + (1 == Rank) && !is_default_map), + reference_type> + access(I0 i0, Is... extra) const { + check_access_member_function_valid_args(i0, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, extra...) return m_map.reference(i0); } - template <typename I0, class... Args> + template <typename I0, typename... Is> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, Args...>::value && - (1 == Rank) && is_default_map && - !is_layout_stride), - reference_type>::type - access(const I0& i0, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, args...))) + std::enable_if_t<(Kokkos::Impl::always_true<I0, Is...>::value && + (1 == Rank) && is_default_map && !is_layout_stride), + reference_type> + access(I0 i0, Is... extra) const { + check_access_member_function_valid_args(i0, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, extra...) return m_map.m_impl_handle[i0]; } - template <typename I0, class... Args> + template <typename I0, typename... Is> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(Kokkos::Impl::are_integral<I0, Args...>::value && - (1 == Rank) && is_default_map && - is_layout_stride), - reference_type>::type - access(const I0& i0, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, args...))) + std::enable_if_t<(Kokkos::Impl::always_true<I0, Is...>::value && + (1 == Rank) && is_default_map && is_layout_stride), + reference_type> + access(I0 i0, Is... 
extra) const { + check_access_member_function_valid_args(i0, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, extra...) return m_map.m_impl_handle[m_map.m_impl_offset.m_stride.S0 * i0]; } - template <typename I0, typename I1, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, Args...>::value && (2 == Rank) && - !is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) + //------------------------------ + // Rank 2 + + template <typename I0, typename I1, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1, Is...>::value && + (2 == Rank) && !is_default_map), + reference_type> + access(I0 i0, I1 i1, Is... extra) const { + check_access_member_function_valid_args(i0, i1, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) return m_map.reference(i0, i1); } - template <typename I0, typename I1, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, Args...>::value && (2 == Rank) && + template <typename I0, typename I1, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, Is...>::value && (2 == Rank) && is_default_map && is_layout_left && (traits::rank_dynamic == 0)), - reference_type>::type - access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) + reference_type> + access(I0 i0, I1 i1, Is... extra) const { + check_access_member_function_valid_args(i0, i1, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; } - template <typename I0, typename I1, class... 
Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, Args...>::value && (2 == Rank) && + template <typename I0, typename I1, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, Is...>::value && (2 == Rank) && is_default_map && is_layout_left && (traits::rank_dynamic != 0)), - reference_type>::type - access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) + reference_type> + access(I0 i0, I1 i1, Is... extra) const { + check_access_member_function_valid_args(i0, i1, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; } - template <typename I0, typename I1, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, Args...>::value && (2 == Rank) && + template <typename I0, typename I1, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, Is...>::value && (2 == Rank) && is_default_map && is_layout_right && (traits::rank_dynamic == 0)), - reference_type>::type - access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) + reference_type> + access(I0 i0, I1 i1, Is... extra) const { + check_access_member_function_valid_args(i0, i1, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; } - template <typename I0, typename I1, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, Args...>::value && (2 == Rank) && + template <typename I0, typename I1, typename... 
Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, Is...>::value && (2 == Rank) && is_default_map && is_layout_right && (traits::rank_dynamic != 0)), - reference_type>::type - access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) + reference_type> + access(I0 i0, I1 i1, Is... extra) const { + check_access_member_function_valid_args(i0, i1, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; } - template <typename I0, typename I1, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, Args...>::value && (2 == Rank) && - is_default_map && is_layout_stride), - reference_type>::type - access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) + template <typename I0, typename I1, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1, Is...>::value && + (2 == Rank) && is_default_map && is_layout_stride), + reference_type> + access(I0 i0, I1 i1, Is... extra) const { + check_access_member_function_valid_args(i0, i1, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + i1 * m_map.m_impl_offset.m_stride.S1]; } @@ -1224,54 +1116,50 @@ class View : public ViewTraits<DataType, Properties...> { //------------------------------ // Rank 3 - template <typename I0, typename I1, typename I2, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, Args...>::value && (3 == Rank) && - is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, - Args... 
KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, args...))) + template <typename I0, typename I1, typename I2, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1, I2, Is...>::value && + (3 == Rank) && is_default_map), + reference_type> + access(I0 i0, I1 i1, I2 i2, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, extra...) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2)]; } - template <typename I0, typename I1, typename I2, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, Args...>::value && (3 == Rank) && - !is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, - Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, args...))) + template <typename I0, typename I1, typename I2, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1, I2, Is...>::value && + (3 == Rank) && !is_default_map), + reference_type> + access(I0 i0, I1 i1, I2 i2, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, extra...) return m_map.reference(i0, i1, i2); } //------------------------------ // Rank 4 - template <typename I0, typename I1, typename I2, typename I3, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, Args...>::value && - (4 == Rank) && is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - Args... 
KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, args...))) + template <typename I0, typename I1, typename I2, typename I3, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, I2, I3, Is...>::value && (4 == Rank) && + is_default_map), + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, extra...) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3)]; } - template <typename I0, typename I1, typename I2, typename I3, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, Args...>::value && - (4 == Rank) && !is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, args...))) + template <typename I0, typename I1, typename I2, typename I3, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, I2, I3, Is...>::value && (4 == Rank) && + !is_default_map), + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, extra...) return m_map.reference(i0, i1, i2, i3); } @@ -1279,28 +1167,28 @@ class View : public ViewTraits<DataType, Properties...> { // Rank 5 template <typename I0, typename I1, typename I2, typename I3, typename I4, - class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, Args...>::value && + typename... 
Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, I2, I3, I4, Is...>::value && (5 == Rank) && is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, args...))) + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, i4, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, i4, + extra...) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4)]; } template <typename I0, typename I1, typename I2, typename I3, typename I4, - class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, Args...>::value && + typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, I2, I3, I4, Is...>::value && (5 == Rank) && !is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, args...))) + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, i4, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, i4, + extra...) return m_map.reference(i0, i1, i2, i3, i4); } @@ -1308,28 +1196,28 @@ class View : public ViewTraits<DataType, Properties...> { // Rank 6 template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, Args...>::value && + typename I5, typename... 
Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, I2, I3, I4, I5, Is...>::value && (6 == Rank) && is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, i5, args...))) + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, i4, i5, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, i4, i5, + extra...) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5)]; } template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, Args...>::value && + typename I5, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, I2, I3, I4, I5, Is...>::value && (6 == Rank) && !is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, i5, args...))) + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, i4, i5, extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, i4, i5, + extra...) return m_map.reference(i0, i1, i2, i3, i4, i5); } @@ -1337,28 +1225,30 @@ class View : public ViewTraits<DataType, Properties...> { // Rank 7 template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, typename I6, class... 
Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6, Args...>::value && + typename I5, typename I6, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, I2, I3, I4, I5, I6, Is...>::value && (7 == Rank) && is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, const I6& i6, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, i5, i6, args...))) + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, i4, i5, i6, + extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, i4, i5, i6, + extra...) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5, i6)]; } template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, typename I6, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6, Args...>::value && + typename I5, typename I6, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< + (Kokkos::Impl::always_true<I0, I1, I2, I3, I4, I5, I6, Is...>::value && (7 == Rank) && !is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, const I6& i6, Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, i5, i6, args...))) + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6, Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, i4, i5, i6, + extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, i4, i5, i6, + extra...) 
return m_map.reference(i0, i1, i2, i3, i4, i5, i6); } @@ -1366,33 +1256,35 @@ class View : public ViewTraits<DataType, Properties...> { // Rank 8 template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, typename I6, typename I7, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6, I7, - Args...>::value && - (8 == Rank) && is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, const I6& i6, const I7& i7, - Args... KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(KOKKOS_IMPL_SINK( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, i7, args...))) + typename I5, typename I6, typename I7, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1, I2, I3, I4, I5, I6, + I7, Is...>::value && + (8 == Rank) && is_default_map), + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6, I7 i7, + Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, i4, i5, i6, i7, + extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, i4, i5, i6, + i7, extra...) return m_map .m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5, i6, i7)]; } template <typename I0, typename I1, typename I2, typename I3, typename I4, - typename I5, typename I6, typename I7, class... Args> - KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if< - (Kokkos::Impl::are_integral<I0, I1, I2, I3, I4, I5, I6, I7, - Args...>::value && - (8 == Rank) && !is_default_map), - reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, const I6& i6, const I7& i7, - Args... 
KOKKOS_IMPL_SINK(args)) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(KOKKOS_IMPL_SINK( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, i7, args...))) + typename I5, typename I6, typename I7, typename... Is> + KOKKOS_FORCEINLINE_FUNCTION + std::enable_if_t<(Kokkos::Impl::always_true<I0, I1, I2, I3, I4, I5, I6, + I7, Is...>::value && + (8 == Rank) && !is_default_map), + reference_type> + access(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6, I7 i7, + Is... extra) const { + check_access_member_function_valid_args(i0, i1, i2, i3, i4, i5, i6, i7, + extra...); + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, i2, i3, i4, i5, i6, + i7, extra...) return m_map.reference(i0, i1, i2, i3, i4, i5, i6, i7); } @@ -1407,17 +1299,36 @@ class View : public ViewTraits<DataType, Properties...> { KOKKOS_DEFAULTED_FUNCTION View() = default; - KOKKOS_DEFAULTED_FUNCTION - View(const View&) = default; + KOKKOS_FUNCTION + View(const View& other) : m_track(other.m_track), m_map(other.m_map) { + KOKKOS_IF_ON_HOST((hooks_policy::copy_construct(*this, other);)) + } - KOKKOS_DEFAULTED_FUNCTION - View(View&&) = default; + KOKKOS_FUNCTION + View(View&& other) + : m_track{std::move(other.m_track)}, m_map{std::move(other.m_map)} { + KOKKOS_IF_ON_HOST((hooks_policy::move_construct(*this, other);)) + } - KOKKOS_DEFAULTED_FUNCTION - View& operator=(const View&) = default; + KOKKOS_FUNCTION + View& operator=(const View& other) { + m_map = other.m_map; + m_track = other.m_track; - KOKKOS_DEFAULTED_FUNCTION - View& operator=(View&&) = default; + KOKKOS_IF_ON_HOST((hooks_policy::copy_assign(*this, other);)) + + return *this; + } + + KOKKOS_FUNCTION + View& operator=(View&& other) { + m_map = std::move(other.m_map); + m_track = std::move(other.m_track); + + KOKKOS_IF_ON_HOST((hooks_policy::move_assign(*this, other);)) + + return *this; + } //---------------------------------------- // Compatible view copy constructor and assignment @@ -1426,10 +1337,9 @@ class View : public ViewTraits<DataType, 
Properties...> { template <class RT, class... RP> KOKKOS_INLINE_FUNCTION View( const View<RT, RP...>& rhs, - typename std::enable_if<Kokkos::Impl::ViewMapping< + std::enable_if_t<Kokkos::Impl::ViewMapping< traits, typename View<RT, RP...>::traits, - typename traits::specialize>::is_assignable_data_type>::type* = - nullptr) + typename traits::specialize>::is_assignable_data_type>* = nullptr) : m_track(rhs), m_map() { using SrcTraits = typename View<RT, RP...>::traits; using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, @@ -1440,11 +1350,11 @@ class View : public ViewTraits<DataType, Properties...> { } template <class RT, class... RP> - KOKKOS_INLINE_FUNCTION typename std::enable_if< + KOKKOS_INLINE_FUNCTION std::enable_if_t< Kokkos::Impl::ViewMapping< traits, typename View<RT, RP...>::traits, typename traits::specialize>::is_assignable_data_type, - View>::type& + View>& operator=(const View<RT, RP...>& rhs) { using SrcTraits = typename View<RT, RP...>::traits; using Mapping = Kokkos::Impl::ViewMapping<traits, SrcTraits, @@ -1489,15 +1399,19 @@ class View : public ViewTraits<DataType, Properties...> { .template get_label<typename traits::memory_space>(); } + private: + enum class check_input_args : bool { yes = true, no = false }; + + public: //---------------------------------------- // Allocation according to allocation properties and array layout template <class... P> explicit inline View( const Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<!Impl::ViewCtorProp<P...>::has_pointer, - typename traits::array_layout>::type const& - arg_layout) + std::enable_if_t<!Impl::ViewCtorProp<P...>::has_pointer, + typename traits::array_layout> const& arg_layout, + check_input_args check_args = check_input_args::no) : m_track(), m_map() { // Append layout and spaces if not input using alloc_prop_input = Impl::ViewCtorProp<P...>; @@ -1506,17 +1420,15 @@ class View : public ViewTraits<DataType, Properties...> { // to avoid duplicate class error. 
using alloc_prop = Impl::ViewCtorProp< P..., - typename std::conditional<alloc_prop_input::has_label, - std::integral_constant<unsigned int, 0>, - typename std::string>::type, - typename std::conditional< - alloc_prop_input::has_memory_space, - std::integral_constant<unsigned int, 1>, - typename traits::device_type::memory_space>::type, - typename std::conditional< - alloc_prop_input::has_execution_space, - std::integral_constant<unsigned int, 2>, - typename traits::device_type::execution_space>::type>; + std::conditional_t<alloc_prop_input::has_label, + std::integral_constant<unsigned int, 0>, + std::string>, + std::conditional_t<alloc_prop_input::has_memory_space, + std::integral_constant<unsigned int, 1>, + typename traits::device_type::memory_space>, + std::conditional_t<alloc_prop_input::has_execution_space, + std::integral_constant<unsigned int, 2>, + typename traits::device_type::execution_space>>; static_assert(traits::is_managed, "View allocation constructor requires managed memory"); @@ -1533,6 +1445,26 @@ class View : public ViewTraits<DataType, Properties...> { // Copy the input allocation properties with possibly defaulted properties alloc_prop prop_copy(arg_prop); + if (check_args == check_input_args::yes) { + size_t i0 = arg_layout.dimension[0]; + size_t i1 = arg_layout.dimension[1]; + size_t i2 = arg_layout.dimension[2]; + size_t i3 = arg_layout.dimension[3]; + size_t i4 = arg_layout.dimension[4]; + size_t i5 = arg_layout.dimension[5]; + size_t i6 = arg_layout.dimension[6]; + size_t i7 = arg_layout.dimension[7]; + + const std::string& alloc_name = + static_cast<Kokkos::Impl::ViewCtorProp<void, std::string> const&>( + prop_copy) + .value; + Impl::runtime_check_rank( + traits::rank, traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, i0, i1, i2, + i3, i4, i5, i6, i7, alloc_name); + } + //------------------------------------------------------------ #if defined(KOKKOS_ENABLE_CUDA) // If allocating in CudaUVMSpace must fence 
before and after @@ -1548,8 +1480,8 @@ class View : public ViewTraits<DataType, Properties...> { #endif //------------------------------------------------------------ - Kokkos::Impl::SharedAllocationRecord<>* record = - m_map.allocate_shared(prop_copy, arg_layout); + Kokkos::Impl::SharedAllocationRecord<>* record = m_map.allocate_shared( + prop_copy, arg_layout, Impl::ViewCtorProp<P...>::has_execution_space); //------------------------------------------------------------ #if defined(KOKKOS_ENABLE_CUDA) @@ -1575,9 +1507,9 @@ class View : public ViewTraits<DataType, Properties...> { template <class... P> explicit KOKKOS_INLINE_FUNCTION View( const Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<Impl::ViewCtorProp<P...>::has_pointer, - typename traits::array_layout>::type const& - arg_layout) + std::enable_if_t<Impl::ViewCtorProp<P...>::has_pointer, + typename traits::array_layout> const& arg_layout, + check_input_args /*ignored*/ = check_input_args::no) // Not checking : m_track() // No memory tracking , m_map(arg_prop, arg_layout) { @@ -1592,9 +1524,8 @@ class View : public ViewTraits<DataType, Properties...> { template <class... 
P> explicit inline View( const Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<!Impl::ViewCtorProp<P...>::has_pointer, - size_t>::type const arg_N0 = - KOKKOS_IMPL_CTOR_DEFAULT_ARG, + std::enable_if_t<!Impl::ViewCtorProp<P...>::has_pointer, size_t> const + arg_N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -1604,25 +1535,18 @@ class View : public ViewTraits<DataType, Properties...> { const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) : View(arg_prop, typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, - arg_N4, arg_N5, arg_N6, arg_N7)) { - KOKKOS_IF_ON_HOST( - (Impl::runtime_check_rank_host( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());)) - KOKKOS_IF_ON_DEVICE( - (Impl::runtime_check_rank_device( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);)) + arg_N4, arg_N5, arg_N6, arg_N7), + check_input_args::yes) { + static_assert(traits::array_layout::is_extent_constructible, + "Layout is not constructible from extent arguments. Use " + "overload taking a layout object instead."); } template <class... 
P> explicit KOKKOS_INLINE_FUNCTION View( const Impl::ViewCtorProp<P...>& arg_prop, - typename std::enable_if<Impl::ViewCtorProp<P...>::has_pointer, - size_t>::type const arg_N0 = - KOKKOS_IMPL_CTOR_DEFAULT_ARG, + std::enable_if_t<Impl::ViewCtorProp<P...>::has_pointer, size_t> const + arg_N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -1632,35 +1556,28 @@ class View : public ViewTraits<DataType, Properties...> { const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) : View(arg_prop, typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, - arg_N4, arg_N5, arg_N6, arg_N7)) { - KOKKOS_IF_ON_HOST( - (Impl::runtime_check_rank_host( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());)) - KOKKOS_IF_ON_DEVICE( - (Impl::runtime_check_rank_device( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);)) + arg_N4, arg_N5, arg_N6, arg_N7), + check_input_args::yes) { + static_assert(traits::array_layout::is_extent_constructible, + "Layout is not constructible from extent arguments. Use " + "overload taking a layout object instead."); } // Allocate with label and layout template <typename Label> explicit inline View( const Label& arg_label, - typename std::enable_if<Kokkos::Impl::is_view_label<Label>::value, - typename traits::array_layout>::type const& - arg_layout) - : View(Impl::ViewCtorProp<std::string>(arg_label), arg_layout) {} + std::enable_if_t<Kokkos::Impl::is_view_label<Label>::value, + typename traits::array_layout> const& arg_layout) + : View(Impl::ViewCtorProp<std::string>(arg_label), arg_layout, + check_input_args::yes) {} // Allocate label and layout, must disambiguate from subview constructor. 
template <typename Label> explicit inline View( const Label& arg_label, - typename std::enable_if<Kokkos::Impl::is_view_label<Label>::value, - const size_t>::type arg_N0 = - KOKKOS_IMPL_CTOR_DEFAULT_ARG, + std::enable_if_t<Kokkos::Impl::is_view_label<Label>::value, const size_t> + arg_N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t arg_N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -1670,21 +1587,11 @@ class View : public ViewTraits<DataType, Properties...> { const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) : View(Impl::ViewCtorProp<std::string>(arg_label), typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, - arg_N4, arg_N5, arg_N6, arg_N7)) { + arg_N4, arg_N5, arg_N6, arg_N7), + check_input_args::yes) { static_assert(traits::array_layout::is_extent_constructible, - "Layout is not extent constructible. A layout object should " - "be passed too.\n"); - - KOKKOS_IF_ON_HOST( - (Impl::runtime_check_rank_host( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());)) - KOKKOS_IF_ON_DEVICE( - (Impl::runtime_check_rank_device( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);)) + "Layout is not constructible from extent arguments. Use " + "overload taking a layout object instead."); } // Construct view from ViewTracker and map @@ -1719,10 +1626,18 @@ class View : public ViewTraits<DataType, Properties...> { //---------------------------------------- // Memory span required to wrap these dimensions. 
+ static constexpr size_t required_allocation_size( + typename traits::array_layout const& layout) { + return map_type::memory_span(layout); + } + static constexpr size_t required_allocation_size( const size_t arg_N0 = 0, const size_t arg_N1 = 0, const size_t arg_N2 = 0, const size_t arg_N3 = 0, const size_t arg_N4 = 0, const size_t arg_N5 = 0, const size_t arg_N6 = 0, const size_t arg_N7 = 0) { + static_assert(traits::array_layout::is_extent_constructible, + "Layout is not constructible from extent arguments. Use " + "overload taking a layout object instead."); return map_type::memory_span(typename traits::array_layout( arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7)); } @@ -1738,17 +1653,11 @@ class View : public ViewTraits<DataType, Properties...> { const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) : View(Impl::ViewCtorProp<pointer_type>(arg_ptr), typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, - arg_N4, arg_N5, arg_N6, arg_N7)) { - KOKKOS_IF_ON_HOST( - (Impl::runtime_check_rank_host( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());)) - KOKKOS_IF_ON_DEVICE( - (Impl::runtime_check_rank_device( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);)) + arg_N4, arg_N5, arg_N6, arg_N7), + check_input_args::yes) { + static_assert(traits::array_layout::is_extent_constructible, + "Layout is not constructible from extent arguments. 
Use " + "overload taking a layout object instead."); } explicit KOKKOS_INLINE_FUNCTION View( @@ -1758,23 +1667,22 @@ class View : public ViewTraits<DataType, Properties...> { //---------------------------------------- // Shared scratch memory constructor - static inline size_t shmem_size(const size_t arg_N0 = KOKKOS_INVALID_INDEX, - const size_t arg_N1 = KOKKOS_INVALID_INDEX, - const size_t arg_N2 = KOKKOS_INVALID_INDEX, - const size_t arg_N3 = KOKKOS_INVALID_INDEX, - const size_t arg_N4 = KOKKOS_INVALID_INDEX, - const size_t arg_N5 = KOKKOS_INVALID_INDEX, - const size_t arg_N6 = KOKKOS_INVALID_INDEX, - const size_t arg_N7 = KOKKOS_INVALID_INDEX) { - if (is_layout_stride) { - Kokkos::abort( - "Kokkos::View::shmem_size(extents...) doesn't work with " - "LayoutStride. Pass a LayoutStride object instead"); - } + static KOKKOS_INLINE_FUNCTION size_t + shmem_size(const size_t arg_N0 = KOKKOS_INVALID_INDEX, + const size_t arg_N1 = KOKKOS_INVALID_INDEX, + const size_t arg_N2 = KOKKOS_INVALID_INDEX, + const size_t arg_N3 = KOKKOS_INVALID_INDEX, + const size_t arg_N4 = KOKKOS_INVALID_INDEX, + const size_t arg_N5 = KOKKOS_INVALID_INDEX, + const size_t arg_N6 = KOKKOS_INVALID_INDEX, + const size_t arg_N7 = KOKKOS_INVALID_INDEX) { + static_assert(traits::array_layout::is_extent_constructible, + "Layout is not constructible from extent arguments. 
Use " + "overload taking a layout object instead."); const size_t num_passed_args = Impl::count_valid_integers( arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7); - if (std::is_same<typename traits::specialize, void>::value && + if (std::is_void<typename traits::specialize>::value && num_passed_args != traits::rank_dynamic) { Kokkos::abort( "Kokkos::View::shmem_size() rank_dynamic != number of arguments.\n"); @@ -1784,8 +1692,8 @@ class View : public ViewTraits<DataType, Properties...> { arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7)); } - static inline size_t shmem_size( - typename traits::array_layout const& arg_layout) { + static KOKKOS_INLINE_FUNCTION size_t + shmem_size(typename traits::array_layout const& arg_layout) { return map_type::memory_span(arg_layout) + sizeof(typename traits::value_type); } @@ -1816,17 +1724,11 @@ class View : public ViewTraits<DataType, Properties...> { arg_N7)), sizeof(typename traits::value_type)))), typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, - arg_N4, arg_N5, arg_N6, arg_N7)) { - KOKKOS_IF_ON_HOST( - (Impl::runtime_check_rank_host( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label());)) - KOKKOS_IF_ON_DEVICE( - (Impl::runtime_check_rank_device( - traits::rank_dynamic, - std::is_same<typename traits::specialize, void>::value, arg_N0, - arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7);)) + arg_N4, arg_N5, arg_N6, arg_N7), + check_input_args::yes) { + static_assert(traits::array_layout::is_extent_constructible, + "Layout is not constructible from extent arguments. Use " + "overload taking a layout object instead."); } }; @@ -1852,8 +1754,8 @@ struct RankDataType<ValueType, 0> { }; template <unsigned N, typename... 
Args> -std::enable_if_t<N == View<Args...>::Rank, View<Args...>> as_view_of_rank_n( - View<Args...> v) { +KOKKOS_FUNCTION std::enable_if_t<N == View<Args...>::Rank, View<Args...>> +as_view_of_rank_n(View<Args...> v) { return v; } @@ -1982,7 +1884,7 @@ struct CommonViewValueType; template <typename A, typename B> struct CommonViewValueType<void, A, B> { - using value_type = typename std::common_type<A, B>::type; + using value_type = std::common_type_t<A, B>; }; template <class Specialize, class ValueType> @@ -2033,17 +1935,17 @@ struct DeduceCommonViewAllocProp<FirstView, NextViews...> { // if first and next specialize differ, but are not the same specialize, error // out static_assert(!(!std::is_same<first_specialize, next_specialize>::value && - !std::is_same<first_specialize, void>::value && - !std::is_same<void, next_specialize>::value), + !std::is_void<first_specialize>::value && + !std::is_void<next_specialize>::value), "Kokkos DeduceCommonViewAllocProp ERROR: Only one non-void " "specialize trait allowed"); // otherwise choose non-void specialize if either/both are non-void - using specialize = typename std::conditional< + using specialize = std::conditional_t< std::is_same<first_specialize, next_specialize>::value, first_specialize, - typename std::conditional<(std::is_same<first_specialize, void>::value && - !std::is_same<next_specialize, void>::value), - next_specialize, first_specialize>::type>::type; + std::conditional_t<(std::is_void<first_specialize>::value && + !std::is_void<next_specialize>::value), + next_specialize, first_specialize>>; using value_type = typename CommonViewValueType<specialize, first_value_type, next_value_type>::value_type; @@ -2059,7 +1961,17 @@ template <class... Views> using DeducedCommonPropsType = typename Impl::DeduceCommonViewAllocProp<Views...>::prop_type; -// User function +// This function is required in certain scenarios where users customize +// Kokkos View internals. 
One example are dynamic length embedded ensemble +// types. The function is used to propagate necessary information +// (like the ensemble size) when creating new views. +// However, most of the time it is called with a single view. +// Furthermore, the propagated information is not just for view allocations. +// From what I can tell, the type of functionality provided by +// common_view_alloc_prop is the equivalent of propagating accessors in mdspan, +// a mechanism we will eventually use to replace this clunky approach here, when +// we are finally mdspan based. +// TODO: get rid of this when we have mdspan template <class... Views> KOKKOS_INLINE_FUNCTION DeducedCommonPropsType<Views...> common_view_alloc_prop( Views const&... views) { diff --git a/packages/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp b/packages/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp index dbb557c13743fa79235ba3786a367b1ab2ac7adc..fafd825df297123e100ccf008069f24e4a2cf1e5 100644 --- a/packages/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp +++ b/packages/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_WORKGRAPHPOLICY_HPP #define KOKKOS_WORKGRAPHPOLICY_HPP @@ -243,7 +252,7 @@ class WorkGraphPolicy : public Kokkos::Impl::PolicyTraits<Properties...> { } // namespace Kokkos #ifdef KOKKOS_ENABLE_SERIAL -#include "impl/Kokkos_Serial_WorkGraphPolicy.hpp" +#include "Serial/Kokkos_Serial_WorkGraphPolicy.hpp" #endif #ifdef KOKKOS_ENABLE_OPENMP diff --git a/packages/kokkos/core/src/Kokkos_hwloc.hpp b/packages/kokkos/core/src/Kokkos_hwloc.hpp index 23fa0a0c67001161cb510e6904455091cc4b5986..abbec5409e067fae0bce73d3fb37d705a1a421aa 100644 --- 
a/packages/kokkos/core/src/Kokkos_hwloc.hpp +++ b/packages/kokkos/core/src/Kokkos_hwloc.hpp @@ -42,6 +42,15 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif #ifndef KOKKOS_HWLOC_HPP #define KOKKOS_HWLOC_HPP diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC.cpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f3216095be34b3898124637123ee2cac6cbc25fa --- /dev/null +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC.cpp @@ -0,0 +1,98 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#define KOKKOS_IMPL_PUBLIC_INCLUDE + +#include <OpenACC/Kokkos_OpenACC.hpp> +#include <OpenACC/Kokkos_OpenACC_Instance.hpp> +#include <impl/Kokkos_Profiling.hpp> +#include <impl/Kokkos_ExecSpaceManager.hpp> + +#include <ostream> + +Kokkos::Experimental::OpenACC::OpenACC() + : m_space_instance(Impl::OpenACCInternal::singleton()) {} + +void Kokkos::Experimental::OpenACC::impl_initialize( + InitializationSettings const& settings) { + Impl::OpenACCInternal::singleton()->initialize(settings); +} + +void Kokkos::Experimental::OpenACC::impl_finalize() { + Impl::OpenACCInternal::singleton()->finalize(); +} + +bool Kokkos::Experimental::OpenACC::impl_is_initialized() { + return Impl::OpenACCInternal::singleton()->is_initialized(); +} + +void Kokkos::Experimental::OpenACC::print_configuration(std::ostream& os, + bool verbose) const { + os << "macro KOKKOS_ENABLE_OPENACC is defined\n"; // FIXME_OPENACC + m_space_instance->print_configuration(os, verbose); +} + +void Kokkos::Experimental::OpenACC::fence(std::string const& name) const { + 
Impl::OpenACCInternal::singleton()->fence(name); +} + +void Kokkos::Experimental::OpenACC::impl_static_fence(std::string const& name) { + Kokkos::Tools::Experimental::Impl::profile_fence_event< + Kokkos::Experimental::OpenACC>( + name, + Kokkos::Tools::Experimental::SpecialSynchronizationCases:: + GlobalDeviceSynchronization, + [&]() { acc_wait_all(); }); +} + +uint32_t Kokkos::Experimental::OpenACC::impl_instance_id() const noexcept { + return m_space_instance->instance_id(); +} + +namespace Kokkos { +namespace Impl { +int g_openacc_space_factory_initialized = + initialize_space_factory<Experimental::OpenACC>("170_OpenACC"); +} // namespace Impl +} // Namespace Kokkos diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC.hpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3ad59057b5158772e14e94e0a4cd216d1236451b --- /dev/null +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC.hpp @@ -0,0 +1,126 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif + +#ifndef KOKKOS_OPENACC_HPP +#define KOKKOS_OPENACC_HPP + +#include <OpenACC/Kokkos_OpenACCSpace.hpp> +#include <Kokkos_Concepts.hpp> +#include <Kokkos_Layout.hpp> +#include <Kokkos_ScratchSpace.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> +#include <impl/Kokkos_Profiling_Interface.hpp> +#include <OpenACC/Kokkos_OpenACC_Traits.hpp> + +#include <openacc.h> + +#include <iosfwd> +#include <string> + +namespace Kokkos::Experimental::Impl { +class OpenACCInternal; +} + +namespace Kokkos::Experimental { + +class OpenACC { + Impl::OpenACCInternal* m_space_instance = nullptr; 
+ + public: + using execution_space = OpenACC; + using memory_space = OpenACCSpace; + using device_type = Kokkos::Device<execution_space, memory_space>; + + using array_layout = LayoutLeft; + using size_type = memory_space::size_type; + + using scratch_memory_space = ScratchMemorySpace<OpenACC>; + + OpenACC(); + + static void impl_initialize(InitializationSettings const& settings); + static void impl_finalize(); + static bool impl_is_initialized(); + + void print_configuration(std::ostream& os, bool verbose = false) const; + + void fence(std::string const& name = + "Kokkos::OpenACC::fence(): Unnamed Instance Fence") const; + static void impl_static_fence(std::string const& name); + + static char const* name() { return "OpenACC"; } + static int concurrency() { return 256000; } // FIXME_OPENACC + static bool in_parallel() { return acc_on_device(acc_device_not_host); } + uint32_t impl_instance_id() const noexcept; +}; + +} // namespace Kokkos::Experimental + +template <> +struct Kokkos::Tools::Experimental::DeviceTypeTraits< + ::Kokkos::Experimental::OpenACC> { + static constexpr DeviceType id = + ::Kokkos::Profiling::Experimental::DeviceType::OpenACC; + // FIXME_OPENACC: Need to return the device id from the execution space + // instance. In fact, acc_get_device_num() will return the same value as the + // device id from the execution space instance except for the host fallback + // case, where the device id may need to be updated with the value of + // acc_get_device_num(). 
+ static int device_id(const Kokkos::Experimental::OpenACC&) { + using Kokkos::Experimental::Impl::OpenACC_Traits; + return acc_get_device_num(OpenACC_Traits::dev_type); + } +}; + +#endif diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bc2ba181562927bfa9154a7f584080384ed177ca --- /dev/null +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp @@ -0,0 +1,222 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#define KOKKOS_IMPL_PUBLIC_INCLUDE + +#include <OpenACC/Kokkos_OpenACC.hpp> +#include <OpenACC/Kokkos_OpenACCSpace.hpp> +#include <impl/Kokkos_MemorySpace.hpp> +#include <impl/Kokkos_Profiling_Interface.hpp> + +#include <openacc.h> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +void *Kokkos::Experimental::OpenACCSpace::allocate( + const Kokkos::Experimental::OpenACC &exec_space, + const size_t arg_alloc_size) const { + return allocate(exec_space, "[unlabeled]", arg_alloc_size); +} + +void *Kokkos::Experimental::OpenACCSpace::allocate( + const size_t arg_alloc_size) const { + return allocate("[unlabeled]", arg_alloc_size); +} + +void *Kokkos::Experimental::OpenACCSpace::allocate( + const Kokkos::Experimental::OpenACC &exec_space, const char *arg_label, + const size_t arg_alloc_size, const size_t arg_logical_size) const { + return impl_allocate(exec_space, arg_label, arg_alloc_size, arg_logical_size); +} + +void *Kokkos::Experimental::OpenACCSpace::allocate( + const char *arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return impl_allocate(arg_label, arg_alloc_size, arg_logical_size); +} + +void 
*Kokkos::Experimental::OpenACCSpace::impl_allocate( + const Kokkos::Experimental::OpenACC &exec_space, const char *arg_label, + const size_t arg_alloc_size, const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + static_assert(sizeof(void *) == sizeof(uintptr_t), + "Error sizeof(void*) != sizeof(uintptr_t)"); + + void *ptr = nullptr; + + // FIXME_OPENACC multiple device instances are not yet supported, and thus + // exec_space is ignored for now. + (void)exec_space; + + ptr = acc_malloc(arg_alloc_size); + + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t reported_size = + (arg_logical_size > 0) ? arg_logical_size : arg_alloc_size; + Kokkos::Profiling::allocateData(arg_handle, arg_label, ptr, reported_size); + } + + return ptr; +} + +void *Kokkos::Experimental::OpenACCSpace::impl_allocate( + const char *arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + static_assert(sizeof(void *) == sizeof(uintptr_t), + "Error sizeof(void*) != sizeof(uintptr_t)"); + + void *ptr = nullptr; + + //[DEBUG] Disabled due to the synchronous behavior of the current + // implementation. + /* + OpenACC::impl_static_fence( + "Kokkos::OpenACCSpace::impl_allocate: Pre OpenACC Allocation"); + */ + + ptr = acc_malloc(arg_alloc_size); + + //[DEBUG] Disabled due to the synchronous behavior of the current + // implementation. + /* + OpenACC::impl_static_fence( + "Kokkos::OpenACCSpace::impl_allocate: Post OpenACC Allocation"); + */ + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t reported_size = + (arg_logical_size > 0) ? 
arg_logical_size : arg_alloc_size; + Kokkos::Profiling::allocateData(arg_handle, arg_label, ptr, reported_size); + } + + return ptr; +} + +void Kokkos::Experimental::OpenACCSpace::deallocate( + void *const arg_alloc_ptr, const size_t arg_alloc_size) const { + deallocate("[unlabeled]", arg_alloc_ptr, arg_alloc_size); +} + +void Kokkos::Experimental::OpenACCSpace::deallocate( + const char *arg_label, void *const arg_alloc_ptr, + const size_t arg_alloc_size, const size_t arg_logical_size) const { + impl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size); +} + +void Kokkos::Experimental::OpenACCSpace::impl_deallocate( + const char *arg_label, void *const arg_alloc_ptr, + const size_t arg_alloc_size, const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t reported_size = + (arg_logical_size > 0) ? arg_logical_size : arg_alloc_size; + Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, + reported_size); + } + + if (arg_alloc_ptr) { + acc_free(arg_alloc_ptr); + } +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#ifdef KOKKOS_ENABLE_DEBUG  // defined at global scope, so the declarator must be fully qualified with Kokkos::Impl:: +Kokkos::Impl::SharedAllocationRecord<void, void> Kokkos::Impl:: + SharedAllocationRecord<Kokkos::Experimental::OpenACCSpace, void>::s_root_record; +#endif  // KOKKOS_ENABLE_DEBUG + +Kokkos::Impl::SharedAllocationRecord<Kokkos::Experimental::OpenACCSpace, + void>::~SharedAllocationRecord() { + m_space.deallocate(m_label.c_str(), + SharedAllocationRecord<void, void>::m_alloc_ptr, + (SharedAllocationRecord<void, void>::m_alloc_size - + sizeof(SharedAllocationHeader))); +} + +Kokkos::Impl::SharedAllocationRecord<Kokkos::Experimental::OpenACCSpace, void>:: + SharedAllocationRecord( + const Kokkos::Experimental::OpenACCSpace &arg_space, + const std::string &arg_label, const size_t arg_alloc_size, + const
SharedAllocationRecord<void, void>::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::Experimental::OpenACCSpace, + void>::s_root_record, +#endif + Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_space(arg_space) { + SharedAllocationHeader header; + + this->base_t::_fill_host_accessible_header_info(header, arg_label); + + Kokkos::Impl::DeepCopy<Experimental::OpenACCSpace, HostSpace>( + RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader)); + Kokkos::fence( + "SharedAllocationRecord<Kokkos::Experimental::OpenACCSpace, " + "void>::SharedAllocationRecord(): fence after copying header from " + "HostSpace"); +} + +//============================================================================== +// <editor-fold desc="Explicit instantiations of CRTP Base classes"> {{{1 + +#include <impl/Kokkos_SharedAlloc_timpl.hpp> + +// To avoid additional compilation cost for something that's (mostly?) not +// performance sensitive, we explicitly instantiate these CRTP base classes +// here, where we have access to the associated *_timpl.hpp header files. 
+template class Kokkos::Impl::HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::OpenACCSpace>; +template class Kokkos::Impl::SharedAllocationRecordCommon< + Kokkos::Experimental::OpenACCSpace>; + +// </editor-fold> end Explicit instantiations of CRTP Base classes }}}1 +//============================================================================== diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.hpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a7347e8f91d111dcd6d24eb06c54c9181874903e --- /dev/null +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.hpp @@ -0,0 +1,249 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include <Kokkos_Macros.hpp> +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#else +KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.") +#endif +#endif + +#ifndef KOKKOS_OPENACC_SPACE_HPP +#define KOKKOS_OPENACC_SPACE_HPP + +#include <Kokkos_Concepts.hpp> + +#include <impl/Kokkos_Tools.hpp> +#include <impl/Kokkos_SharedAlloc.hpp> + +#include <openacc.h> +#include <iosfwd> + +namespace Kokkos::Experimental { + +class OpenACC; + +class OpenACCSpace { + public: + using memory_space = OpenACCSpace; + using execution_space = OpenACC; + using device_type = Kokkos::Device<execution_space, memory_space>; + + using size_type = size_t; + + OpenACCSpace() = default; + + /**\brief Allocate untracked memory in the space */ + void* allocate(const Kokkos::Experimental::OpenACC& exec_space, + const size_t arg_alloc_size) const; + void* allocate(const Kokkos::Experimental::OpenACC& exec_space, + const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; + void* allocate(const size_t arg_alloc_size) const; + void* allocate(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; + + 
/**\brief Deallocate untracked memory in the space */ + void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const; + void deallocate(const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; + + static constexpr char const* name() { return "OpenACCSpace"; } + + private: + void* impl_allocate(const Kokkos::Experimental::OpenACC& exec_space, + const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; + void* impl_allocate(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; + void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; +}; + +} // namespace Kokkos::Experimental + +/*--------------------------------------------------------------------------*/ + +template <> +struct Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace, + Kokkos::Experimental::OpenACCSpace> { + enum : bool { assignable = false }; + enum : bool { accessible = false }; + enum : bool { deepcopy = true }; +}; + +template <> +struct Kokkos::Impl::MemorySpaceAccess<Kokkos::Experimental::OpenACCSpace, + Kokkos::HostSpace> { + enum : bool { assignable = false }; + enum : bool { accessible = false }; + enum : bool { deepcopy = true }; +}; + +template <> +struct Kokkos::Impl::MemorySpaceAccess<Kokkos::Experimental::OpenACCSpace, + Kokkos::Experimental::OpenACCSpace> { + enum : bool { assignable = true }; + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; +/*--------------------------------------------------------------------------*/ + +template <> +class 
Kokkos::Impl::SharedAllocationRecord<Kokkos::Experimental::OpenACCSpace, + void> + : public HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::OpenACCSpace> { + private: + friend class HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::OpenACCSpace>; + friend class SharedAllocationRecordCommon<Kokkos::Experimental::OpenACCSpace>; + friend Kokkos::Experimental::OpenACCSpace; + + using base_t = HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::OpenACCSpace>; + using RecordBase = SharedAllocationRecord<void, void>; + + SharedAllocationRecord(const SharedAllocationRecord&) = delete; + SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; + + /**\brief Root record for tracked allocations from this OpenACCSpace + * instance */ + static RecordBase s_root_record; + + const Kokkos::Experimental::OpenACCSpace m_space; + + protected: + ~SharedAllocationRecord(); + SharedAllocationRecord() = default; + + template <typename ExecutionSpace> + SharedAllocationRecord( + const ExecutionSpace& /*exec_space*/, + const Kokkos::Experimental::OpenACCSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &deallocate) + : SharedAllocationRecord(arg_space, arg_label, arg_alloc_size, + arg_dealloc) {} + + SharedAllocationRecord( + const Kokkos::Experimental::OpenACCSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &deallocate); + + public: + KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate( + const Kokkos::Experimental::OpenACCSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size) { + if (acc_on_device(acc_device_host)) { + return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); + } else { + return nullptr; + } + } +}; + +//---------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------- + +// FIXME_OPENACC: Need to update the DeepCopy implementations below to support +// multiple execution space instances. +// The current OpenACC backend implementation assumes that there is only one +// device execution space instance, and all the device operations (e.g., memory +// transfers, kernel launches, etc.) are implemented to be synchronous, which +// does not violate the Kokkos execution semantics with the single execution +// space instance. +template <class ExecutionSpace> +struct Kokkos::Impl::DeepCopy<Kokkos::Experimental::OpenACCSpace, + Kokkos::Experimental::OpenACCSpace, + ExecutionSpace> { + DeepCopy(void* dst, const void* src, size_t n) { + // The behavior of acc_memcpy_device when bytes argument is zero is + // clarified only in the latest OpenACC specification (V3.2), and thus the + // value checking is added as a safeguard. (The current NVHPC (V22.5) + // supports OpenACC V2.7.) + if (n > 0) acc_memcpy_device(dst, const_cast<void*>(src), n); + } + DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, size_t n) { + exec.fence(); + if (n > 0) acc_memcpy_device(dst, const_cast<void*>(src), n); + } +}; + +template <class ExecutionSpace> +struct Kokkos::Impl::DeepCopy<Kokkos::Experimental::OpenACCSpace, + Kokkos::HostSpace, ExecutionSpace> { + DeepCopy(void* dst, const void* src, size_t n) { + if (n > 0) acc_memcpy_to_device(dst, const_cast<void*>(src), n); + } + DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, size_t n) { + exec.fence(); + if (n > 0) acc_memcpy_to_device(dst, const_cast<void*>(src), n); + } +}; + +template <class ExecutionSpace> +struct Kokkos::Impl::DeepCopy< + Kokkos::HostSpace, Kokkos::Experimental::OpenACCSpace, ExecutionSpace> { + DeepCopy(void* dst, const void* src, size_t n) { + if (n > 0) acc_memcpy_from_device(dst, const_cast<void*>(src), n); + } + DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, 
size_t n) { + exec.fence(); + if (n > 0) acc_memcpy_from_device(dst, const_cast<void*>(src), n); + } +}; + +#endif diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Instance.cpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Instance.cpp new file mode 100644 index 0000000000000000000000000000000000000000..15d38803f9c5be559f96cf1aad9a663cd207be03 --- /dev/null +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Instance.cpp @@ -0,0 +1,118 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#define KOKKOS_IMPL_PUBLIC_INCLUDE + +#include <OpenACC/Kokkos_OpenACC_Instance.hpp> +#include <OpenACC/Kokkos_OpenACC.hpp> +#include <OpenACC/Kokkos_OpenACC_Traits.hpp> +#include <impl/Kokkos_Profiling.hpp> +#include <impl/Kokkos_DeviceManagement.hpp> + +#include <openacc.h> + +#include <iostream> + +namespace Kokkos { +bool show_warnings() noexcept; +} + +Kokkos::Experimental::Impl::OpenACCInternal* +Kokkos::Experimental::Impl::OpenACCInternal::singleton() { + static OpenACCInternal self; + return &self; +} + +void Kokkos::Experimental::Impl::OpenACCInternal::initialize( + InitializationSettings const& settings) { + if (OpenACC_Traits::may_fallback_to_host && + acc_get_num_devices(OpenACC_Traits::dev_type) == 0 && + !settings.has_device_id()) { + if (show_warnings()) { + std::cerr << "Warning: No GPU available for execution, falling back to" + " using the host!" + << std::endl; + } + acc_set_device_type(acc_device_host); + // FIXME_OPENACC if multiple execution space instances are supported, + // device id variable should be explicitly set to the value returned by + // acc_get_device_num(acc_device_host). 
+ } else { + using Kokkos::Impl::get_gpu; + int const dev_num = get_gpu(settings); + acc_set_device_num(dev_num, OpenACC_Traits::dev_type); + } + m_is_initialized = true; +} + +void Kokkos::Experimental::Impl::OpenACCInternal::finalize() { + m_is_initialized = false; +} + +bool Kokkos::Experimental::Impl::OpenACCInternal::is_initialized() const { + return m_is_initialized; +} + +void Kokkos::Experimental::Impl::OpenACCInternal::print_configuration( + std::ostream& os, bool /*verbose*/) const { + os << "Using OpenACC\n"; // FIXME_OPENACC +} + +void Kokkos::Experimental::Impl::OpenACCInternal::fence( + std::string const& name) const { + Kokkos::Tools::Experimental::Impl::profile_fence_event< + Kokkos::Experimental::OpenACC>( + name, + Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{instance_id()}, + [&]() { + //[DEBUG] disabled due to synchronous behaviors of the current + // parallel construct implementations. acc_wait_all(); + }); +} + +uint32_t Kokkos::Experimental::Impl::OpenACCInternal::instance_id() const + noexcept { + return Kokkos::Tools::Experimental::Impl::idForInstance< + Kokkos::Experimental::OpenACC>(reinterpret_cast<uintptr_t>(this)); +} diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Instance.hpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Instance.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cb69b4ae7a0524739105c4bb5c835ac365b85a1c --- /dev/null +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Instance.hpp @@ -0,0 +1,79 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENACC_INSTANCE_HPP +#define KOKKOS_OPENACC_INSTANCE_HPP + +#include <impl/Kokkos_InitializationSettings.hpp> + +#include <cstdint> +#include <iosfwd> +#include <string> + +namespace Kokkos::Experimental::Impl { + +class OpenACCInternal {  // backend state object; instances are only reachable through singleton() + bool m_is_initialized = false;  // set by initialize(), cleared by finalize() + + OpenACCInternal() = default;  // private: construction is restricted to singleton() + OpenACCInternal(const OpenACCInternal&) = delete;  // the singleton must never be duplicated + OpenACCInternal& operator=(const OpenACCInternal&) = delete; + + public: + static OpenACCInternal* singleton(); + + void initialize(InitializationSettings const& settings); + void finalize(); + bool is_initialized() const; + + void print_configuration(std::ostream& os, bool verbose = false) const; + + void fence(std::string const& name) const; + + uint32_t instance_id() const noexcept; +}; + +} // namespace Kokkos::Experimental::Impl + +#endif diff --git a/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f9451ecfe67d0a4e98e2c9378a6fd414130f9395 --- /dev/null +++ b/packages/kokkos/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENACC_TRAITS_HPP +#define KOKKOS_OPENACC_TRAITS_HPP + +#include <openacc.h> + +namespace Kokkos::Experimental::Impl { + +struct OpenACC_Traits { +#if defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ + defined(KOKKOS_ARCH_AMPERE) + static constexpr acc_device_t dev_type = acc_device_nvidia; + static constexpr bool may_fallback_to_host = false; +#else + static constexpr acc_device_t dev_type = acc_device_not_host; + static constexpr bool may_fallback_to_host = true; +#endif +}; + +} // namespace Kokkos::Experimental::Impl + +#endif diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp similarity index 72% rename from packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp rename to packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp index 66dbbacce9bc8d00f0d8ce6719fcb96f9a0fcfcc..2397aa4785b88a236862f14f814ae147d5bdcf1c 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp @@ -42,8 +42,9 @@ //@HEADER */ -#include <Kokkos_Macros.hpp> -#if defined(KOKKOS_ENABLE_OPENMP) +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif #include <cstdio> #include <cstdlib> @@ -57,19 +58,21 @@ #include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_CPUDiscovery.hpp> #include <impl/Kokkos_Tools.hpp> +#include <impl/Kokkos_ExecSpaceManager.hpp> namespace Kokkos { namespace Impl { int g_openmp_hardware_max_threads = 1; -__thread int t_openmp_hardware_id = 0; -__thread Impl::OpenMPExec *t_openmp_instance = nullptr; +thread_local int t_openmp_hardware_id = 0; +// FIXME_OPENMP we can remove this after we remove partition_master +thread_local OpenMPInternal *t_openmp_instance = nullptr; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 -void 
OpenMPExec::validate_partition_impl(const int nthreads, - int &num_partitions, - int &partition_size) { +void OpenMPInternal::validate_partition_impl(const int nthreads, + int &num_partitions, + int &partition_size) { if (nthreads == 1) { num_partitions = 1; partition_size = 1; @@ -121,24 +124,7 @@ void OpenMPExec::validate_partition_impl(const int nthreads, } #endif -void OpenMPExec::verify_is_master(const char *const label) { - if (!t_openmp_instance) { - std::string msg(label); - msg.append(" ERROR: in parallel or not initialized"); - Kokkos::Impl::throw_runtime_exception(msg); - } -} - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -void OpenMPExec::clear_thread_data() { +void OpenMPInternal::clear_thread_data() { const size_t member_bytes = sizeof(int64_t) * HostThreadTeamData::align_to_int64(sizeof(HostThreadTeamData)); @@ -163,10 +149,10 @@ void OpenMPExec::clear_thread_data() { /* END #pragma omp parallel */ } -void OpenMPExec::resize_thread_data(size_t pool_reduce_bytes, - size_t team_reduce_bytes, - size_t team_shared_bytes, - size_t thread_local_bytes) { +void OpenMPInternal::resize_thread_data(size_t pool_reduce_bytes, + size_t team_reduce_bytes, + size_t team_shared_bytes, + size_t thread_local_bytes) { const size_t member_bytes = sizeof(int64_t) * HostThreadTeamData::align_to_int64(sizeof(HostThreadTeamData)); @@ -243,17 +229,16 @@ void OpenMPExec::resize_thread_data(size_t pool_reduce_bytes, } } -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { +OpenMPInternal &OpenMPInternal::singleton() { + static OpenMPInternal *self = nullptr; + if (self == nullptr) { + self = new 
OpenMPInternal(get_current_max_threads()); + } -//---------------------------------------------------------------------------- + return *self; +} -int OpenMP::impl_get_current_max_threads() noexcept { +int OpenMPInternal::get_current_max_threads() noexcept { // Using omp_get_max_threads(); is problematic in conjunction with // Hwloc on Intel (essentially an initial call to the OpenMP runtime // without a parallel region before will set a process mask for a single core @@ -272,16 +257,17 @@ int OpenMP::impl_get_current_max_threads() noexcept { return count; } -void OpenMP::impl_initialize(int thread_count) { +void OpenMPInternal::initialize(int thread_count) { + if (m_initialized) { + Kokkos::abort( + "Calling OpenMP::initialize after OpenMP::finalize is illegal\n"); + } + if (omp_in_parallel()) { std::string msg("Kokkos::OpenMP::initialize ERROR : in parallel"); Kokkos::Impl::throw_runtime_exception(msg); } - if (Impl::t_openmp_instance) { - finalize(); - } - { if (Kokkos::show_warnings() && nullptr == std::getenv("OMP_PROC_BIND")) { printf( @@ -299,7 +285,7 @@ void OpenMP::impl_initialize(int thread_count) { // Before any other call to OMP query the maximum number of threads // and save the value for re-initialization unit testing. 
- Impl::g_openmp_hardware_max_threads = impl_get_current_max_threads(); + Impl::g_openmp_hardware_max_threads = get_current_max_threads(); int process_num_threads = Impl::g_openmp_hardware_max_threads; @@ -335,21 +321,12 @@ void OpenMP::impl_initialize(int thread_count) { // setup thread local #pragma omp parallel num_threads(Impl::g_openmp_hardware_max_threads) { - Impl::t_openmp_instance = nullptr; Impl::t_openmp_hardware_id = omp_get_thread_num(); Impl::SharedAllocationRecord<void, void>::tracking_enable(); } - void *ptr = nullptr; - try { - ptr = space.allocate(sizeof(Impl::OpenMPExec)); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const &f) { - // For now, just rethrow the error message the existing way - Kokkos::Impl::throw_runtime_exception(f.get_error_message()); - } - - Impl::t_openmp_instance = - new (ptr) Impl::OpenMPExec(Impl::g_openmp_hardware_max_threads); + auto &instance = OpenMPInternal::singleton(); + instance.m_pool_size = Impl::g_openmp_hardware_max_threads; // New, unified host thread team data: { @@ -358,9 +335,8 @@ void OpenMP::impl_initialize(int thread_count) { size_t team_shared_bytes = 1024 * thread_count; size_t thread_local_bytes = 1024; - Impl::t_openmp_instance->resize_thread_data( - pool_reduce_bytes, team_reduce_bytes, team_shared_bytes, - thread_local_bytes); + instance.resize_thread_data(pool_reduce_bytes, team_reduce_bytes, + team_shared_bytes, thread_local_bytes); } } @@ -380,38 +356,31 @@ void OpenMP::impl_initialize(int thread_count) { << thread_count << " threads per process." 
<< std::endl; } // Init the array for used for arbitrarily sized atomics - Impl::init_lock_array_host_space(); -} + init_lock_array_host_space(); -//---------------------------------------------------------------------------- + m_initialized = true; +} -void OpenMP::impl_finalize() { +void OpenMPInternal::finalize() { if (omp_in_parallel()) { std::string msg("Kokkos::OpenMP::finalize ERROR "); - if (!Impl::t_openmp_instance) msg.append(": not initialized"); + if (this != &singleton()) msg.append(": not initialized"); if (omp_in_parallel()) msg.append(": in parallel"); Kokkos::Impl::throw_runtime_exception(msg); } - if (Impl::t_openmp_instance) { + if (this == &singleton()) { + auto const &instance = singleton(); // Silence Cuda Warning - const int nthreads = Impl::t_openmp_instance->m_pool_size <= - Impl::g_openmp_hardware_max_threads - ? Impl::g_openmp_hardware_max_threads - : Impl::t_openmp_instance->m_pool_size; + const int nthreads = + instance.m_pool_size <= Impl::g_openmp_hardware_max_threads + ? 
Impl::g_openmp_hardware_max_threads + : instance.m_pool_size; (void)nthreads; - using Exec = Impl::OpenMPExec; - Exec *instance = Impl::t_openmp_instance; - instance->~Exec(); - - OpenMP::memory_space space; - space.deallocate(instance, sizeof(Exec)); - #pragma omp parallel num_threads(nthreads) { Impl::t_openmp_hardware_id = 0; - Impl::t_openmp_instance = nullptr; Impl::SharedAllocationRecord<void, void>::tracking_disable(); } @@ -421,19 +390,15 @@ void OpenMP::impl_finalize() { Impl::g_openmp_hardware_max_threads = 1; } + m_initialized = false; + Kokkos::Profiling::finalize(); } -//---------------------------------------------------------------------------- - -void OpenMP::print_configuration(std::ostream &s, const bool /*verbose*/) { +void OpenMPInternal::print_configuration(std::ostream &s) const { s << "Kokkos::OpenMP"; - const bool is_initialized = Impl::t_openmp_instance != nullptr; - - if (is_initialized) { - Impl::OpenMPExec::verify_is_master("OpenMP::print_configuration"); - + if (m_initialized) { const int numa_count = 1; const int core_per_numa = Impl::g_openmp_hardware_max_threads; const int thread_per_core = 1; @@ -445,67 +410,75 @@ void OpenMP::print_configuration(std::ostream &s, const bool /*verbose*/) { } } -std::vector<OpenMP> OpenMP::partition(...) { return std::vector<OpenMP>(1); } - -OpenMP OpenMP::create_instance(...) 
{ return OpenMP(); } +bool OpenMPInternal::verify_is_initialized(const char *const label) const { + if (!m_initialized) { + std::cerr << "Kokkos::OpenMP " << label + << " : ERROR OpenMP is not initialized" << std::endl; + } + return m_initialized; +} +} // namespace Impl -int OpenMP::concurrency() { return Impl::g_openmp_hardware_max_threads; } +//---------------------------------------------------------------------------- -void OpenMP::fence() const { - fence("Kokkos::OpenMP::fence: Unnamed Instance Fence"); +OpenMP::OpenMP() +#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS + : m_space_instance(&Impl::OpenMPInternal::singleton()) { } -void OpenMP::fence(const std::string &name) const { - Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>( - name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, []() {}); +#else + : m_space_instance(&Impl::OpenMPInternal::singleton(), + [](Impl::OpenMPInternal *) {}) { + Impl::OpenMPInternal::singleton().verify_is_initialized( + "OpenMP instance constructor"); } +#endif -namespace Impl { - -int g_openmp_space_factory_initialized = - initialize_space_factory<OpenMPSpaceInitializer>("050_OpenMP"); - -void OpenMPSpaceInitializer::initialize(const InitArguments &args) { - // Prevent "unused variable" warning for 'args' input struct. If - // Serial::initialize() ever needs to take arguments from the input - // struct, you may remove this line of code. 
- const int num_threads = args.num_threads; - - if (std::is_same<Kokkos::OpenMP, Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::OpenMP, Kokkos::HostSpace::execution_space>::value) { - Kokkos::OpenMP::impl_initialize(num_threads); - } else { - // std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not - // initialized" << std::endl ; - } +int OpenMP::impl_get_current_max_threads() noexcept { + return Impl::OpenMPInternal::get_current_max_threads(); } -void OpenMPSpaceInitializer::finalize(const bool) { - if (Kokkos::OpenMP::impl_is_initialized()) Kokkos::OpenMP::impl_finalize(); +void OpenMP::impl_initialize(InitializationSettings const &settings) { + Impl::OpenMPInternal::singleton().initialize( + settings.has_num_threads() ? settings.get_num_threads() : -1); } -void OpenMPSpaceInitializer::fence() { Kokkos::OpenMP::impl_static_fence(); } -void OpenMPSpaceInitializer::fence(const std::string &name) { - Kokkos::OpenMP::impl_static_fence(OpenMP(), name); -} +void OpenMP::impl_finalize() { Impl::OpenMPInternal::singleton().finalize(); } -void OpenMPSpaceInitializer::print_configuration(std::ostream &msg, - const bool detail) { - msg << "Host Parallel Execution Space:" << std::endl; - msg << " KOKKOS_ENABLE_OPENMP: "; - msg << "yes" << std::endl; +void OpenMP::print_configuration(std::ostream &os, bool /*verbose*/) const { + os << "Host Parallel Execution Space:\n"; + os << " KOKKOS_ENABLE_OPENMP: yes\n"; - msg << "OpenMP Atomics:" << std::endl; - msg << " KOKKOS_ENABLE_OPENMP_ATOMICS: "; + os << "OpenMP Atomics:\n"; + os << " KOKKOS_ENABLE_OPENMP_ATOMICS: "; #ifdef KOKKOS_ENABLE_OPENMP_ATOMICS - msg << "yes" << std::endl; + os << "yes\n"; #else - msg << "no" << std::endl; + os << "no\n"; #endif - msg << "\nOpenMP Runtime Configuration:" << std::endl; - OpenMP::print_configuration(msg, detail); + os << "\nOpenMP Runtime Configuration:\n"; + + m_space_instance->print_configuration(os); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 
+std::vector<OpenMP> OpenMP::partition(...) { return std::vector<OpenMP>(1); } + +OpenMP OpenMP::create_instance(...) { return OpenMP(); } +#endif + +int OpenMP::concurrency() { return Impl::g_openmp_hardware_max_threads; } + +void OpenMP::fence(const std::string &name) const { + Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>( + name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, []() {}); +} + +namespace Impl { + +int g_openmp_space_factory_initialized = + initialize_space_factory<OpenMP>("050_OpenMP"); + } // namespace Impl #ifdef KOKKOS_ENABLE_CXX14 @@ -517,7 +490,3 @@ constexpr DeviceType DeviceTypeTraits<OpenMP>::id; #endif } // namespace Kokkos - -#else -void KOKKOS_CORE_SRC_OPENMP_EXEC_PREVENT_LINK_ERROR() {} -#endif // KOKKOS_ENABLE_OPENMP diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp similarity index 80% rename from packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp rename to packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp index ede24d1094f1582f132b82ba6668cc7264c99086..1a2ee95a79e079b8918767fc409f1286a590ea85 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp @@ -42,12 +42,10 @@ //@HEADER */ -#ifndef KOKKOS_OPENMPEXEC_HPP -#define KOKKOS_OPENMPEXEC_HPP +#ifndef KOKKOS_OPENMP_INSTANCE_HPP +#define KOKKOS_OPENMP_INSTANCE_HPP #include <Kokkos_Macros.hpp> -#if defined(KOKKOS_ENABLE_OPENMP) - #if !defined(_OPENMP) && !defined(__CUDA_ARCH__) && \ !defined(__HIP_DEVICE_COMPILE__) && !defined(__SYCL_DEVICE_ONLY__) #error \ @@ -66,27 +64,45 @@ #include <omp.h> -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - namespace Kokkos { namespace Impl { -class OpenMPExec; +class OpenMPInternal; extern int g_openmp_hardware_max_threads; -extern 
__thread int t_openmp_hardware_id; -extern __thread OpenMPExec* t_openmp_instance; +extern thread_local int t_openmp_hardware_id; +// FIXME_OPENMP we can remove this after we remove partition_master +extern thread_local OpenMPInternal* t_openmp_instance; + +struct OpenMPTraits { + static int constexpr MAX_THREAD_COUNT = 512; +}; + +class OpenMPInternal { + private: + OpenMPInternal(int arg_pool_size) + : m_pool_size{arg_pool_size}, m_level{omp_get_level()}, m_pool() {} + + ~OpenMPInternal() { clear_thread_data(); } + + static int get_current_max_threads() noexcept; -//---------------------------------------------------------------------------- -/** \brief Data for OpenMP thread execution */ + bool m_initialized = false; + + int m_pool_size; + int m_level; + + HostThreadTeamData* m_pool[OpenMPTraits::MAX_THREAD_COUNT]; -class OpenMPExec { public: friend class Kokkos::OpenMP; - enum { MAX_THREAD_COUNT = 512 }; + static OpenMPInternal& singleton(); + + void initialize(int thread_cound); + + void finalize(); void clear_thread_data(); @@ -100,65 +116,58 @@ class OpenMPExec { int& partition_size); #endif - private: - OpenMPExec(int arg_pool_size) - : m_pool_size{arg_pool_size}, m_level{omp_get_level()}, m_pool() {} - - ~OpenMPExec() { clear_thread_data(); } - - int m_pool_size; - int m_level; - - HostThreadTeamData* m_pool[MAX_THREAD_COUNT]; - - public: - static void verify_is_master(const char* const); - void resize_thread_data(size_t pool_reduce_bytes, size_t team_reduce_bytes, size_t team_shared_bytes, size_t thread_local_bytes); - inline HostThreadTeamData* get_thread_data() const noexcept { + HostThreadTeamData* get_thread_data() const noexcept { return m_pool[m_level == omp_get_level() ? 
0 : omp_get_thread_num()]; } - inline HostThreadTeamData* get_thread_data(int i) const noexcept { + HostThreadTeamData* get_thread_data(int i) const noexcept { return m_pool[i]; } -}; -} // namespace Impl -} // namespace Kokkos + bool is_initialized() const { return m_initialized; } -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- + bool verify_is_initialized(const char* const label) const; -namespace Kokkos { + void print_configuration(std::ostream& s) const; +}; +} // namespace Impl inline bool OpenMP::impl_is_initialized() noexcept { - return Impl::t_openmp_instance != nullptr; + return Impl::OpenMPInternal::singleton().is_initialized(); } inline bool OpenMP::in_parallel(OpenMP const&) noexcept { - // t_openmp_instance is only non-null on a master thread - return !Impl::t_openmp_instance || - Impl::t_openmp_instance->m_level < omp_get_level(); + // FIXME_OPENMP We are forced to use t_openmp_instance because the function is + // static and does not use the OpenMP object + return ((Impl::OpenMPInternal::singleton().m_level < omp_get_level()) && + (!Impl::t_openmp_instance || + Impl::t_openmp_instance->m_level < omp_get_level())); } inline int OpenMP::impl_thread_pool_size() noexcept { - return OpenMP::in_parallel() ? omp_get_num_threads() - : Impl::t_openmp_instance->m_pool_size; + // FIXME_OPENMP We are forced to use t_openmp_instance because the function is + // static + return OpenMP::in_parallel() + ? omp_get_num_threads() + : (Impl::t_openmp_instance + ? Impl::t_openmp_instance->m_pool_size + : Impl::OpenMPInternal::singleton().m_pool_size); } KOKKOS_INLINE_FUNCTION int OpenMP::impl_thread_pool_rank() noexcept { + // FIXME_OPENMP We are forced to use t_openmp_instance because the function is + // static KOKKOS_IF_ON_HOST( (return Impl::t_openmp_instance ? 
0 : omp_get_thread_num();)) KOKKOS_IF_ON_DEVICE((return -1;)) } -inline void OpenMP::impl_static_fence(OpenMP const& /**instance*/, - const std::string& name) noexcept { +inline void OpenMP::impl_static_fence(std::string const& name) { Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>( name, Kokkos::Tools::Experimental::SpecialSynchronizationCases:: @@ -179,9 +188,9 @@ KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions, #else if (omp_get_nested()) { #endif - using Exec = Impl::OpenMPExec; + using Exec = Impl::OpenMPInternal; - Exec* prev_instance = Impl::t_openmp_instance; + Exec* prev_instance = &Impl::OpenMPInternal::singleton(); Exec::validate_partition_impl(prev_instance->m_pool_size, num_partitions, partition_size); @@ -190,35 +199,22 @@ KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions, #pragma omp parallel num_threads(num_partitions) { - void* ptr = nullptr; - try { - ptr = space.allocate(sizeof(Exec)); - } catch ( - Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - // For now, just rethrow the error message the existing way - Kokkos::Impl::throw_runtime_exception(failure.get_error_message()); - } - - Impl::t_openmp_instance = new (ptr) Exec(partition_size); + Exec thread_local_instance(partition_size); + Impl::t_openmp_instance = &thread_local_instance; size_t pool_reduce_bytes = 32 * partition_size; size_t team_reduce_bytes = 32 * partition_size; size_t team_shared_bytes = 1024 * partition_size; size_t thread_local_bytes = 1024; - Impl::t_openmp_instance->resize_thread_data( + thread_local_instance.resize_thread_data( pool_reduce_bytes, team_reduce_bytes, team_shared_bytes, thread_local_bytes); omp_set_num_threads(partition_size); f(omp_get_thread_num(), omp_get_num_threads()); - - Impl::t_openmp_instance->~Exec(); - space.deallocate(Impl::t_openmp_instance, sizeof(Exec)); Impl::t_openmp_instance = nullptr; } - - Impl::t_openmp_instance = prev_instance; } else 
{ // nested openmp not enabled f(0, 1); @@ -368,4 +364,3 @@ inline int OpenMP::impl_max_hardware_threads() noexcept { } // namespace Kokkos #endif -#endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */ diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index 764dd906545cf607c1264ff7830db4936bdc4f65..94c465dc2e03c67accd86470ea69d425afcfc1df 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -49,14 +49,26 @@ #if defined(KOKKOS_ENABLE_OPENMP) #include <omp.h> -#include <OpenMP/Kokkos_OpenMP_Exec.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> +#include <OpenMP/Kokkos_OpenMP_Instance.hpp> #include <KokkosExp_MDRangePolicy.hpp> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#define KOKKOS_PRAGMA_IVDEP_IF_ENABLED +#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ + defined(KOKKOS_ENABLE_PRAGMA_IVDEP) +#undef KOKKOS_PRAGMA_IVDEP_IF_ENABLED +#define KOKKOS_PRAGMA_IVDEP_IF_ENABLED _Pragma("ivdep") +#endif + +#ifndef KOKKOS_COMPILER_NVHPC +#define KOKKOS_OPENMP_OPTIONAL_CHUNK_SIZE , m_policy.chunk_size() +#else +#define KOKKOS_OPENMP_OPTIONAL_CHUNK_SIZE +#endif + namespace Kokkos { namespace Impl { @@ -68,84 +80,105 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::OpenMP> { using WorkRange = typename Policy::WorkRange; using Member = typename Policy::member_type; - OpenMPExec* m_instance; + OpenMPInternal* m_instance; const FunctorType m_functor; const Policy m_policy; - template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend) { -#ifdef KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif -#endif - for 
(Member iwork = ibeg; iwork < iend; ++iwork) { - functor(iwork); + inline static void exec_range(const FunctorType& functor, const Member ibeg, + const Member iend) { + KOKKOS_PRAGMA_IVDEP_IF_ENABLED + for (auto iwork = ibeg; iwork < iend; ++iwork) { + exec_work(functor, iwork); } } - template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend) { - const TagType t{}; -#ifdef KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif -#endif - for (Member iwork = ibeg; iwork < iend; ++iwork) { - functor(t, iwork); + template <class Enable = WorkTag> + inline static std::enable_if_t<std::is_void<WorkTag>::value && + std::is_same<Enable, WorkTag>::value> + exec_work(const FunctorType& functor, const Member iwork) { + functor(iwork); + } + + template <class Enable = WorkTag> + inline static std::enable_if_t<!std::is_void<WorkTag>::value && + std::is_same<Enable, WorkTag>::value> + exec_work(const FunctorType& functor, const Member iwork) { + functor(WorkTag{}, iwork); + } + + template <class Policy> + std::enable_if_t<std::is_same<typename Policy::schedule_type::type, + Kokkos::Dynamic>::value> + execute_parallel() const { + // prevent bug in NVHPC 21.9/CUDA 11.4 (entering zero iterations loop) + if (m_policy.begin() >= m_policy.end()) return; +#pragma omp parallel for schedule(dynamic KOKKOS_OPENMP_OPTIONAL_CHUNK_SIZE) \ + num_threads(OpenMP::impl_thread_pool_size()) + KOKKOS_PRAGMA_IVDEP_IF_ENABLED + for (auto iwork = m_policy.begin(); iwork < m_policy.end(); ++iwork) { + exec_work(m_functor, iwork); + } + } + + template <class Policy> + std::enable_if_t<!std::is_same<typename Policy::schedule_type::type, + Kokkos::Dynamic>::value> + execute_parallel() const { +#pragma omp parallel for schedule(static KOKKOS_OPENMP_OPTIONAL_CHUNK_SIZE) \ + num_threads(OpenMP::impl_thread_pool_size()) + 
KOKKOS_PRAGMA_IVDEP_IF_ENABLED + for (auto iwork = m_policy.begin(); iwork < m_policy.end(); ++iwork) { + exec_work(m_functor, iwork); } } public: inline void execute() const { - enum { - is_dynamic = std::is_same<typename Policy::schedule_type::type, - Kokkos::Dynamic>::value - }; - if (OpenMP::in_parallel()) { - exec_range<WorkTag>(m_functor, m_policy.begin(), m_policy.end()); - } else { - OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); + exec_range(m_functor, m_policy.begin(), m_policy.end()); + return; + } +#ifndef KOKKOS_INTERNAL_DISABLE_NATIVE_OPENMP + execute_parallel<Policy>(); +#else + constexpr bool is_dynamic = + std::is_same<typename Policy::schedule_type::type, + Kokkos::Dynamic>::value; #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) - { - HostThreadTeamData& data = *(m_instance->get_thread_data()); + { + HostThreadTeamData& data = *(m_instance->get_thread_data()); - data.set_work_partition(m_policy.end() - m_policy.begin(), - m_policy.chunk_size()); + data.set_work_partition(m_policy.end() - m_policy.begin(), + m_policy.chunk_size()); - if (is_dynamic) { - // Make sure work partition is set before stealing - if (data.pool_rendezvous()) data.pool_rendezvous_release(); - } + if (is_dynamic) { + // Make sure work partition is set before stealing + if (data.pool_rendezvous()) data.pool_rendezvous_release(); + } - std::pair<int64_t, int64_t> range(0, 0); + std::pair<int64_t, int64_t> range(0, 0); - do { - range = is_dynamic ? data.get_work_stealing_chunk() - : data.get_work_partition(); + do { + range = is_dynamic ? 
data.get_work_stealing_chunk() + : data.get_work_partition(); - ParallelFor::template exec_range<WorkTag>( - m_functor, range.first + m_policy.begin(), - range.second + m_policy.begin()); + exec_range(m_functor, range.first + m_policy.begin(), + range.second + m_policy.begin()); - } while (is_dynamic && 0 <= range.first); - } + } while (is_dynamic && 0 <= range.first); } +#endif } inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) - : m_instance(t_openmp_instance), - m_functor(arg_functor), - m_policy(arg_policy) {} + : m_instance(nullptr), m_functor(arg_functor), m_policy(arg_policy) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } + } }; // MDRangePolicy impl @@ -163,7 +196,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, using iterate_type = typename Kokkos::Impl::HostIterateTile< MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void>; - OpenMPExec* m_instance; + OpenMPInternal* m_instance; const FunctorType m_functor; const MDRangePolicy m_mdr_policy; const Policy m_policy; // construct as RangePolicy( 0, num_tiles @@ -172,62 +205,90 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, inline static void exec_range(const MDRangePolicy& mdr_policy, const FunctorType& functor, const Member ibeg, const Member iend) { -#ifdef KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif -#endif + KOKKOS_PRAGMA_IVDEP_IF_ENABLED for (Member iwork = ibeg; iwork < iend; ++iwork) { iterate_type(mdr_policy, functor)(iwork); } } + template <class Policy> + typename std::enable_if_t<std::is_same<typename Policy::schedule_type::type, + Kokkos::Dynamic>::value> + execute_parallel() const { +#pragma omp parallel for schedule(dynamic KOKKOS_OPENMP_OPTIONAL_CHUNK_SIZE) \ + num_threads(OpenMP::impl_thread_pool_size()) + KOKKOS_PRAGMA_IVDEP_IF_ENABLED + for (auto iwork = 
m_policy.begin(); iwork < m_policy.end(); ++iwork) { + iterate_type(m_mdr_policy, m_functor)(iwork); + } + } + + template <class Policy> + typename std::enable_if<!std::is_same<typename Policy::schedule_type::type, + Kokkos::Dynamic>::value>::type + execute_parallel() const { +#pragma omp parallel for schedule(static KOKKOS_OPENMP_OPTIONAL_CHUNK_SIZE) \ + num_threads(OpenMP::impl_thread_pool_size()) + KOKKOS_PRAGMA_IVDEP_IF_ENABLED + for (auto iwork = m_policy.begin(); iwork < m_policy.end(); ++iwork) { + iterate_type(m_mdr_policy, m_functor)(iwork); + } + } + public: inline void execute() const { - enum { - is_dynamic = std::is_same<typename Policy::schedule_type::type, - Kokkos::Dynamic>::value - }; - if (OpenMP::in_parallel()) { ParallelFor::exec_range(m_mdr_policy, m_functor, m_policy.begin(), m_policy.end()); - } else { - OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); + return; + } + +#ifndef KOKKOS_INTERNAL_DISABLE_NATIVE_OPENMP + execute_parallel<Policy>(); +#else + constexpr bool is_dynamic = + std::is_same<typename Policy::schedule_type::type, + Kokkos::Dynamic>::value; #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) - { - HostThreadTeamData& data = *(m_instance->get_thread_data()); + { + HostThreadTeamData& data = *(m_instance->get_thread_data()); - data.set_work_partition(m_policy.end() - m_policy.begin(), - m_policy.chunk_size()); + data.set_work_partition(m_policy.end() - m_policy.begin(), + m_policy.chunk_size()); - if (is_dynamic) { - // Make sure work partition is set before stealing - if (data.pool_rendezvous()) data.pool_rendezvous_release(); - } + if (is_dynamic) { + // Make sure work partition is set before stealing + if (data.pool_rendezvous()) data.pool_rendezvous_release(); + } - std::pair<int64_t, int64_t> range(0, 0); + std::pair<int64_t, int64_t> range(0, 0); - do { - range = is_dynamic ? data.get_work_stealing_chunk() - : data.get_work_partition(); + do { + range = is_dynamic ? 
data.get_work_stealing_chunk() + : data.get_work_partition(); - ParallelFor::exec_range(m_mdr_policy, m_functor, - range.first + m_policy.begin(), - range.second + m_policy.begin()); + ParallelFor::exec_range(m_mdr_policy, m_functor, + range.first + m_policy.begin(), + range.second + m_policy.begin()); - } while (is_dynamic && 0 <= range.first); - } - // END #pragma omp parallel + } while (is_dynamic && 0 <= range.first); } + // END #pragma omp parallel +#endif } inline ParallelFor(const FunctorType& arg_functor, MDRangePolicy arg_policy) - : m_instance(t_openmp_instance), + : m_instance(nullptr), m_functor(arg_functor), m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } + } template <typename Policy, typename Functor> static int max_tile_size_product(const Policy&, const Functor&) { /** @@ -258,9 +319,6 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, using WorkRange = typename Policy::WorkRange; using Member = typename Policy::member_type; - using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; - using ReducerConditional = Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType>; @@ -270,34 +328,31 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, void>; // Static Assert WorkTag void if ReducerType not InvalidType - - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; + using Analysis = + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>; using pointer_type = typename Analysis::pointer_type; using reference_type = typename Analysis::reference_type; - 
OpenMPExec* m_instance; + OpenMPInternal* m_instance; const FunctorType m_functor; const Policy m_policy; const ReducerType m_reducer; const pointer_type m_result_ptr; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update) { + inline static std::enable_if_t<std::is_void<TagType>::value> exec_range( + const FunctorType& functor, const Member ibeg, const Member iend, + reference_type update) { for (Member iwork = ibeg; iwork < iend; ++iwork) { functor(iwork, update); } } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update) { + inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const FunctorType& functor, const Member ibeg, const Member iend, + reference_type update) { const TagType t{}; for (Member iwork = ibeg; iwork < iend; ++iwork) { functor(t, iwork, update); @@ -306,12 +361,13 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, public: inline void execute() const { + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + if (m_policy.end() <= m_policy.begin()) { if (m_result_ptr) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), m_result_ptr); + final_reducer.init(m_result_ptr); + final_reducer.final(m_result_ptr); } return; } @@ -320,8 +376,6 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, Kokkos::Dynamic>::value }; - OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_reduce"); - const size_t pool_reduce_bytes = 
Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); @@ -345,9 +399,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, if (data.pool_rendezvous()) data.pool_rendezvous_release(); } - reference_type update = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - data.pool_reduce_local()); + reference_type update = final_reducer.init( + reinterpret_cast<pointer_type>(data.pool_reduce_local())); std::pair<int64_t, int64_t> range(0, 0); @@ -368,12 +421,12 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, pointer_type(m_instance->get_thread_data(0)->pool_reduce_local()); for (int i = 1; i < pool_size; ++i) { - ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), ptr, - m_instance->get_thread_data(i)->pool_reduce_local()); + final_reducer.join( + ptr, reinterpret_cast<pointer_type>( + m_instance->get_thread_data(i)->pool_reduce_local())); } - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), ptr); + final_reducer.final(ptr); if (m_result_ptr) { const int n = Analysis::value_count( @@ -391,14 +444,19 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, inline ParallelReduce( const FunctorType& arg_functor, Policy arg_policy, const ViewType& arg_view, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = nullptr) - : m_instance(t_openmp_instance), + std::enable_if_t<Kokkos::is_view<ViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void*> = nullptr) + : m_instance(nullptr), m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), m_result_ptr(arg_view.data()) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } /*static_assert( std::is_same< typename ViewType::memory_space , 
Kokkos::HostSpace >::value , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" @@ -407,11 +465,16 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, const ReducerType& reducer) - : m_instance(t_openmp_instance), + : m_instance(nullptr), m_functor(arg_functor), m_policy(arg_policy), m_reducer(reducer), m_result_ptr(reducer.view().data()) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" @@ -431,9 +494,6 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, using WorkRange = typename Policy::WorkRange; using Member = typename Policy::member_type; - using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE, - MDRangePolicy, FunctorType>; - using ReducerConditional = Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType>; @@ -442,8 +502,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, void>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; + using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE, + MDRangePolicy, ReducerTypeFwd>; using pointer_type = typename Analysis::pointer_type; using value_type = typename Analysis::value_type; @@ -453,7 +513,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, typename Kokkos::Impl::HostIterateTile<MDRangePolicy, FunctorType, WorkTag, reference_type>; - OpenMPExec* m_instance; + OpenMPInternal* m_instance; const 
FunctorType m_functor; const MDRangePolicy m_mdr_policy; const Policy m_policy; // construct as RangePolicy( 0, num_tiles @@ -476,8 +536,6 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, Kokkos::Dynamic>::value }; - OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_reduce"); - const size_t pool_reduce_bytes = Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); @@ -488,6 +546,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, 0 // thread_local_bytes ); + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + const int pool_size = OpenMP::impl_thread_pool_size(); #pragma omp parallel num_threads(pool_size) { @@ -501,9 +562,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, if (data.pool_rendezvous()) data.pool_rendezvous_release(); } - reference_type update = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - data.pool_reduce_local()); + reference_type update = final_reducer.init( + reinterpret_cast<pointer_type>(data.pool_reduce_local())); std::pair<int64_t, int64_t> range(0, 0); @@ -525,12 +585,12 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, pointer_type(m_instance->get_thread_data(0)->pool_reduce_local()); for (int i = 1; i < pool_size; ++i) { - ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), ptr, - m_instance->get_thread_data(i)->pool_reduce_local()); + final_reducer.join( + ptr, reinterpret_cast<pointer_type>( + m_instance->get_thread_data(i)->pool_reduce_local())); } - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), ptr); + final_reducer.final(ptr); if (m_result_ptr) { const int n = Analysis::value_count( @@ -548,15 +608,20 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, inline ParallelReduce( const FunctorType& 
arg_functor, MDRangePolicy arg_policy, const ViewType& arg_view, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = nullptr) - : m_instance(t_openmp_instance), + std::enable_if_t<Kokkos::is_view<ViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void*> = nullptr) + : m_instance(nullptr), m_functor(arg_functor), m_mdr_policy(arg_policy), m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), m_reducer(InvalidType()), m_result_ptr(arg_view.data()) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" @@ -565,12 +630,17 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, inline ParallelReduce(const FunctorType& arg_functor, MDRangePolicy arg_policy, const ReducerType& reducer) - : m_instance(t_openmp_instance), + : m_instance(nullptr), m_functor(arg_functor), m_mdr_policy(arg_policy), m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), m_reducer(reducer), m_result_ptr(reducer.view().data()) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" @@ -609,32 +679,26 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, using WorkRange = typename Policy::WorkRange; using Member = typename Policy::member_type; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<FunctorType, WorkTag>; - using ValueOps = 
Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; - using pointer_type = typename Analysis::pointer_type; using reference_type = typename Analysis::reference_type; - OpenMPExec* m_instance; + OpenMPInternal* m_instance; const FunctorType m_functor; const Policy m_policy; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update, const bool final) { + inline static std::enable_if_t<std::is_void<TagType>::value> exec_range( + const FunctorType& functor, const Member ibeg, const Member iend, + reference_type update, const bool final) { for (Member iwork = ibeg; iwork < iend; ++iwork) { functor(iwork, update, final); } } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update, const bool final) { + inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const FunctorType& functor, const Member ibeg, const Member iend, + reference_type update, const bool final) { const TagType t{}; for (Member iwork = ibeg; iwork < iend; ++iwork) { functor(t, iwork, update, final); @@ -643,8 +707,6 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, public: inline void execute() const { - OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_scan"); - const int value_count = Analysis::value_count(m_functor); const size_t pool_reduce_bytes = 2 * Analysis::value_size(m_functor); @@ -658,12 +720,13 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) { HostThreadTeamData& data = *(m_instance->get_thread_data()); + typename Analysis::Reducer final_reducer(&m_functor); const WorkRange range(m_policy, omp_get_thread_num(), omp_get_num_threads()); - reference_type 
update_sum = - ValueInit::init(m_functor, data.pool_reduce_local()); + reference_type update_sum = final_reducer.init( + reinterpret_cast<pointer_type>(data.pool_reduce_local())); ParallelScan::template exec_range<WorkTag>( m_functor, range.begin(), range.end(), update_sum, false); @@ -681,9 +744,9 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, for (int j = 0; j < value_count; ++j) { ptr[j + value_count] = ptr_prev[j + value_count]; } - ValueJoin::join(m_functor, ptr + value_count, ptr_prev); + final_reducer.join(ptr + value_count, ptr_prev); } else { - ValueInit::init(m_functor, ptr + value_count); + final_reducer.init(ptr + value_count); } ptr_prev = ptr; @@ -692,8 +755,9 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, data.pool_rendezvous_release(); } - reference_type update_base = ValueOps::reference( - ((pointer_type)data.pool_reduce_local()) + value_count); + reference_type update_base = final_reducer.reference( + reinterpret_cast<pointer_type>(data.pool_reduce_local()) + + value_count); ParallelScan::template exec_range<WorkTag>( m_functor, range.begin(), range.end(), update_base, true); @@ -703,9 +767,13 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, //---------------------------------------- inline ParallelScan(const FunctorType& arg_functor, const Policy& arg_policy) - : m_instance(t_openmp_instance), - m_functor(arg_functor), - m_policy(arg_policy) {} + : m_instance(nullptr), m_functor(arg_functor), m_policy(arg_policy) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } + } //---------------------------------------- }; @@ -723,33 +791,27 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, using WorkRange = typename Policy::WorkRange; using Member = typename Policy::member_type; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueJoin = 
Kokkos::Impl::FunctorValueJoin<FunctorType, WorkTag>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; - using pointer_type = typename Analysis::pointer_type; using reference_type = typename Analysis::reference_type; - OpenMPExec* m_instance; + OpenMPInternal* m_instance; const FunctorType m_functor; const Policy m_policy; ReturnType& m_returnvalue; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update, const bool final) { + inline static std::enable_if_t<std::is_void<TagType>::value> exec_range( + const FunctorType& functor, const Member ibeg, const Member iend, + reference_type update, const bool final) { for (Member iwork = ibeg; iwork < iend; ++iwork) { functor(iwork, update, final); } } template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update, const bool final) { + inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range( + const FunctorType& functor, const Member ibeg, const Member iend, + reference_type update, const bool final) { const TagType t{}; for (Member iwork = ibeg; iwork < iend; ++iwork) { functor(t, iwork, update, final); @@ -758,8 +820,6 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, public: inline void execute() const { - OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_scan"); - const int value_count = Analysis::value_count(m_functor); const size_t pool_reduce_bytes = 2 * Analysis::value_size(m_functor); @@ -773,11 +833,12 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) { HostThreadTeamData& data = *(m_instance->get_thread_data()); + typename Analysis::Reducer 
final_reducer(&m_functor); const WorkRange range(m_policy, omp_get_thread_num(), omp_get_num_threads()); - reference_type update_sum = - ValueInit::init(m_functor, data.pool_reduce_local()); + reference_type update_sum = final_reducer.init( + reinterpret_cast<pointer_type>(data.pool_reduce_local())); ParallelScanWithTotal::template exec_range<WorkTag>( m_functor, range.begin(), range.end(), update_sum, false); @@ -795,9 +856,9 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, for (int j = 0; j < value_count; ++j) { ptr[j + value_count] = ptr_prev[j + value_count]; } - ValueJoin::join(m_functor, ptr + value_count, ptr_prev); + final_reducer.join(ptr + value_count, ptr_prev); } else { - ValueInit::init(m_functor, ptr + value_count); + final_reducer.init(ptr + value_count); } ptr_prev = ptr; @@ -806,8 +867,9 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, data.pool_rendezvous_release(); } - reference_type update_base = ValueOps::reference( - ((pointer_type)data.pool_reduce_local()) + value_count); + reference_type update_base = final_reducer.reference( + reinterpret_cast<pointer_type>(data.pool_reduce_local()) + + value_count); ParallelScanWithTotal::template exec_range<WorkTag>( m_functor, range.begin(), range.end(), update_base, true); @@ -823,10 +885,16 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, inline ParallelScanWithTotal(const FunctorType& arg_functor, const Policy& arg_policy, ReturnType& arg_returnvalue) - : m_instance(t_openmp_instance), + : m_instance(nullptr), m_functor(arg_functor), m_policy(arg_policy), - m_returnvalue(arg_returnvalue) {} + m_returnvalue(arg_returnvalue) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } + } //---------------------------------------- }; @@ -852,17 +920,16 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, using SchedTag = 
typename Policy::schedule_type::type; using Member = typename Policy::member_type; - OpenMPExec* m_instance; + OpenMPInternal* m_instance; const FunctorType m_functor; const Policy m_policy; - const int m_shmem_size; + const size_t m_shmem_size; template <class TagType> - inline static - typename std::enable_if<(std::is_same<TagType, void>::value)>::type - exec_team(const FunctorType& functor, HostThreadTeamData& data, - const int league_rank_begin, const int league_rank_end, - const int league_size) { + inline static std::enable_if_t<(std::is_void<TagType>::value)> exec_team( + const FunctorType& functor, HostThreadTeamData& data, + const int league_rank_begin, const int league_rank_end, + const int league_size) { for (int r = league_rank_begin; r < league_rank_end;) { functor(Member(data, r, league_size)); @@ -877,11 +944,10 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, } template <class TagType> - inline static - typename std::enable_if<(!std::is_same<TagType, void>::value)>::type - exec_team(const FunctorType& functor, HostThreadTeamData& data, - const int league_rank_begin, const int league_rank_end, - const int league_size) { + inline static std::enable_if_t<(!std::is_void<TagType>::value)> exec_team( + const FunctorType& functor, HostThreadTeamData& data, + const int league_rank_begin, const int league_rank_end, + const int league_size) { const TagType t{}; for (int r = league_rank_begin; r < league_rank_end;) { @@ -901,11 +967,9 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, inline void execute() const { enum { is_dynamic = std::is_same<SchedTag, Kokkos::Dynamic>::value }; - OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); - const size_t pool_reduce_size = 0; // Never shrinks const size_t team_reduce_size = TEAM_REDUCE_SIZE * m_policy.team_size(); - const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); + const size_t team_shared_size = m_shmem_size; const size_t thread_local_size = 
0; // Never shrinks m_instance->resize_thread_data(pool_reduce_size, team_reduce_size, @@ -949,12 +1013,18 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, } inline ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) - : m_instance(t_openmp_instance), + : m_instance(nullptr), m_functor(arg_functor), m_policy(arg_policy), m_shmem_size(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize<FunctorType>::value( - arg_functor, arg_policy.team_size())) {} + arg_functor, arg_policy.team_size())) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } + } }; //---------------------------------------------------------------------------- @@ -968,9 +1038,6 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, using Policy = Kokkos::Impl::TeamPolicyInternal<Kokkos::OpenMP, Properties...>; - using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; - using WorkTag = typename Policy::work_tag; using SchedTag = typename Policy::schedule_type::type; using Member = typename Policy::member_type; @@ -984,13 +1051,13 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, void>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; + using Analysis = + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>; using pointer_type = typename Analysis::pointer_type; using reference_type = typename Analysis::reference_type; - OpenMPExec* m_instance; + OpenMPInternal* m_instance; const FunctorType m_functor; const Policy m_policy; const ReducerType m_reducer; @@ -998,11 +1065,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, const int m_shmem_size; template 
<class TagType> - inline static - typename std::enable_if<(std::is_same<TagType, void>::value)>::type - exec_team(const FunctorType& functor, HostThreadTeamData& data, - reference_type& update, const int league_rank_begin, - const int league_rank_end, const int league_size) { + inline static std::enable_if_t<(std::is_void<TagType>::value)> exec_team( + const FunctorType& functor, HostThreadTeamData& data, + reference_type& update, const int league_rank_begin, + const int league_rank_end, const int league_size) { for (int r = league_rank_begin; r < league_rank_end;) { functor(Member(data, r, league_size), update); @@ -1017,11 +1083,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } template <class TagType> - inline static - typename std::enable_if<(!std::is_same<TagType, void>::value)>::type - exec_team(const FunctorType& functor, HostThreadTeamData& data, - reference_type& update, const int league_rank_begin, - const int league_rank_end, const int league_size) { + inline static std::enable_if_t<(!std::is_void<TagType>::value)> exec_team( + const FunctorType& functor, HostThreadTeamData& data, + reference_type& update, const int league_rank_begin, + const int league_rank_end, const int league_size) { const TagType t{}; for (int r = league_rank_begin; r < league_rank_end;) { @@ -1041,16 +1106,16 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, inline void execute() const { enum { is_dynamic = std::is_same<SchedTag, Kokkos::Dynamic>::value }; + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + if (m_policy.league_size() == 0 || m_policy.team_size() == 0) { if (m_result_ptr) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), m_result_ptr); + final_reducer.init(m_result_ptr); + final_reducer.final(m_result_ptr); } return; } 
- OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_reduce"); const size_t pool_reduce_size = Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); @@ -1083,9 +1148,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } if (active) { - reference_type update = - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - data.pool_reduce_local()); + reference_type update = final_reducer.init( + reinterpret_cast<pointer_type>(data.pool_reduce_local())); std::pair<int64_t, int64_t> range(0, 0); @@ -1099,8 +1163,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } while (is_dynamic && 0 <= range.first); } else { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - data.pool_reduce_local()); + final_reducer.init( + reinterpret_cast<pointer_type>(data.pool_reduce_local())); } data.disband_team(); @@ -1122,12 +1186,12 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, pointer_type(m_instance->get_thread_data(0)->pool_reduce_local()); for (int i = 1; i < pool_size; ++i) { - ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), ptr, - m_instance->get_thread_data(i)->pool_reduce_local()); + final_reducer.join( + ptr, reinterpret_cast<pointer_type>( + m_instance->get_thread_data(i)->pool_reduce_local())); } - Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>::final( - ReducerConditional::select(m_functor, m_reducer), ptr); + final_reducer.final(ptr); if (m_result_ptr) { const int n = Analysis::value_count( @@ -1145,21 +1209,27 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, inline ParallelReduce( const FunctorType& arg_functor, const Policy& arg_policy, const ViewType& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = nullptr) - : m_instance(t_openmp_instance), + std::enable_if_t<Kokkos::is_view<ViewType>::value && + 
!Kokkos::is_reducer<ReducerType>::value, + void*> = nullptr) + : m_instance(nullptr), m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), m_result_ptr(arg_result.data()), m_shmem_size(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize<FunctorType>::value( - arg_functor, arg_policy.team_size())) {} + arg_functor, arg_policy.team_size())) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } + } inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, const ReducerType& reducer) - : m_instance(t_openmp_instance), + : m_instance(nullptr), m_functor(arg_functor), m_policy(arg_policy), m_reducer(reducer), @@ -1167,6 +1237,11 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, m_shmem_size(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize<FunctorType>::value( arg_functor, arg_policy.team_size())) { + if (t_openmp_instance) { + m_instance = t_openmp_instance; + } else { + m_instance = arg_policy.space().impl_internal_space_instance(); + } /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" @@ -1180,5 +1255,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#undef KOKKOS_PRAGMA_IVDEP_IF_ENABLED +#undef KOKKOS_OPENMP_OPTIONAL_CHUNK_SIZE + #endif #endif /* KOKKOS_OPENMP_PARALLEL_HPP */ diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index f7338819af53300839895021ada12c11323a2f82..4babcf03d9c143d7221bcd8c7d554455835aefc9 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ 
b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #if defined(KOKKOS_ENABLE_OPENMP) && defined(KOKKOS_ENABLE_TASKDAG) diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index d9234e34191a39a84b34d9651975b09fa5801a35..ec1ede0e2abf59dea97f5902c0ea372b7e99b050 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -75,7 +75,7 @@ class HostThreadTeamDataSingleton : private HostThreadTeamData { // TODO @tasking @cleanup DSH Make this the general class template and make the // old code the partial specialization template <class QueueType> -class TaskQueueSpecialization<SimpleTaskScheduler<Kokkos::OpenMP, QueueType> > { +class TaskQueueSpecialization<SimpleTaskScheduler<Kokkos::OpenMP, QueueType>> { public: using execution_space = Kokkos::OpenMP; using scheduler_type = SimpleTaskScheduler<Kokkos::OpenMP, QueueType>; @@ -96,10 +96,8 @@ class TaskQueueSpecialization<SimpleTaskScheduler<Kokkos::OpenMP, QueueType> > { // HostThreadTeamData& team_data_single = // HostThreadTeamDataSingleton::singleton(); - // TODO @tasking @generalization DSH use - // scheduler.get_execution_space().impl() (or something like that) instead - // of the thread-local variable - Impl::OpenMPExec* instance = t_openmp_instance; + Impl::OpenMPInternal* instance = + execution_space().impl_internal_space_instance(); const int pool_size = get_max_team_count(scheduler.get_execution_space()); // TODO @tasking @new_feature DSH allow team sizes other than 1 @@ -198,8 +196,8 @@ class TaskQueueSpecialization<SimpleTaskScheduler<Kokkos::OpenMP, QueueType> > { template <class Scheduler> class TaskQueueSpecializationConstrained< Scheduler, - typename std::enable_if<std::is_same<typename Scheduler::execution_space, - 
Kokkos::OpenMP>::value>::type> { + std::enable_if_t<std::is_same<typename Scheduler::execution_space, + Kokkos::OpenMP>::value>> { public: using execution_space = Kokkos::OpenMP; using scheduler_type = Scheduler; @@ -258,8 +256,9 @@ class TaskQueueSpecializationConstrained< HostThreadTeamData& team_data_single = HostThreadTeamDataSingleton::singleton(); - Impl::OpenMPExec* instance = t_openmp_instance; - const int pool_size = OpenMP::impl_thread_pool_size(); + Impl::OpenMPInternal* instance = + execution_space().impl_internal_space_instance(); + const int pool_size = OpenMP::impl_thread_pool_size(); const int team_size = 1; // Threads per core instance->resize_thread_data(0 /* global reduce buffer */ diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp index be7afd32883df4869a8919eb7460e812f3be6e0e..73533178b6f4d943c7bfe776ac38c24d081a40f8 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp @@ -48,7 +48,7 @@ #include <Kokkos_Macros.hpp> #if defined(KOKKOS_ENABLE_OPENMP) -#include <OpenMP/Kokkos_OpenMP_Exec.hpp> +#include <OpenMP/Kokkos_OpenMP_Instance.hpp> namespace Kokkos { namespace Impl { diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp index 92e4ee636a8a0d4e2c196672b81c4720376ce21c..55d9c58607315d8f5d53473648c8c72cf975101a 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp @@ -60,13 +60,13 @@ class ParallelFor<FunctorType, Kokkos::WorkGraphPolicy<Traits...>, FunctorType m_functor; template <class TagType> - typename std::enable_if<std::is_same<TagType, void>::value>::type exec_one( + std::enable_if_t<std::is_void<TagType>::value> exec_one( const std::int32_t w) const noexcept { m_functor(w); } template <class TagType> - 
typename std::enable_if<!std::is_same<TagType, void>::value>::type exec_one( + std::enable_if_t<!std::is_void<TagType>::value> exec_one( const std::int32_t w) const noexcept { const TagType t{}; m_functor(t, w); diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp index c95951a1119272cac3e35382a3224adcfe25c24c..5ff9bf32b6f7f94cada3641519c6a740f76652b0 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #include <algorithm> @@ -61,6 +65,7 @@ #include <Kokkos_OpenMPTargetSpace.hpp> #include <impl/Kokkos_Error.hpp> #include <Kokkos_Atomic.hpp> +#include <impl/Kokkos_MemorySpace.hpp> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -70,23 +75,66 @@ namespace Experimental { /* Default allocation mechanism */ OpenMPTargetSpace::OpenMPTargetSpace() {} -void *OpenMPTargetSpace::allocate(const size_t arg_alloc_size) const { - static_assert(sizeof(void *) == sizeof(uintptr_t), +void* OpenMPTargetSpace::impl_allocate( + + const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + static_assert(sizeof(void*) == sizeof(uintptr_t), "Error sizeof(void*) != sizeof(uintptr_t)"); - void *ptr; + void* ptr; ptr = omp_target_alloc(arg_alloc_size, omp_get_default_device()); + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t reported_size = + (arg_logical_size > 0) ? 
arg_logical_size : arg_alloc_size; + Kokkos::Profiling::allocateData(arg_handle, arg_label, ptr, reported_size); + } + return ptr; } -void OpenMPTargetSpace::deallocate(void *const arg_alloc_ptr, - const size_t /*arg_alloc_size*/) const { +void* OpenMPTargetSpace::allocate(const size_t arg_alloc_size) const { + return allocate("[unlabeled]", arg_alloc_size); +} + +void* OpenMPTargetSpace::allocate(const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return impl_allocate(arg_label, arg_alloc_size, arg_logical_size); +} + +void OpenMPTargetSpace::impl_deallocate( + const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t reported_size = + (arg_logical_size > 0) ? arg_logical_size : arg_alloc_size; + Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, + reported_size); + } if (arg_alloc_ptr) { omp_target_free(arg_alloc_ptr, omp_get_default_device()); } } + +void OpenMPTargetSpace::deallocate(void* const arg_alloc_ptr, + const size_t arg_alloc_size) const { + deallocate("[unlabeled]", arg_alloc_ptr, arg_alloc_size); +} + +void OpenMPTargetSpace::deallocate(const char* arg_label, + void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size) const + +{ + impl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size); +} + } // namespace Experimental } // namespace Kokkos @@ -103,14 +151,16 @@ SharedAllocationRecord<void, void> SharedAllocationRecord< SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::~SharedAllocationRecord() { - m_space.deallocate(SharedAllocationRecord<void, void>::m_alloc_ptr, - SharedAllocationRecord<void, void>::m_alloc_size); + auto alloc_size = SharedAllocationRecord<void, void>::m_alloc_size; + m_space.deallocate(m_label.c_str(), + 
SharedAllocationRecord<void, void>::m_alloc_ptr, + alloc_size, (alloc_size - sizeof(SharedAllocationHeader))); } SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: SharedAllocationRecord( - const Kokkos::Experimental::OpenMPTargetSpace &arg_space, - const std::string &arg_label, const size_t arg_alloc_size, + const Kokkos::Experimental::OpenMPTargetSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, const SharedAllocationRecord<void, void>::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through deallocation function @@ -119,8 +169,8 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: &SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::s_root_record, #endif - reinterpret_cast<SharedAllocationHeader *>(arg_space.allocate( - sizeof(SharedAllocationHeader) + arg_alloc_size)), + Kokkos::Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, arg_label), m_space(arg_space) { @@ -143,42 +193,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: } // namespace Impl } // namespace Kokkos -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -template <class> -struct ViewOperatorBoundsErrorAbort; - -template <> -struct ViewOperatorBoundsErrorAbort<Kokkos::Experimental::OpenMPTargetSpace> { - static void apply(const size_t rank, const size_t n0, const size_t n1, - const size_t n2, const size_t n3, const size_t n4, - const size_t n5, const size_t n6, const size_t n7, - const size_t i0, const size_t i1, const size_t i2, - const size_t i3, const size_t i4, const size_t i5, - const size_t i6, const size_t i7); -}; - -void 
ViewOperatorBoundsErrorAbort<Kokkos::Experimental::OpenMPTargetSpace>:: - apply(const size_t rank, const size_t n0, const size_t n1, const size_t n2, - const size_t n3, const size_t n4, const size_t n5, const size_t n6, - const size_t n7, const size_t i0, const size_t i1, const size_t i2, - const size_t i3, const size_t i4, const size_t i5, const size_t i6, - const size_t i7) { - printf( - "View operator bounds error : rank(%lu) " - "dim(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu) " - "index(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu)", - rank, n0, n1, n2, n3, n4, n5, n6, n7, i0, i1, i2, i3, i4, i5, i6, i7); - // Kokkos::Impl::throw_runtime_exception( buffer ); -} - -} // namespace Impl -} // namespace Kokkos - /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ /* diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp index 7ff885ed86b94e942cce18ff2e7ff1ed664129fa..d3bec5aebff8c425aaf5c4cc5060844f6cf55874 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <stdio.h> #include <limits> #include <iostream> @@ -118,7 +122,8 @@ void OpenMPTargetExec::clear_lock_array() { void* OpenMPTargetExec::get_scratch_ptr() { return m_scratch_ptr; } void OpenMPTargetExec::resize_scratch(int64_t team_size, int64_t shmem_size_L0, - int64_t shmem_size_L1) { + int64_t shmem_size_L1, + int64_t league_size) { Kokkos::Experimental::OpenMPTargetSpace space; const int64_t shmem_size = shmem_size_L0 + shmem_size_L1; // L0 + L1 scratch memory per team. 
@@ -127,7 +132,7 @@ void OpenMPTargetExec::resize_scratch(int64_t team_size, int64_t shmem_size_L0, // on the maximum number of in-flight teams possible. int64_t total_size = (shmem_size + OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE + padding) * - (MAX_ACTIVE_THREADS / team_size); + std::min(MAX_ACTIVE_THREADS / team_size, league_size); if (total_size > m_scratch_size) { space.deallocate(m_scratch_ptr, m_scratch_size); diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp index 9d0507847129d17d0a33962bacdfaefacb95ed1e..52f5dcb83e8f3d7390ae8c7382fb0405363350ae 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp @@ -59,6 +59,13 @@ #define KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND #endif +// FIXME_OPENMPTARGET - Using this macro to implement a workaround for +// hierarchical scan. It avoids hitting the code path which we wanted to +// write but doesn't work. undef'ed at the end. +#ifndef KOKKOS_ARCH_INTEL_GPU +#define KOKKOS_IMPL_TEAM_SCAN_WORKAROUND +#endif + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -69,33 +76,23 @@ template <class Reducer> struct OpenMPTargetReducerWrapper { using value_type = typename Reducer::value_type; + // Using a generic unknown Reducer for the OpenMPTarget backend is not + // implemented. 
KOKKOS_INLINE_FUNCTION - static void join(value_type&, const value_type&) { - printf( - "Using a generic unknown Reducer for the OpenMPTarget backend is not " - "implemented."); - } + static void join(value_type&, const value_type&) = delete; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type&, const volatile value_type&) { - printf( - "Using a generic unknown Reducer for the OpenMPTarget backend is not " - "implemented."); - } + static void join(volatile value_type&, const volatile value_type&) = delete; KOKKOS_INLINE_FUNCTION - static void init(value_type&) { - printf( - "Using a generic unknown Reducer for the OpenMPTarget backend is not " - "implemented."); - } + static void init(value_type&) = delete; }; template <class Scalar, class Space> struct OpenMPTargetReducerWrapper<Sum<Scalar, Space>> { public: // Required - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; // Required KOKKOS_INLINE_FUNCTION @@ -116,7 +113,7 @@ template <class Scalar, class Space> struct OpenMPTargetReducerWrapper<Prod<Scalar, Space>> { public: // Required - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; // Required KOKKOS_INLINE_FUNCTION @@ -137,7 +134,7 @@ template <class Scalar, class Space> struct OpenMPTargetReducerWrapper<Min<Scalar, Space>> { public: // Required - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; // Required KOKKOS_INLINE_FUNCTION @@ -160,7 +157,7 @@ template <class Scalar, class Space> struct OpenMPTargetReducerWrapper<Max<Scalar, Space>> { public: // Required - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; // Required KOKKOS_INLINE_FUNCTION @@ -184,7 +181,7 @@ template <class Scalar, class Space> struct OpenMPTargetReducerWrapper<LAnd<Scalar, Space>> { public: // Required - using value_type = typename 
std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; KOKKOS_INLINE_FUNCTION static void join(value_type& dest, const value_type& src) { @@ -206,7 +203,7 @@ template <class Scalar, class Space> struct OpenMPTargetReducerWrapper<LOr<Scalar, Space>> { public: // Required - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; using result_view_type = Kokkos::View<value_type, Space>; @@ -231,7 +228,7 @@ template <class Scalar, class Space> struct OpenMPTargetReducerWrapper<BAnd<Scalar, Space>> { public: // Required - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; // Required KOKKOS_INLINE_FUNCTION @@ -254,7 +251,7 @@ template <class Scalar, class Space> struct OpenMPTargetReducerWrapper<BOr<Scalar, Space>> { public: // Required - using value_type = typename std::remove_cv<Scalar>::type; + using value_type = std::remove_cv_t<Scalar>; // Required KOKKOS_INLINE_FUNCTION @@ -276,8 +273,8 @@ struct OpenMPTargetReducerWrapper<BOr<Scalar, Space>> { template <class Scalar, class Index, class Space> struct OpenMPTargetReducerWrapper<MinLoc<Scalar, Index, Space>> { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -304,8 +301,8 @@ struct OpenMPTargetReducerWrapper<MinLoc<Scalar, Index, Space>> { template <class Scalar, class Index, class Space> struct OpenMPTargetReducerWrapper<MaxLoc<Scalar, Index, Space>> { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -331,7 +328,7 @@ struct OpenMPTargetReducerWrapper<MaxLoc<Scalar, Index, Space>> { template <class Scalar, 
class Space> struct OpenMPTargetReducerWrapper<MinMax<Scalar, Space>> { private: - using scalar_type = typename std::remove_cv<Scalar>::type; + using scalar_type = std::remove_cv_t<Scalar>; public: // Required @@ -368,8 +365,8 @@ struct OpenMPTargetReducerWrapper<MinMax<Scalar, Space>> { template <class Scalar, class Index, class Space> struct OpenMPTargetReducerWrapper<MinMaxLoc<Scalar, Index, Space>> { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -415,8 +412,8 @@ struct OpenMPTargetReducerWrapper<MinMaxLoc<Scalar, Index, Space>> { template <class Scalar, class Index, class Space> struct OpenMPTargetReducerWrapper<MaxFirstLoc<Scalar, Index, Space>> { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -458,8 +455,8 @@ struct OpenMPTargetReducerWrapper<MaxFirstLoc<Scalar, Index, Space>> { template <class Scalar, class Index, class Space> struct OpenMPTargetReducerWrapper<MinFirstLoc<Scalar, Index, Space>> { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -501,8 +498,8 @@ struct OpenMPTargetReducerWrapper<MinFirstLoc<Scalar, Index, Space>> { template <class Scalar, class Index, class Space> struct OpenMPTargetReducerWrapper<MinMaxFirstLastLoc<Scalar, Index, Space>> { private: - using scalar_type = typename std::remove_cv<Scalar>::type; - using index_type = typename std::remove_cv<Index>::type; + using scalar_type = std::remove_cv_t<Scalar>; + using index_type = 
std::remove_cv_t<Index>; public: // Required @@ -563,7 +560,7 @@ struct OpenMPTargetReducerWrapper<MinMaxFirstLastLoc<Scalar, Index, Space>> { template <class Index, class Space> struct OpenMPTargetReducerWrapper<FirstLoc<Index, Space>> { private: - using index_type = typename std::remove_cv<Index>::type; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -601,7 +598,7 @@ struct OpenMPTargetReducerWrapper<FirstLoc<Index, Space>> { template <class Index, class Space> struct OpenMPTargetReducerWrapper<LastLoc<Index, Space>> { private: - using index_type = typename std::remove_cv<Index>::type; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -639,7 +636,7 @@ struct OpenMPTargetReducerWrapper<LastLoc<Index, Space>> { template <class Index, class Space> struct OpenMPTargetReducerWrapper<StdIsPartitioned<Index, Space>> { private: - using index_type = typename std::remove_cv<Index>::type; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -686,7 +683,7 @@ struct OpenMPTargetReducerWrapper<StdIsPartitioned<Index, Space>> { template <class Index, class Space> struct OpenMPTargetReducerWrapper<StdPartitionPoint<Index, Space>> { private: - using index_type = typename std::remove_cv<Index>::type; + using index_type = std::remove_cv_t<Index>; public: // Required @@ -768,7 +765,7 @@ class OpenMPTargetExec { static void clear_lock_array(); static void resize_scratch(int64_t team_reduce_bytes, int64_t team_shared_bytes, - int64_t thread_local_bytes); + int64_t thread_local_bytes, int64_t league_size); static void* m_scratch_ptr; static int64_t m_scratch_size; @@ -797,7 +794,7 @@ class OpenMPTargetExecTeamMember { using scratch_memory_space = execution_space::scratch_memory_space; scratch_memory_space m_team_shared; - int m_team_scratch_size[2]; + size_t m_team_scratch_size[2]; int m_team_rank; int m_team_size; int m_league_rank; @@ -845,9 +842,8 @@ class OpenMPTargetExecTeamMember { KOKKOS_INLINE_FUNCTION void 
team_broadcast(ValueType& value, int thread_id) const { // Make sure there is enough scratch space: - using type = - typename std::conditional<(sizeof(ValueType) < TEAM_REDUCE_SIZE), - ValueType, void>::type; + using type = std::conditional_t<(sizeof(ValueType) < TEAM_REDUCE_SIZE), + ValueType, void>; type* team_scratch = reinterpret_cast<type*>(static_cast<char*>(m_glb_scratch) + TEAM_REDUCE_SIZE * omp_get_team_num()); @@ -864,13 +860,15 @@ class OpenMPTargetExecTeamMember { team_broadcast(value, thread_id); } + // FIXME_OPENMPTARGET this function has the wrong interface and currently + // ignores the reducer passed. template <class ValueType, class JoinOp> KOKKOS_INLINE_FUNCTION ValueType team_reduce(const ValueType& value, - const JoinOp& op_in) const { + const JoinOp&) const { #pragma omp barrier using value_type = ValueType; - const JoinLambdaAdapter<value_type, JoinOp> op(op_in); + // const JoinLambdaAdapter<value_type, JoinOp> op(op_in); // Make sure there is enough scratch space: using type = std::conditional_t<(sizeof(value_type) < TEAM_REDUCE_SIZE), @@ -983,7 +981,7 @@ class OpenMPTargetExecTeamMember { // Properties ...> & team , void* const glb_scratch, const int shmem_block_index, - const int shmem_size_L0, const int shmem_size_L1) + const size_t shmem_size_L0, const size_t shmem_size_L1) : m_team_scratch_size{shmem_size_L0, shmem_size_L1}, m_team_rank(0), m_team_size(team_size), @@ -1322,11 +1320,10 @@ TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, - Impl::OpenMPTargetExecTeamMember> + std::common_type_t<iType1, iType2>, Impl::OpenMPTargetExecTeamMember> TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType1& begin, const iType2& end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return 
Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPTargetExecTeamMember>(thread, iType(begin), iType(end)); @@ -1343,11 +1340,10 @@ ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, - Impl::OpenMPTargetExecTeamMember> + std::common_type_t<iType1, iType2>, Impl::OpenMPTargetExecTeamMember> ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType1& arg_begin, const iType2& arg_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::OpenMPTargetExecTeamMember>(thread, iType(arg_begin), iType(arg_end)); @@ -1364,11 +1360,10 @@ TeamVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, - Impl::OpenMPTargetExecTeamMember> + std::common_type_t<iType1, iType2>, Impl::OpenMPTargetExecTeamMember> TeamVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType1& arg_begin, const iType2& arg_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamVectorRangeBoundariesStruct< iType, Impl::OpenMPTargetExecTeamMember>(thread, iType(arg_begin), iType(arg_end)); @@ -1411,12 +1406,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( */ template <typename iType, class Lambda, typename ValueType> -KOKKOS_INLINE_FUNCTION - std::enable_if_t<!Kokkos::is_reducer_type<ValueType>::value> - parallel_reduce( - const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, - const Lambda& lambda, ValueType& result) { +KOKKOS_INLINE_FUNCTION 
std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ValueType& result) { // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of // elements in the array <= 32. For reduction we allocate, 16 bytes per // element in the scratch space, hence, 16*32 = 512. @@ -1456,12 +1449,10 @@ KOKKOS_INLINE_FUNCTION // and crashes. We should try this with every new compiler // This is the variant we actually wanted to write template <typename iType, class Lambda, typename ReducerType> -KOKKOS_INLINE_FUNCTION - std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> - parallel_reduce( - const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, - const Lambda& lambda, ReducerType result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType result) { using ValueType = typename ReducerType::value_type; #pragma omp declare reduction( \ @@ -1491,12 +1482,10 @@ KOKKOS_INLINE_FUNCTION } #else template <typename iType, class Lambda, typename ReducerType> -KOKKOS_INLINE_FUNCTION - std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> - parallel_reduce( - const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, - const Lambda& lambda, ReducerType result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType result) { using ValueType = typename ReducerType::value_type; // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of @@ 
-1602,21 +1591,33 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( const Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPTargetExecTeamMember>& loop_bounds, const FunctorType& lambda) { - // Extract value_type from lambda - using value_type = typename Kokkos::Impl::FunctorAnalysis< - Kokkos::Impl::FunctorPatternInterface::SCAN, void, - FunctorType>::value_type; + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN, + TeamPolicy<Experimental::OpenMPTarget>, + FunctorType>; + using value_type = typename Analysis::value_type; const auto start = loop_bounds.start; const auto end = loop_bounds.end; - // Note this thing is called .member in the CUDA specialization of - // TeamThreadRangeBoundariesStruct + // Note this thing is called .member in the CUDA specialization of + // TeamThreadRangeBoundariesStruct auto& member = loop_bounds.team; const auto team_size = member.team_size(); const auto team_rank = member.team_rank(); - const auto nchunk = (end - start + team_size - 1) / team_size; - value_type accum = 0; - // each team has to process one or more chunks of the prefix scan + +#if defined(KOKKOS_IMPL_TEAM_SCAN_WORKAROUND) + value_type scan_val = value_type(); + + if (team_rank == 0) { + for (iType i = start; i < end; ++i) { + lambda(i, scan_val, true); + } + } +#pragma omp barrier +#else + const auto nchunk = (end - start + team_size - 1) / team_size; + value_type accum = 0; + // each team has to process one or + // more chunks of the prefix scan for (iType i = 0; i < nchunk; ++i) { auto ii = start + i * team_size + team_rank; // local accumulation for this chunk @@ -1634,6 +1635,7 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( // broadcast last value to rest of the team member.team_broadcast(accum, team_size - 1); } +#endif } } // namespace Kokkos @@ -1686,12 +1688,10 @@ KOKKOS_INLINE_FUNCTION void parallel_reduce( } template <typename iType, class Lambda, typename ReducerType> -KOKKOS_INLINE_FUNCTION - 
std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> - parallel_reduce( - const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, - const Lambda& lambda, ReducerType const& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType const& result) { using ValueType = typename ReducerType::value_type; #pragma omp declare reduction( \ @@ -1756,8 +1756,10 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( const Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, const FunctorType& lambda) { - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = typename ValueTraits::value_type; + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN, + TeamPolicy<Experimental::OpenMPTarget>, + FunctorType>; + using value_type = typename Analysis::value_type; value_type scan_val = value_type(); @@ -1771,6 +1773,10 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( } // namespace Kokkos +#ifdef KOKKOS_IMPL_TEAM_SCAN_WORKAROUND +#undef KOKKOS_IMPL_TEAM_SCAN_WORKAROUND +#endif + namespace Kokkos { /** \brief Intra-team vector parallel_for. Executes lambda(iType i) for each * i=0..N-1. 
@@ -1833,12 +1839,10 @@ KOKKOS_INLINE_FUNCTION void parallel_reduce( #if !defined(KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND) template <typename iType, class Lambda, typename ReducerType> -KOKKOS_INLINE_FUNCTION - std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> - parallel_reduce( - const Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, - const Lambda& lambda, ReducerType const& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType const& result) { using ValueType = typename ReducerType::value_type; // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of @@ -1869,12 +1873,10 @@ KOKKOS_INLINE_FUNCTION } #else template <typename iType, class Lambda, typename ReducerType> -KOKKOS_INLINE_FUNCTION - std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> - parallel_reduce( - const Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, - const Lambda& lambda, ReducerType const& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType const& result) { using ValueType = typename ReducerType::value_type; // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp index e421edc5b4c108b03fb8680863184828df243dd7..51921765baf249a1f1dacc57221bc4f4a398c79d 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp +++ 
b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(_OPENMP) @@ -53,6 +57,7 @@ #include <Kokkos_OpenMPTarget.hpp> #include <OpenMPTarget/Kokkos_OpenMPTarget_UniqueToken.hpp> #include <OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp> +#include <impl/Kokkos_ExecSpaceManager.hpp> #include <sstream> @@ -89,10 +94,10 @@ void OpenMPTargetInternal::fence(const std::string& name, } int OpenMPTargetInternal::concurrency() { return 128000; } const char* OpenMPTargetInternal::name() { return "OpenMPTarget"; } -void OpenMPTargetInternal::print_configuration(std::ostream& /*stream*/, - const bool) { +void OpenMPTargetInternal::print_configuration(std::ostream& os, + bool /*verbose*/) const { // FIXME_OPENMPTARGET - printf("Using OpenMPTarget\n"); + os << "Using OpenMPTarget\n"; } void OpenMPTargetInternal::impl_finalize() { @@ -133,9 +138,13 @@ OpenMPTarget::OpenMPTarget() const char* OpenMPTarget::name() { return Impl::OpenMPTargetInternal::impl_singleton()->name(); } -void OpenMPTarget::print_configuration(std::ostream& stream, - const bool detail) { - m_space_instance->print_configuration(stream, detail); +void OpenMPTarget::print_configuration(std::ostream& os, bool verbose) const { + os << "OpenMPTarget Execution Space:\n"; + os << " KOKKOS_ENABLE_OPENMPTARGET: yes\n"; + + os << "\nOpenMPTarget Runtime Configuration:\n"; + + m_space_instance->print_configuration(os, verbose); } uint32_t OpenMPTarget::impl_instance_id() const noexcept { @@ -145,25 +154,22 @@ uint32_t OpenMPTarget::impl_instance_id() const noexcept { int OpenMPTarget::concurrency() { return Impl::OpenMPTargetInternal::impl_singleton()->concurrency(); } -void OpenMPTarget::fence() { - Impl::OpenMPTargetInternal::impl_singleton()->fence( - "Kokkos::OpenMPTarget::fence: Unnamed Instance Fence"); -} + 
void OpenMPTarget::fence(const std::string& name) { Impl::OpenMPTargetInternal::impl_singleton()->fence(name); } -void OpenMPTarget::impl_static_fence() { - Impl::OpenMPTargetInternal::impl_singleton()->fence( - "Kokkos::OpenMPTarget::fence: Unnamed Instance Fence", - Kokkos::Experimental::Impl::openmp_fence_is_static::yes); -} + void OpenMPTarget::impl_static_fence(const std::string& name) { Impl::OpenMPTargetInternal::impl_singleton()->fence( name, Kokkos::Experimental::Impl::openmp_fence_is_static::yes); } -void OpenMPTarget::impl_initialize() { m_space_instance->impl_initialize(); } -void OpenMPTarget::impl_finalize() { m_space_instance->impl_finalize(); } +void OpenMPTarget::impl_initialize(InitializationSettings const&) { + Impl::OpenMPTargetInternal::impl_singleton()->impl_initialize(); +} +void OpenMPTarget::impl_finalize() { + Impl::OpenMPTargetInternal::impl_singleton()->impl_finalize(); +} int OpenMPTarget::impl_is_initialized() { return Impl::OpenMPTargetInternal::impl_singleton()->impl_is_initialized(); } @@ -171,52 +177,9 @@ int OpenMPTarget::impl_is_initialized() { namespace Impl { int g_openmptarget_space_factory_initialized = - Kokkos::Impl::initialize_space_factory<OpenMPTargetSpaceInitializer>( + Kokkos::Impl::initialize_space_factory<Experimental::OpenMPTarget>( "160_OpenMPTarget"); -void OpenMPTargetSpaceInitializer::initialize(const InitArguments& args) { - // Prevent "unused variable" warning for 'args' input struct. If - // Serial::initialize() ever needs to take arguments from the input - // struct, you may remove this line of code. 
- (void)args; - - if (std::is_same<Kokkos::Experimental::OpenMPTarget, - Kokkos::DefaultExecutionSpace>::value) { - Kokkos::Experimental::OpenMPTarget().impl_initialize(); - // std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" - // << std::endl ; - } else { - // std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not - // initialized" << std::endl ; - } -} - -void OpenMPTargetSpaceInitializer::finalize(const bool all_spaces) { - if (std::is_same<Kokkos::Experimental::OpenMPTarget, - Kokkos::DefaultExecutionSpace>::value || - all_spaces) { - if (Kokkos::Experimental::OpenMPTarget().impl_is_initialized()) - Kokkos::Experimental::OpenMPTarget().impl_finalize(); - } -} - -void OpenMPTargetSpaceInitializer::fence() { - Kokkos::Experimental::OpenMPTarget::impl_static_fence(); -} -void OpenMPTargetSpaceInitializer::fence(const std::string& name) { - Kokkos::Experimental::OpenMPTarget::impl_static_fence(name); -} - -void OpenMPTargetSpaceInitializer::print_configuration(std::ostream& msg, - const bool detail) { - msg << "OpenMPTarget Execution Space:" << std::endl; - msg << " KOKKOS_ENABLE_OPENMPTARGET: "; - msg << "yes" << std::endl; - - msg << "\nOpenMPTarget Runtime Configuration:" << std::endl; - Kokkos::Experimental::OpenMPTarget().print_configuration(msg, detail); -} - } // namespace Impl } // Namespace Kokkos diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp index b495771190e35c661df26a5ab3bc1d53a544cff7..8e4baf8c0529f31d226d91470dbe7cc9674d6272 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp @@ -68,7 +68,7 @@ class OpenMPTargetInternal { int concurrency(); //! Print configuration information to the given output stream. 
- void print_configuration(std::ostream&, const bool detail = false); + void print_configuration(std::ostream& os, bool verbose) const; static const char* name(); diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp index ab38dea024caf64d498de18c4ff19790ddf4f74a..dfb9ea70a1af0458933ef22c695b1a689359add0 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp @@ -49,7 +49,6 @@ #include <sstream> #include <Kokkos_Parallel.hpp> #include <OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> namespace Kokkos { namespace Impl { @@ -84,7 +83,7 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, #pragma omp target teams distribute parallel for map(to : a_functor) for (auto i = begin; i < end; ++i) { - if constexpr (std::is_same<TagType, void>::value) { + if constexpr (std::is_void<TagType>::value) { a_functor(i); } else { a_functor(TagType(), i); @@ -127,8 +126,10 @@ template <class FunctorType, class PolicyType, class ReducerType, struct ParallelReduceSpecialize { inline static void execute(const FunctorType& /*f*/, const PolicyType& /*p*/, PointerType /*result_ptr*/) { - constexpr int FunctorHasJoin = ReduceFunctorHasJoin<FunctorType>::value; - constexpr int UseReducerType = is_reducer_type<ReducerType>::value; + constexpr int FunctorHasJoin = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType, + FunctorType>::has_join_member_function; + constexpr int UseReducerType = is_reducer<ReducerType>::value; std::stringstream error_message; error_message << "Error: Invalid Specialization " << FunctorHasJoin << ' ' @@ -145,17 +146,11 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, using PolicyType = Kokkos::RangePolicy<PolicyArgs...>; using TagType = typename 
PolicyType::work_tag; using ReducerTypeFwd = - typename std::conditional<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, TagType, - void>; - - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, TagType>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<FunctorType, TagType>; - using ReferenceType = typename ValueTraits::reference_type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + PolicyType, ReducerTypeFwd>; + using ReferenceType = typename Analysis::reference_type; using ParReduceCommon = ParallelReduceCommon<PointerType>; @@ -188,7 +183,7 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, : f) reduction(custom \ : result) for (auto i = begin; i < end; ++i) { - if constexpr (std::is_same<TagType, void>::value) { + if constexpr (std::is_void<TagType>::value) { f(i, result); } else { f(TagType(), i, result); @@ -226,7 +221,7 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, map(to:f) reduction(+: result) for (auto i = begin; i < end; ++i) - if constexpr (std::is_same<TagType, void>::value) { + if constexpr (std::is_void<TagType>::value) { f(i, result); } else { f(TagType(), i, result); @@ -238,7 +233,7 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, : result) for (auto i = begin; i < end; ++i) - if constexpr (std::is_same<TagType, void>::value) { + if constexpr (std::is_void<TagType>::value) { f(i, result); } else { f(TagType(), i, result); @@ -260,7 +255,7 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, } #pragma omp target teams distribute parallel for map(to:f) 
reduction(+:result[:NumReductions]) for (auto i = begin; i < end; ++i) { - if constexpr (std::is_same<TagType, void>::value) { + if constexpr (std::is_void<TagType>::value) { f(i, result); } else { f(TagType(), i, result); @@ -277,7 +272,10 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, const auto begin = p.begin(); const auto end = p.end(); - constexpr int HasInit = ReduceFunctorHasInit<FunctorType>::value; + using FunctorAnalysis = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType, + FunctorType>; + constexpr int HasInit = FunctorAnalysis::has_init_member_function; // Initialize the result pointer. @@ -290,31 +288,30 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, const int max_teams = OpenMPTargetExec::MAX_ACTIVE_THREADS / max_team_threads; // Number of elements in the reduction - const auto value_count = - FunctorValueTraits<FunctorType, TagType>::value_count(f); + const auto value_count = FunctorAnalysis::value_count(f); // Allocate scratch per active thread. Achieved by setting the first // parameter of `resize_scratch=1`. - OpenMPTargetExec::resize_scratch(1, 0, value_count * sizeof(ValueType)); + OpenMPTargetExec::resize_scratch(1, 0, value_count * sizeof(ValueType), + std::numeric_limits<int64_t>::max()); ValueType* scratch_ptr = static_cast<ValueType*>(OpenMPTargetExec::get_scratch_ptr()); #pragma omp target map(to : f) is_device_ptr(scratch_ptr) { + typename FunctorAnalysis::Reducer final_reducer(&f); // Enter this loop if the functor has an `init` if constexpr (HasInit) { // The `init` routine needs to be called on the device since it might // need device members. 
- ValueInit::init(f, scratch_ptr); - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, TagType>::final(f, scratch_ptr); + final_reducer.init(scratch_ptr); + final_reducer.final(scratch_ptr); } else { for (int i = 0; i < value_count; ++i) { static_cast<ValueType*>(scratch_ptr)[i] = ValueType(); } - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, TagType>::final(f, scratch_ptr); + final_reducer.final(scratch_ptr); } } @@ -337,6 +334,7 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, map(to \ : f) is_device_ptr(scratch_ptr) { + typename FunctorAnalysis::Reducer final_reducer(&f); #pragma omp parallel { const int team_num = omp_get_team_num(); @@ -347,13 +345,13 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, (team_num == num_teams - 1) ? end : (team_begin + chunk_size); ValueType* team_scratch = scratch_ptr + team_num * max_team_threads * value_count; - ReferenceType result = ValueInit::init( - f, &team_scratch[omp_get_thread_num() * value_count]); + ReferenceType result = final_reducer.init( + &team_scratch[omp_get_thread_num() * value_count]); // Accumulate partial results in thread specific storage. 
#pragma omp for simd for (auto i = team_begin; i < team_end; ++i) { - if constexpr (std::is_same<TagType, void>::value) { + if constexpr (std::is_void<TagType>::value) { f(i, result); } else { f(TagType(), i, result); @@ -368,8 +366,8 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, for (int i = 0; i < team_size - tree_neighbor_offset; i += 2 * tree_neighbor_offset) { const int neighbor = i + tree_neighbor_offset; - ValueJoin::join(f, &team_scratch[i * value_count], - &team_scratch[neighbor * value_count]); + final_reducer.join(&team_scratch[i * value_count], + &team_scratch[neighbor * value_count]); } tree_neighbor_offset *= 2; } while (tree_neighbor_offset < team_size); @@ -383,18 +381,18 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, is_device_ptr(scratch_ptr) for (int i = 0; i < max_teams - tree_neighbor_offset; i += 2 * tree_neighbor_offset) { + typename FunctorAnalysis::Reducer final_reducer(&f); ValueType* team_scratch = scratch_ptr; const int team_offset = max_team_threads * value_count; - ValueJoin::join( - f, &team_scratch[i * team_offset], + final_reducer.join( + &team_scratch[i * team_offset], &team_scratch[(i + tree_neighbor_offset) * team_offset]); // If `final` is provided by the functor. - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) { - // Do the final only once at the end. - if (tree_neighbor_offset * 2 >= max_teams && - omp_get_team_num() == 0 && omp_get_thread_num() == 0) - FunctorFinal<FunctorType, TagType>::final(f, scratch_ptr); + // Do the final only once at the end. 
+ if (tree_neighbor_offset * 2 >= max_teams && omp_get_team_num() == 0 && + omp_get_thread_num() == 0) { + final_reducer.final(scratch_ptr); } } tree_neighbor_offset *= 2; @@ -422,25 +420,23 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, using WorkRange = typename Policy::WorkRange; using ReducerTypeFwd = - typename std::conditional<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, - void>; - - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - - static constexpr int HasJoin = ReduceFunctorHasJoin<FunctorType>::value; - static constexpr int UseReducer = is_reducer_type<ReducerType>::value; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + Policy, ReducerTypeFwd>; + + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + + static constexpr int HasJoin = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, Policy, + FunctorType>::has_join_member_function; + static constexpr int UseReducer = is_reducer<ReducerType>::value; static constexpr int IsArray = std::is_pointer<reference_type>::value; using ParReduceSpecialize = ParallelReduceSpecialize<FunctorType, Policy, ReducerType, pointer_type, - typename ValueTraits::value_type>; + typename Analysis::value_type>; const FunctorType m_functor; const Policy m_policy; @@ -489,12 +485,11 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, } template <class ViewType> - ParallelReduce( - const FunctorType& arg_functor, Policy& arg_policy, - const ViewType& arg_result_view, - 
typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = nullptr) + ParallelReduce(const FunctorType& arg_functor, Policy& arg_policy, + const ViewType& arg_result_view, + std::enable_if_t<Kokkos::is_view<ViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void*> = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -537,28 +532,26 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, using Member = typename Policy::member_type; using idx_type = typename Policy::index_type; - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<FunctorType, WorkTag>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN, + Policy, FunctorType>; - using value_type = typename ValueTraits::value_type; - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; + using value_type = typename Analysis::value_type; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; const FunctorType m_functor; const Policy m_policy; template <class TagType> - typename std::enable_if<std::is_same<TagType, void>::value>::type - call_with_tag(const FunctorType& f, const idx_type& idx, value_type& val, - const bool& is_final) const { + std::enable_if_t<std::is_void<TagType>::value> call_with_tag( + const FunctorType& f, const idx_type& idx, value_type& val, + const bool& is_final) const { f(idx, val, is_final); } template <class TagType> - typename std::enable_if<!std::is_same<TagType, void>::value>::type - call_with_tag(const FunctorType& f, const idx_type& idx, value_type& val, - const bool& is_final) const { + 
std::enable_if_t<!std::is_void<TagType>::value> call_with_tag( + const FunctorType& f, const idx_type& idx, value_type& val, + const bool& is_final) const { f(WorkTag(), idx, val, is_final); } @@ -582,6 +575,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, : a_functor) num_teams(nteams) \ thread_limit(team_size) for (idx_type team_id = 0; team_id < n_chunks; ++team_id) { + typename Analysis::Reducer final_reducer(&a_functor); #pragma omp parallel num_threads(team_size) { const idx_type local_offset = team_id * chunk_size; @@ -590,16 +584,16 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, for (idx_type i = 0; i < chunk_size; ++i) { const idx_type idx = local_offset + i; value_type val; - ValueInit::init(a_functor, &val); + final_reducer.init(&val); if (idx < N) call_with_tag<WorkTag>(a_functor, idx, val, false); element_values(team_id, i) = val; } #pragma omp barrier if (omp_get_thread_num() == 0) { value_type sum; - ValueInit::init(a_functor, &sum); + final_reducer.init(&sum); for (idx_type i = 0; i < chunk_size; ++i) { - ValueJoin::join(a_functor, &sum, &element_values(team_id, i)); + final_reducer.join(&sum, &element_values(team_id, i)); element_values(team_id, i) = sum; } chunk_values(team_id) = sum; @@ -608,9 +602,9 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, if (omp_get_thread_num() == 0) { if (Kokkos::atomic_fetch_add(&count(), 1) == n_chunks - 1) { value_type sum; - ValueInit::init(a_functor, &sum); + final_reducer.init(&sum); for (idx_type i = 0; i < n_chunks; ++i) { - ValueJoin::join(a_functor, &sum, &chunk_values(i)); + final_reducer.join(&sum, &chunk_values(i)); chunk_values(i) = sum; } } @@ -622,6 +616,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, : a_functor) num_teams(nteams) \ thread_limit(team_size) for (idx_type team_id = 0; team_id < n_chunks; ++team_id) { + typename Analysis::Reducer final_reducer(&a_functor); #pragma omp parallel num_threads(team_size) { const 
idx_type local_offset = team_id * chunk_size; @@ -629,7 +624,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, if (team_id > 0) offset_value = chunk_values(team_id - 1); else - ValueInit::init(a_functor, &offset_value); + final_reducer.init(&offset_value); #pragma omp for for (idx_type i = 0; i < chunk_size; ++i) { @@ -637,7 +632,18 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, value_type local_offset_value; if (i > 0) { local_offset_value = element_values(team_id, i - 1); - ValueJoin::join(a_functor, &local_offset_value, &offset_value); + // FIXME_OPENMPTARGET We seem to access memory illegaly on AMD GPUs +#ifdef KOKKOS_ARCH_VEGA + if constexpr (Analysis::has_join_member_function) { + if constexpr (std::is_void_v<WorkTag>) + a_functor.join(local_offset_value, offset_value); + else + a_functor.join(WorkTag{}, local_offset_value, offset_value); + } else + local_offset_value += offset_value; +#else + final_reducer.join(&local_offset_value, &offset_value); +#endif } else local_offset_value = offset_value; if (idx < N) @@ -708,7 +714,7 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, base_t::impl_execute(element_values, chunk_values, count); - const int size = base_t::ValueTraits::value_size(base_t::m_functor); + const int size = base_t::Analysis::value_size(base_t::m_functor); DeepCopy<HostSpace, Kokkos::Experimental::OpenMPTargetSpace>( &m_returnvalue, chunk_values.data() + (n_chunks - 1), size); } else { @@ -742,7 +748,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, const FunctorType m_functor; const Policy m_policy; - const int m_shmem_size; + const size_t m_shmem_size; public: void execute() const { @@ -766,7 +772,8 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, const size_t shmem_size_L0 = m_policy.scratch_size(0, team_size); const size_t shmem_size_L1 = m_policy.scratch_size(1, team_size); - OpenMPTargetExec::resize_scratch(team_size, shmem_size_L0, 
shmem_size_L1); + OpenMPTargetExec::resize_scratch(team_size, shmem_size_L0, shmem_size_L1, + league_size); void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); FunctorType a_functor(m_functor); @@ -781,6 +788,9 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, const auto nteams = league_size < max_active_teams ? league_size : max_active_teams; + // If the league size is <=0, do not launch the kernel. + if (nteams <= 0) return; + // Performing our own scheduling of teams to avoid separation of code between // teams-distribute and parallel. Gave a 2x performance boost in test cases with // the clang compiler. atomic_compare_exchange can be avoided since the standard @@ -803,7 +813,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, typename Policy::member_type team( league_id, league_size, team_size, vector_length, scratch_ptr, blockIdx, shmem_size_L0, shmem_size_L1); - if constexpr (std::is_same<TagType, void>::value) + if constexpr (std::is_void<TagType>::value) m_functor(team); else m_functor(TagType(), team); @@ -829,17 +839,12 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, using PolicyType = TeamPolicyInternal<PolicyArgs...>; using TagType = typename PolicyType::work_tag; using ReducerTypeFwd = - typename std::conditional<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, TagType, - void>; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + PolicyType, ReducerTypeFwd>; - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, TagType>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<FunctorType, TagType>; - using ReferenceType = typename 
ValueTraits::reference_type; + using ReferenceType = typename Analysis::reference_type; using ParReduceCommon = ParallelReduceCommon<PointerType>; @@ -857,7 +862,7 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, const size_t shmem_size_L0 = p.scratch_size(0, team_size); const size_t shmem_size_L1 = p.scratch_size(1, team_size); OpenMPTargetExec::resize_scratch(PolicyType::member_type::TEAM_REDUCE_SIZE, - shmem_size_L0, shmem_size_L1); + shmem_size_L0, shmem_size_L1, league_size); void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); ValueType result = ValueType(); @@ -867,6 +872,9 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, const auto nteams = league_size < max_active_teams ? league_size : max_active_teams; + // If the league size is <=0, do not launch the kernel. + if (nteams <= 0) return; + #pragma omp declare reduction( \ custom:ValueType \ : OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \ @@ -888,7 +896,7 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, typename PolicyType::member_type team( league_id, league_size, team_size, vector_length, scratch_ptr, blockIdx, shmem_size_L0, shmem_size_L1); - if constexpr (std::is_same<TagType, void>::value) + if constexpr (std::is_void<TagType>::value) f(team, result); else f(TagType(), team, result); @@ -917,7 +925,7 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, const size_t shmem_size_L0 = p.scratch_size(0, team_size); const size_t shmem_size_L1 = p.scratch_size(1, team_size); OpenMPTargetExec::resize_scratch(PolicyType::member_type::TEAM_REDUCE_SIZE, - shmem_size_L0, shmem_size_L1); + shmem_size_L0, shmem_size_L1, league_size); void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); // Maximum active teams possible. 
@@ -925,6 +933,9 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, const auto nteams = league_size < max_active_teams ? league_size : max_active_teams; + // If the league size is <=0, do not launch the kernel. + if (nteams <= 0) return; + // Case where the number of reduction items is 1. if constexpr (NumReductions == 1) { ValueType result = ValueType(); @@ -946,7 +957,7 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, typename PolicyType::member_type team( league_id, league_size, team_size, vector_length, scratch_ptr, blockIdx, shmem_size_L0, shmem_size_L1); - if constexpr (std::is_same<TagType, void>::value) + if constexpr (std::is_void<TagType>::value) f(team, result); else f(TagType(), team, result); @@ -973,7 +984,7 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, typename PolicyType::member_type team( league_id, league_size, team_size, vector_length, scratch_ptr, blockIdx, shmem_size_L0, shmem_size_L1); - if constexpr (std::is_same<TagType, void>::value) + if constexpr (std::is_void<TagType>::value) f(team, result); else f(TagType(), team, result); @@ -1004,7 +1015,7 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, typename PolicyType::member_type team( league_id, league_size, team_size, vector_length, scratch_ptr, blockIdx, shmem_size_L0, shmem_size_L1); - if constexpr (std::is_same<TagType, void>::value) + if constexpr (std::is_void<TagType>::value) f(team, result); else f(TagType(), team, result); @@ -1023,7 +1034,10 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, // RangePolicy. Need a new implementation. 
static void execute_init_join(const FunctorType& f, const PolicyType& p, PointerType ptr, const bool ptr_on_device) { - constexpr int HasInit = ReduceFunctorHasInit<FunctorType>::value; + using FunctorAnalysis = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType, + FunctorType>; + constexpr int HasInit = FunctorAnalysis::has_init_member_function; const int league_size = p.league_size(); const int team_size = p.team_size(); @@ -1047,11 +1061,11 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, const auto nteams = league_size; // Number of elements in the reduction - const auto value_count = - FunctorValueTraits<FunctorType, TagType>::value_count(f); + const auto value_count = FunctorAnalysis::value_count(f); // Allocate scratch per active thread. - OpenMPTargetExec::resize_scratch(1, 0, value_count * sizeof(ValueType)); + OpenMPTargetExec::resize_scratch(1, 0, value_count * sizeof(ValueType), + league_size); void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); // Enter this loop if the functor has an `init` @@ -1060,10 +1074,9 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, // device members. 
#pragma omp target map(to : f) is_device_ptr(scratch_ptr) { - ValueInit::init(f, scratch_ptr); - - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, TagType>::final(f, scratch_ptr); + typename FunctorAnalysis::Reducer final_reducer(&f); + final_reducer.init(scratch_ptr); + final_reducer.final(scratch_ptr); } } else { #pragma omp target map(to : f) is_device_ptr(scratch_ptr) @@ -1072,8 +1085,8 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, static_cast<ValueType*>(scratch_ptr)[i] = ValueType(); } - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, TagType>::final(f, scratch_ptr); + typename FunctorAnalysis::Reducer final_reducer(&f); + final_reducer.final(static_cast<ValueType*>(scratch_ptr)); } } @@ -1102,14 +1115,15 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, const int num_teams = omp_get_num_teams(); ValueType* team_scratch = static_cast<ValueType*>(scratch_ptr) + team_num * team_size * value_count; - ReferenceType result = ValueInit::init(f, &team_scratch[0]); + typename FunctorAnalysis::Reducer final_reducer(&f); + ReferenceType result = final_reducer.init(&team_scratch[0]); for (int league_id = team_num; league_id < league_size; league_id += num_teams) { typename PolicyType::member_type team( league_id, league_size, team_size, vector_length, scratch_ptr, team_num, shmem_size_L0, shmem_size_L1); - if constexpr (std::is_same<TagType, void>::value) { + if constexpr (std::is_void<TagType>::value) { f(team, result); } else { f(TagType(), team, result); @@ -1127,16 +1141,16 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, i += 2 * tree_neighbor_offset) { ValueType* team_scratch = static_cast<ValueType*>(scratch_ptr); const int team_offset = team_size * value_count; - ValueJoin::join( - f, &team_scratch[i * team_offset], + typename FunctorAnalysis::Reducer final_reducer(&f); + 
final_reducer.join( + &team_scratch[i * team_offset], &team_scratch[(i + tree_neighbor_offset) * team_offset]); // If `final` is provided by the functor. - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) { - // Do the final only once at the end. - if (tree_neighbor_offset * 2 >= nteams && omp_get_team_num() == 0 && - omp_get_thread_num() == 0) - FunctorFinal<FunctorType, TagType>::final(f, scratch_ptr); + // Do the final only once at the end. + if (tree_neighbor_offset * 2 >= nteams && omp_get_team_num() == 0 && + omp_get_thread_num() == 0) { + final_reducer.final(scratch_ptr); } } tree_neighbor_offset *= 2; @@ -1165,37 +1179,36 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, using WorkTag = typename Policy::work_tag; using Member = typename Policy::member_type; using ReducerTypeFwd = - typename std::conditional<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; using WorkTagFwd = std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, void>; + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + Policy, ReducerTypeFwd>; - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - using value_type = typename ValueTraits::value_type; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; bool m_result_ptr_on_device; const int m_result_ptr_num_elems; - static constexpr int HasJoin = ReduceFunctorHasJoin<FunctorType>::value; - static constexpr int 
UseReducer = is_reducer_type<ReducerType>::value; + static constexpr int HasJoin = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, Policy, + FunctorType>::has_join_member_function; + static constexpr int UseReducer = is_reducer<ReducerType>::value; static constexpr int IsArray = std::is_pointer<reference_type>::value; using ParReduceSpecialize = ParallelReduceSpecialize<FunctorType, Policy, ReducerType, pointer_type, - typename ValueTraits::value_type>; + typename Analysis::value_type>; const FunctorType m_functor; const Policy m_policy; const ReducerType m_reducer; const pointer_type m_result_ptr; - const int m_shmem_size; + const size_t m_shmem_size; public: void execute() const { @@ -1231,12 +1244,11 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, } template <class ViewType> - ParallelReduce( - const FunctorType& arg_functor, const Policy& arg_policy, - const ViewType& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = nullptr) + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const ViewType& arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void*> = nullptr) : m_result_ptr_on_device( MemorySpaceAccess<Kokkos::Experimental::OpenMPTargetSpace, typename ViewType::memory_space>::accessible), diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp index 40d8c45f5d0f8ce6798079d9eb3deb48a4361122..2399b424f58cf9bf632d64e87b3da2383f8f8270 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp @@ -48,7 +48,6 @@ #include <omp.h> #include <Kokkos_Parallel.hpp> #include <OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp> -#include 
<impl/Kokkos_FunctorAdapter.hpp> // WORKAROUND OPENMPTARGET: sometimes tile sizes don't make it correctly, // this was tracked down to a bug in clang with regards of mapping structs @@ -68,6 +67,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, using Policy = Kokkos::MDRangePolicy<Traits...>; using WorkTag = typename Policy::work_tag; using Member = typename Policy::member_type; + using Index = typename Policy::index_type; const FunctorType m_functor; const Policy m_policy; @@ -117,21 +117,21 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, } template <int Rank> - inline typename std::enable_if<Rank == 2>::type execute_tile( + inline std::enable_if_t<Rank == 2> execute_tile( typename Policy::point_type offset, const FunctorType& functor, const Policy& policy) const { #ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES (void)offset; - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; #pragma omp target teams distribute parallel for collapse(2) map(to : functor) for (auto i0 = begin_0; i0 < end_0; ++i0) { for (auto i1 = begin_1; i1 < end_1; ++i1) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1); else functor(typename Policy::work_tag(), i0, i1); @@ -149,7 +149,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, #pragma omp for collapse(2) for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1); else functor(typename Policy::work_tag(), i0, i1); @@ -158,24 
+158,24 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, } template <int Rank> - inline typename std::enable_if<Rank == 3>::type execute_tile( + inline std::enable_if_t<Rank == 3> execute_tile( typename Policy::point_type offset, const FunctorType& functor, const Policy& policy) const { #ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES (void)offset; - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; - const auto begin_2 = policy.m_lower[2]; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; - const auto end_2 = policy.m_upper[2]; + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; #pragma omp target teams distribute parallel for collapse(3) map(to : functor) for (auto i0 = begin_0; i0 < end_0; ++i0) { for (auto i1 = begin_1; i1 < end_1; ++i1) { for (auto i2 = begin_2; i2 < end_2; ++i2) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1, i2); else functor(typename Policy::work_tag(), i0, i1, i2); @@ -199,7 +199,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1, i2); else functor(typename Policy::work_tag(), i0, i1, i2); @@ -208,27 +208,27 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, } template <int Rank> - inline typename std::enable_if<Rank == 4>::type execute_tile( + inline std::enable_if_t<Rank == 4> execute_tile( typename Policy::point_type offset, const FunctorType& 
functor, const Policy& policy) const { #ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES (void)offset; - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; - const auto begin_2 = policy.m_lower[2]; - const auto begin_3 = policy.m_lower[3]; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; - const auto end_2 = policy.m_upper[2]; - const auto end_3 = policy.m_upper[3]; + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; #pragma omp target teams distribute parallel for collapse(4) map(to : functor) for (auto i0 = begin_0; i0 < end_0; ++i0) { for (auto i1 = begin_1; i1 < end_1; ++i1) { for (auto i2 = begin_2; i2 < end_2; ++i2) { for (auto i3 = begin_3; i3 < end_3; ++i3) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1, i2, i3); else functor(typename Policy::work_tag(), i0, i1, i2, i3); @@ -258,7 +258,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) for (ptrdiff_t i3 = begin_3; i3 < end_3; ++i3) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1, i2, i3); else functor(typename Policy::work_tag(), i0, i1, i2, i3); @@ -267,22 +267,22 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, } template <int Rank> - inline typename std::enable_if<Rank == 5>::type execute_tile( + inline std::enable_if_t<Rank == 5> execute_tile( typename Policy::point_type offset, const FunctorType& functor, const Policy& policy) const { 
#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES (void)offset; - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; - const auto begin_2 = policy.m_lower[2]; - const auto begin_3 = policy.m_lower[3]; - const auto begin_4 = policy.m_lower[4]; - - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; - const auto end_2 = policy.m_upper[2]; - const auto end_3 = policy.m_upper[3]; - const auto end_4 = policy.m_upper[4]; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; #pragma omp target teams distribute parallel for collapse(5) map(to : functor) for (auto i0 = begin_0; i0 < end_0; ++i0) { @@ -337,24 +337,24 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, } template <int Rank> - inline typename std::enable_if<Rank == 6>::type execute_tile( + inline std::enable_if_t<Rank == 6> execute_tile( typename Policy::point_type offset, const FunctorType& functor, const Policy& policy) const { #ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES (void)offset; - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; - const auto begin_2 = policy.m_lower[2]; - const auto begin_3 = policy.m_lower[3]; - const auto begin_4 = policy.m_lower[4]; - const auto begin_5 = policy.m_lower[5]; - - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; - const auto end_2 = policy.m_upper[2]; - const auto end_3 = policy.m_upper[3]; - const auto end_4 = policy.m_upper[4]; - const auto end_5 = policy.m_upper[5]; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = 
policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + const Index begin_5 = policy.m_lower[5]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + const Index end_5 = policy.m_upper[5]; #pragma omp target teams distribute parallel for collapse(6) map(to : functor) for (auto i0 = begin_0; i0 < end_0; ++i0) { @@ -446,23 +446,24 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, using WorkTag = typename Policy::work_tag; using Member = typename Policy::member_type; + using Index = typename Policy::index_type; using ReducerConditional = std::conditional<std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, - void>; + using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, + Policy, ReducerTypeFwd>; - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - - enum { HasJoin = ReduceFunctorHasJoin<FunctorType>::value }; - enum { UseReducer = is_reducer_type<ReducerType>::value }; + enum { + HasJoin = + Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, Policy, + FunctorType>::has_join_member_function + }; + enum { UseReducer = is_reducer<ReducerType>::value }; const pointer_type m_result_ptr; const FunctorType m_functor; @@ -475,7 +476,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, public: inline void execute() const { - 
execute_tile<Policy::rank, typename ValueTraits::value_type>( + execute_tile<Policy::rank, typename Analysis::value_type>( m_functor, m_policy, m_result_ptr); } @@ -483,9 +484,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, inline ParallelReduce( const FunctorType& arg_functor, Policy arg_policy, const ViewType& arg_result_view, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void*>::type = NULL) + std::enable_if_t<Kokkos::is_view<ViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void*> = NULL) : m_result_ptr(arg_result_view.data()), m_functor(arg_functor), m_policy(arg_policy), @@ -506,14 +507,14 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, memory_space>::accessible) {} template <int Rank, class ValueType> - inline typename std::enable_if<Rank == 2>::type execute_tile( - const FunctorType& functor, const Policy& policy, - pointer_type ptr) const { - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; + inline std::enable_if_t<Rank == 2> execute_tile(const FunctorType& functor, + const Policy& policy, + pointer_type ptr) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; ValueType result = ValueType(); @@ -531,7 +532,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, : result) for (auto i0 = begin_0; i0 < end_0; ++i0) { for (auto i1 = begin_1; i1 < end_1; ++i1) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1, result); else functor(typename Policy::work_tag(), i0, i1, result); @@ -542,7 +543,7 @@ class ParallelReduce<FunctorType, 
Kokkos::MDRangePolicy<Traits...>, ReducerType, reduction(+:result) for (auto i0 = begin_0; i0 < end_0; ++i0) { for (auto i1 = begin_1; i1 < end_1; ++i1) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1, result); else functor(typename Policy::work_tag(), i0, i1, result); @@ -555,16 +556,16 @@ reduction(+:result) } template <int Rank, class ValueType> - inline typename std::enable_if<Rank == 3>::type execute_tile( - const FunctorType& functor, const Policy& policy, - pointer_type ptr) const { - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; - const auto begin_2 = policy.m_lower[2]; + inline std::enable_if_t<Rank == 3> execute_tile(const FunctorType& functor, + const Policy& policy, + pointer_type ptr) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; - const auto end_2 = policy.m_upper[2]; + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; ValueType result = ValueType(); @@ -583,7 +584,7 @@ reduction(+:result) for (auto i0 = begin_0; i0 < end_0; ++i0) { for (auto i1 = begin_1; i1 < end_1; ++i1) { for (auto i2 = begin_2; i2 < end_2; ++i2) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1, i2, result); else functor(typename Policy::work_tag(), i0, i1, i2, result); @@ -596,7 +597,7 @@ reduction(+:result) for (auto i0 = begin_0; i0 < end_0; ++i0) { for (auto i1 = begin_1; i1 < end_1; ++i1) { for (auto i2 = begin_2; i2 < end_2; ++i2) { - if constexpr (std::is_same<typename Policy::work_tag, void>::value) + if constexpr (std::is_void<typename Policy::work_tag>::value) functor(i0, i1, i2, result); else 
functor(typename Policy::work_tag(), i0, i1, i2, result); @@ -610,18 +611,18 @@ reduction(+:result) } template <int Rank, class ValueType> - inline typename std::enable_if<Rank == 4>::type execute_tile( - const FunctorType& functor, const Policy& policy, - pointer_type ptr) const { - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; - const auto begin_2 = policy.m_lower[3]; - const auto begin_3 = policy.m_lower[2]; - - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; - const auto end_2 = policy.m_upper[2]; - const auto end_3 = policy.m_upper[3]; + inline std::enable_if_t<Rank == 4> execute_tile(const FunctorType& functor, + const Policy& policy, + pointer_type ptr) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[3]; + const Index begin_3 = policy.m_lower[2]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; ValueType result = ValueType(); @@ -673,20 +674,20 @@ reduction(+:result) } template <int Rank, class ValueType> - inline typename std::enable_if<Rank == 5>::type execute_tile( - const FunctorType& functor, const Policy& policy, - pointer_type ptr) const { - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; - const auto begin_2 = policy.m_lower[2]; - const auto begin_3 = policy.m_lower[3]; - const auto begin_4 = policy.m_lower[4]; - - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; - const auto end_2 = policy.m_upper[2]; - const auto end_3 = policy.m_upper[3]; - const auto end_4 = policy.m_upper[4]; + inline std::enable_if_t<Rank == 5> execute_tile(const FunctorType& functor, + const Policy& policy, + pointer_type ptr) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = 
policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; ValueType result = ValueType(); @@ -744,22 +745,22 @@ reduction(+:result) } template <int Rank, class ValueType> - inline typename std::enable_if<Rank == 6>::type execute_tile( - const FunctorType& functor, const Policy& policy, - pointer_type ptr) const { - const auto begin_0 = policy.m_lower[0]; - const auto begin_1 = policy.m_lower[1]; - const auto begin_2 = policy.m_lower[2]; - const auto begin_3 = policy.m_lower[3]; - const auto begin_4 = policy.m_lower[4]; - const auto begin_5 = policy.m_lower[5]; - - const auto end_0 = policy.m_upper[0]; - const auto end_1 = policy.m_upper[1]; - const auto end_2 = policy.m_upper[2]; - const auto end_3 = policy.m_upper[3]; - const auto end_4 = policy.m_upper[4]; - const auto end_5 = policy.m_upper[5]; + inline std::enable_if_t<Rank == 6> execute_tile(const FunctorType& functor, + const Policy& policy, + pointer_type ptr) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + const Index begin_5 = policy.m_lower[5]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + const Index end_5 = policy.m_upper[5]; ValueType result = ValueType(); diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp index a46a64ea640f34d811751981b07e44ee71386230..1ada2b1911af954e7b1fbe2a91a60fdda6530eb4 100644 --- 
a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Core.hpp> #if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ENABLE_TASKPOLICY) diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp index 48f6b74dc1e280bcf62de6f5cf715acb0643d571..840db4327cb363409e058602885f7b079ae85e31 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Concepts.hpp> #include <SYCL/Kokkos_SYCL_Instance.hpp> #include <Kokkos_SYCL.hpp> @@ -49,6 +53,8 @@ #include <Kokkos_Serial.hpp> #include <Kokkos_Core.hpp> #include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_DeviceManagement.hpp> +#include <impl/Kokkos_ExecSpaceManager.hpp> namespace { template <typename C> @@ -69,12 +75,6 @@ struct Container { } // namespace namespace Kokkos { - -namespace Impl { -// forward-declaration -int get_gpu(const InitArguments& args); -} // namespace Impl - namespace Experimental { SYCL::SYCL() : m_space_instance(&Impl::SYCLInternal::singleton(), @@ -105,24 +105,22 @@ bool SYCL::impl_is_initialized() { void SYCL::impl_finalize() { Impl::SYCLInternal::singleton().finalize(); } -void SYCL::print_configuration(std::ostream& s, const bool detailed) { - s << "macro KOKKOS_ENABLE_SYCL : defined" << '\n'; - if (detailed) - SYCL::impl_sycl_info(s, m_space_instance->m_queue->get_device()); -} +void SYCL::print_configuration(std::ostream& os, bool verbose) const { + os << "Devices:\n"; + os << " KOKKOS_ENABLE_SYCL: yes\n"; + + os << "\nRuntime Configuration:\n"; -void SYCL::fence() const { - fence("Kokkos::Experimental::SYCL::fence: Unnamed Instance Fence"); + 
os << "macro KOKKOS_ENABLE_SYCL : defined\n"; + if (verbose) + SYCL::impl_sycl_info(os, m_space_instance->m_queue->get_device()); } + void SYCL::fence(const std::string& name) const { Impl::SYCLInternal::fence(*m_space_instance->m_queue, name, impl_instance_id()); } -void SYCL::impl_static_fence() { - impl_static_fence( - "Kokkos::Experimental::SYCL::fence: Unnamed Instance Fence"); -} void SYCL::impl_static_fence(const std::string& name) { Kokkos::Tools::Experimental::Impl::profile_fence_event< Kokkos::Experimental::SYCL>( @@ -148,27 +146,23 @@ int SYCL::sycl_device() const { return impl_internal_space_instance()->m_syclDev; } -SYCL::SYCLDevice::SYCLDevice(sycl::device d) : m_device(std::move(d)) {} - -SYCL::SYCLDevice::SYCLDevice(const sycl::device_selector& selector) - : m_device(selector.select_device()) {} - -SYCL::SYCLDevice::SYCLDevice(size_t id) { +void SYCL::impl_initialize(InitializationSettings const& settings) { std::vector<sycl::device> gpu_devices = sycl::device::get_devices(sycl::info::device_type::gpu); - if (id >= gpu_devices.size()) { - std::stringstream error_message; - error_message << "Requested GPU with id " << id << " but only " - << gpu_devices.size() << " GPU(s) available!\n"; - Kokkos::Impl::throw_runtime_exception(error_message.str()); + // If the device id is not specified and there are no GPUs, sidestep Kokkos + // device selection and use whatever is available (if no GPU architecture is + // specified). 
+#if !defined(KOKKOS_ARCH_INTEL_GPU) && !defined(KOKKOS_ARCH_KEPLER) && \ + !defined(KOKKOS_ARCH_MAXWELL) && !defined(KOKKOS_ARCH_PASCAL) && \ + !defined(KOKKOS_ARCH_VOLTA) && !defined(KOKKOS_ARCH_TURING75) && \ + !defined(KOKKOS_ARCH_AMPERE) + if (!settings.has_device_id() && gpu_devices.empty()) { + Impl::SYCLInternal::singleton().initialize(sycl::device()); + return; } - m_device = gpu_devices[id]; -} - -sycl::device SYCL::SYCLDevice::get_device() const { return m_device; } - -void SYCL::impl_initialize(SYCL::SYCLDevice d) { - Impl::SYCLInternal::singleton().initialize(d.get_device()); +#endif + using Kokkos::Impl::get_gpu; + Impl::SYCLInternal::singleton().initialize(gpu_devices[get_gpu(settings)]); } std::ostream& SYCL::impl_sycl_info(std::ostream& os, @@ -262,9 +256,6 @@ std::ostream& SYCL::impl_sycl_info(std::ostream& os, << device.get_info<device::is_linker_available>() << "\nQueue Profiling: " << device.get_info<device::queue_profiling>() - << "\nBuilt In Kernels: " - << Container<std::vector<std::string>>( - device.get_info<device::built_in_kernels>()) << "\nVendor: " << device.get_info<device::vendor>() << "\nProfile: " << device.get_info<device::profile>() << "\nVersion: " << device.get_info<device::version>() @@ -281,54 +272,8 @@ std::ostream& SYCL::impl_sycl_info(std::ostream& os, namespace Impl { int g_sycl_space_factory_initialized = - Kokkos::Impl::initialize_space_factory<SYCLSpaceInitializer>("170_SYCL"); + Kokkos::Impl::initialize_space_factory<SYCL>("170_SYCL"); -void SYCLSpaceInitializer::initialize(const InitArguments& args) { - // If there are no GPUs return whatever else we can run on if no specific GPU - // is requested. - const auto num_gpus = - sycl::device::get_devices(sycl::info::device_type::gpu).size(); - int use_gpu = num_gpus == 0 ? 
args.device_id : Kokkos::Impl::get_gpu(args); - - if (std::is_same<Kokkos::Experimental::SYCL, - Kokkos::DefaultExecutionSpace>::value || - 0 < use_gpu) { - if (use_gpu > -1) { - Kokkos::Experimental::SYCL::impl_initialize( - Kokkos::Experimental::SYCL::SYCLDevice(use_gpu)); - } else { - Kokkos::Experimental::SYCL::impl_initialize( - Kokkos::Experimental::SYCL::SYCLDevice(sycl::default_selector())); - } - } } - -void SYCLSpaceInitializer::finalize(const bool all_spaces) { - if (std::is_same<Kokkos::Experimental::SYCL, - Kokkos::DefaultExecutionSpace>::value || - all_spaces) { - if (Kokkos::Experimental::SYCL::impl_is_initialized()) - Kokkos::Experimental::SYCL::impl_finalize(); - } -} - -void SYCLSpaceInitializer::fence() { - Kokkos::Experimental::SYCL::impl_static_fence(); -} -void SYCLSpaceInitializer::fence(const std::string& name) { - Kokkos::Experimental::SYCL::impl_static_fence(name); -} - -void SYCLSpaceInitializer::print_configuration(std::ostream& msg, - const bool detail) { - msg << "Devices:" << std::endl; - msg << " KOKKOS_ENABLE_SYCL: "; - msg << "yes" << std::endl; - - msg << "\nRuntime Configuration:" << std::endl; - Experimental::SYCL{}.print_configuration(msg, detail); -} - -} // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp index 0cf5a95d8a6ae31488b5849119f7bb1ef1cb16ad..37721247a73ae8d0b0d3723b424d7833c5b9a3dc 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Core.hpp> //kokkos_malloc namespace Kokkos { @@ -73,8 +77,9 @@ SYCLInternal::~SYCLInternal() { int SYCLInternal::verify_is_initialized(const char* const label) const { if (!is_initialized()) { - std::cerr << "Kokkos::Experimental::SYCL::" << 
label - << " : ERROR device not initialized" << std::endl; + Kokkos::abort((std::string("Kokkos::Experimental::SYCL::") + label + + " : ERROR device not initialized\n") + .c_str()); } return is_initialized(); } @@ -98,11 +103,7 @@ void SYCLInternal::initialize(const sycl::device& d) { Kokkos::Impl::throw_runtime_exception( "There was an asynchronous SYCL error!\n"); }; - // FIXME_SYCL using an in-order queue here should not be necessary since we - // are using submit_barrier for managing kernel dependencies but this seems to - // be required as a hot fix for now. - initialize( - sycl::queue{d, exception_handler, sycl::property::queue::in_order()}); + initialize(sycl::queue{d, exception_handler}); } // FIXME_SYCL @@ -172,8 +173,8 @@ void SYCLInternal::initialize(const sycl::queue& q) { m_team_scratch_ptr = nullptr; } -void* SYCLInternal::resize_team_scratch_space(std::int64_t bytes, - bool force_shrink) { +sycl::device_ptr<void> SYCLInternal::resize_team_scratch_space( + std::int64_t bytes, bool force_shrink) { if (m_team_scratch_current_size == 0) { m_team_scratch_current_size = bytes; m_team_scratch_ptr = @@ -229,7 +230,7 @@ void SYCLInternal::finalize() { m_queue.reset(); } -void* SYCLInternal::scratch_space(const std::size_t size) { +sycl::device_ptr<void> SYCLInternal::scratch_space(const std::size_t size) { const size_type sizeScratchGrain = sizeof(Kokkos::Experimental::SYCL::size_type); if (verify_is_initialized("scratch_space") && @@ -255,7 +256,7 @@ void* SYCLInternal::scratch_space(const std::size_t size) { return m_scratchSpace; } -void* SYCLInternal::scratch_flags(const std::size_t size) { +sycl::device_ptr<void> SYCLInternal::scratch_flags(const std::size_t size) { const size_type sizeScratchGrain = sizeof(Kokkos::Experimental::SYCL::size_type); if (verify_is_initialized("scratch_flags") && diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp index 
45aacd7258a62c8afe8b25c439bfb1f505f093bc..45a7887873e66127315eade103f71d3cde38746f 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp @@ -66,10 +66,10 @@ class SYCLInternal { SYCLInternal& operator=(SYCLInternal&&) = delete; SYCLInternal(SYCLInternal&&) = delete; - void* scratch_space(const std::size_t size); - void* scratch_flags(const std::size_t size); - void* resize_team_scratch_space(std::int64_t bytes, - bool force_shrink = false); + sycl::device_ptr<void> scratch_space(const std::size_t size); + sycl::device_ptr<void> scratch_flags(const std::size_t size); + sycl::device_ptr<void> resize_team_scratch_space(std::int64_t bytes, + bool force_shrink = false); uint32_t impl_get_instance_id() const; int m_syclDev = 0; @@ -78,15 +78,15 @@ class SYCLInternal { uint32_t m_maxConcurrency = 0; uint64_t m_maxShmemPerBlock = 0; - std::size_t m_scratchSpaceCount = 0; - size_type* m_scratchSpace = nullptr; - std::size_t m_scratchFlagsCount = 0; - size_type* m_scratchFlags = nullptr; + std::size_t m_scratchSpaceCount = 0; + sycl::device_ptr<size_type> m_scratchSpace = nullptr; + std::size_t m_scratchFlagsCount = 0; + sycl::device_ptr<size_type> m_scratchFlags = nullptr; // mutex to access shared memory mutable std::mutex m_mutexScratchSpace; - int64_t m_team_scratch_current_size = 0; - void* m_team_scratch_ptr = nullptr; + int64_t m_team_scratch_current_size = 0; + sycl::device_ptr<void> m_team_scratch_ptr = nullptr; mutable std::mutex m_team_scratch_mutex; uint32_t m_instance_id = Kokkos::Tools::Experimental::Impl::idForInstance< diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp index d631c3ba8cb541de259fed51f69d87620f53be5e..cf292f957ca9113ac594839cd351819f7e1dbc23 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp @@ 
-56,7 +56,7 @@ struct FunctorWrapperRangePolicyParallelFor { void operator()(sycl::item<1> item) const { const typename Policy::index_type id = item.get_linear_id() + m_begin; - if constexpr (std::is_same<WorkTag, void>::value) + if constexpr (std::is_void<WorkTag>::value) m_functor_wrapper.get_functor()(id); else m_functor_wrapper.get_functor()(WorkTag(), id); @@ -87,9 +87,7 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, const sycl::event& memcpy_event) { // Convenience references const Kokkos::Experimental::SYCL& space = policy.space(); - Kokkos::Experimental::Impl::SYCLInternal& instance = - *space.impl_internal_space_instance(); - sycl::queue& q = *instance.m_queue; + sycl::queue& q = space.sycl_queue(); auto parallel_for_event = q.submit([&](sycl::handler& cgh) { FunctorWrapperRangePolicyParallelFor<Functor, Policy> f{policy.begin(), @@ -223,9 +221,7 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, sycl::event sycl_direct_launch(const FunctorWrapper& functor_wrapper, const sycl::event& memcpy_event) const { // Convenience references - Kokkos::Experimental::Impl::SYCLInternal& instance = - *m_space.impl_internal_space_instance(); - sycl::queue& q = *instance.m_queue; + sycl::queue& q = m_space.sycl_queue(); if (m_policy.m_num_tiles == 0) return {}; diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp index eca6f311114a71e4142484234572dd7a8d4e0721..e980a82a580a1284244328884856de8b8765bced 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp @@ -58,20 +58,18 @@ namespace Kokkos { namespace Impl { -template <class FunctorValueTraits> +template <class ReducerType> inline constexpr bool use_shuffle_based_algorithm = - FunctorValueTraits::StaticValueSize > 0; + std::is_reference_v<typename ReducerType::reference_type>; namespace 
SYCLReduction { -template <class ValueJoin, class ValueOps, typename WorkTag, typename ValueType, - typename ReducerType, typename FunctorType, int dim> -std::enable_if_t< - !use_shuffle_based_algorithm<FunctorValueTraits<ReducerType, WorkTag>>> -workgroup_reduction( +template <typename ValueType, typename ReducerType, int dim> +std::enable_if_t<!use_shuffle_based_algorithm<ReducerType>> workgroup_reduction( sycl::nd_item<dim>& item, sycl::local_ptr<ValueType> local_mem, - ValueType* results_ptr, ValueType* device_accessible_result_ptr, - const unsigned int value_count, const ReducerType& selected_reducer, - const FunctorType& functor, bool final, unsigned int max_size) { + sycl::device_ptr<ValueType> results_ptr, + sycl::global_ptr<ValueType> device_accessible_result_ptr, + const unsigned int value_count, const ReducerType& final_reducer, + bool final, unsigned int max_size) { const auto local_id = item.get_local_linear_id(); // Perform the actual workgroup reduction in each subgroup @@ -85,8 +83,7 @@ workgroup_reduction( std::min(local_range - id_in_sg, max_size - local_id); for (unsigned int stride = 1; stride < local_range; stride <<= 1) { if (stride < upper_stride_bound) - ValueJoin::join(selected_reducer, result, - &local_mem[(local_id + stride) * value_count]); + final_reducer.join(result, &local_mem[(local_id + stride) * value_count]); sycl::group_barrier(sg); } sycl::group_barrier(item.get_group()); @@ -94,8 +91,7 @@ workgroup_reduction( // Copy the subgroup results into the first positions of the // reduction array. if (id_in_sg == 0) - ValueOps::copy(functor, &local_mem[sg.get_group_id()[0] * value_count], - result); + final_reducer.copy(&local_mem[sg.get_group_id()[0] * value_count], result); sycl::group_barrier(item.get_group()); // Do the final reduction only using the first subgroup. 
@@ -108,15 +104,15 @@ workgroup_reduction( for (unsigned int offset = local_range; offset < n_subgroups; offset += local_range) if (id_in_sg + offset < n_subgroups) - ValueJoin::join(selected_reducer, result_, - &local_mem[(id_in_sg + offset) * value_count]); + final_reducer.join(result_, + &local_mem[(id_in_sg + offset) * value_count]); sycl::group_barrier(sg); // Then, we proceed as before. for (unsigned int stride = 1; stride < local_range; stride <<= 1) { if (id_in_sg + stride < n_subgroups) - ValueJoin::join(selected_reducer, result_, - &local_mem[(id_in_sg + stride) * value_count]); + final_reducer.join(result_, + &local_mem[(id_in_sg + stride) * value_count]); sycl::group_barrier(sg); } @@ -126,32 +122,25 @@ workgroup_reduction( // final() if necessary. if (id_in_sg == 0) { if (final) { - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, WorkTag>::final(functor, &local_mem[0]); + final_reducer.final(&local_mem[0]); if (device_accessible_result_ptr != nullptr) - ValueOps::copy(functor, &device_accessible_result_ptr[0], - &local_mem[0]); + final_reducer.copy(&device_accessible_result_ptr[0], &local_mem[0]); else - ValueOps::copy(functor, &results_ptr[0], &local_mem[0]); + final_reducer.copy(&results_ptr[0], &local_mem[0]); } else - ValueOps::copy(functor, - &results_ptr[(item.get_group_linear_id()) * value_count], - &local_mem[0]); + final_reducer.copy( + &results_ptr[(item.get_group_linear_id()) * value_count], + &local_mem[0]); } } } -template <class ValueJoin, typename WorkTag, typename ValueType, - typename ReducerType, typename FunctorType, int dim> -std::enable_if_t< - use_shuffle_based_algorithm<FunctorValueTraits<ReducerType, WorkTag>>> -workgroup_reduction(sycl::nd_item<dim>& item, - sycl::local_ptr<ValueType> local_mem, ValueType local_value, - ValueType* results_ptr, - ValueType* device_accessible_result_ptr, - const ReducerType& selected_reducer, - const FunctorType& functor, bool final, - unsigned int max_size) { 
+template <typename ValueType, typename ReducerType, int dim> +std::enable_if_t<use_shuffle_based_algorithm<ReducerType>> workgroup_reduction( + sycl::nd_item<dim>& item, sycl::local_ptr<ValueType> local_mem, + ValueType local_value, sycl::device_ptr<ValueType> results_ptr, + sycl::global_ptr<ValueType> device_accessible_result_ptr, + const ReducerType& final_reducer, bool final, unsigned int max_size) { const auto local_id = item.get_local_linear_id(); // Perform the actual workgroup reduction in each subgroup @@ -164,8 +153,7 @@ workgroup_reduction(sycl::nd_item<dim>& item, std::min(local_range - id_in_sg, max_size - local_id); for (unsigned int stride = 1; stride < local_range; stride <<= 1) { auto tmp = sg.shuffle_down(local_value, stride); - if (stride < upper_stride_bound) - ValueJoin::join(selected_reducer, &local_value, &tmp); + if (stride < upper_stride_bound) final_reducer.join(&local_value, &tmp); } // Copy the subgroup results into the first positions of the @@ -188,8 +176,7 @@ workgroup_reduction(sycl::nd_item<dim>& item, for (unsigned int offset = local_range; offset < n_active_subgroups; offset += local_range) if (id_in_sg + offset < n_active_subgroups) { - ValueJoin::join(selected_reducer, &sg_value, - &local_mem[(id_in_sg + offset)]); + final_reducer.join(&sg_value, &local_mem[(id_in_sg + offset)]); } sg.barrier(); } @@ -198,7 +185,7 @@ workgroup_reduction(sycl::nd_item<dim>& item, for (unsigned int stride = 1; stride < local_range; stride <<= 1) { auto tmp = sg.shuffle_down(sg_value, stride); if (id_in_sg + stride < n_active_subgroups) - ValueJoin::join(selected_reducer, &sg_value, &tmp); + final_reducer.join(&sg_value, &tmp); } // Finally, we copy the workgroup results back to global memory @@ -207,8 +194,7 @@ workgroup_reduction(sycl::nd_item<dim>& item, // final() if necessary. 
if (id_in_sg == 0) { if (final) { - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, WorkTag>::final(functor, &sg_value); + final_reducer.final(&sg_value); if (device_accessible_result_ptr != nullptr) device_accessible_result_ptr[0] = sg_value; else @@ -228,8 +214,12 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, using Policy = Kokkos::RangePolicy<Traits...>; private: + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>; using execution_space = typename Analysis::execution_space; using value_type = typename Analysis::value_type; using pointer_type = typename Analysis::pointer_type; @@ -240,9 +230,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, public: // V - View template <typename V> - ParallelReduce( - const FunctorType& f, const Policy& p, const V& v, - typename std::enable_if<Kokkos::is_view<V>::value, void*>::type = nullptr) + ParallelReduce(const FunctorType& f, const Policy& p, const V& v, + std::enable_if_t<Kokkos::is_view<V>::value, void*> = nullptr) : m_functor(f), m_policy(p), m_result_ptr(v.data()), @@ -272,24 +261,11 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, const PolicyType& policy, const FunctorWrapper& functor_wrapper, const ReducerWrapper& reducer_wrapper, const std::vector<sycl::event>& memcpy_events) const { - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>; - using 
ValueInit = - Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = - Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; - // Convenience references const Kokkos::Experimental::SYCL& space = policy.space(); Kokkos::Experimental::Impl::SYCLInternal& instance = *space.impl_internal_space_instance(); - sycl::queue& q = *instance.m_queue; + sycl::queue& q = space.sycl_queue(); // FIXME_SYCL optimize constexpr size_t wgroup_size = 128; @@ -300,13 +276,13 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, wgroup_size, 1); const unsigned int value_count = - FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count( - ReducerConditional::select(m_functor, m_reducer)); - const auto results_ptr = static_cast<pointer_type>(instance.scratch_space( - sizeof(value_type) * std::max(value_count, 1u) * init_size)); - value_type* device_accessible_result_ptr = + Analysis::value_count(ReducerConditional::select(m_functor, m_reducer)); + const auto results_ptr = + static_cast<sycl::device_ptr<value_type>>(instance.scratch_space( + sizeof(value_type) * std::max(value_count, 1u) * init_size)); + sycl::global_ptr<value_type> device_accessible_result_ptr = m_result_ptr_device_accessible ? 
m_result_ptr : nullptr; - auto scratch_flags = static_cast<unsigned int*>( + auto scratch_flags = static_cast<sycl::device_ptr<unsigned int>>( instance.scratch_flags(sizeof(unsigned int))); sycl::event last_reduction_event; @@ -323,20 +299,18 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, const auto& selected_reducer = ReducerConditional::select( static_cast<const FunctorType&>(functor), static_cast<const ReducerType&>(reducer_wrapper.get_functor())); - reference_type update = - ValueInit::init(selected_reducer, results_ptr); + typename Analysis::Reducer final_reducer(&selected_reducer); + reference_type update = final_reducer.init(results_ptr); if (size == 1) { - if constexpr (std::is_same<WorkTag, void>::value) + if constexpr (std::is_void<WorkTag>::value) functor(begin, update); else functor(WorkTag(), begin, update); } - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, WorkTag>::final( - static_cast<const FunctorType&>(functor), results_ptr); + final_reducer.final(results_ptr); if (device_accessible_result_ptr != nullptr) - ValueOps::copy(functor, &device_accessible_result_ptr[0], - &results_ptr[0]); + final_reducer.copy(device_accessible_result_ptr.get(), + results_ptr.get()); }); }); q.ext_oneapi_submit_barrier( @@ -377,107 +351,93 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, static_cast<const FunctorType&>(functor), static_cast<const ReducerType&>( reducer_wrapper.get_functor())); + typename Analysis::Reducer final_reducer(&selected_reducer); using index_type = typename Policy::index_type; const auto upper_bound = std::min<index_type>( global_id + values_per_thread * wgroup_size, size); - if constexpr (FunctorValueTraits<ReducerTypeFwd, - WorkTagFwd>::StaticValueSize == - 0) { - reference_type update = ValueInit::init( - selected_reducer, &local_mem[local_id * value_count]); + if constexpr (Analysis::StaticValueSize == 0) { + reference_type update = + 
final_reducer.init(&local_mem[local_id * value_count]); for (index_type id = global_id; id < upper_bound; id += wgroup_size) { - if constexpr (std::is_same<WorkTag, void>::value) + if constexpr (std::is_void<WorkTag>::value) functor(id + begin, update); else functor(WorkTag(), id + begin, update); } item.barrier(sycl::access::fence_space::local_space); - SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, - WorkTag>( + SYCLReduction::workgroup_reduction<>( item, local_mem.get_pointer(), results_ptr, - device_accessible_result_ptr, value_count, selected_reducer, - static_cast<const FunctorType&>(functor), false, - std::min(size, wgroup_size)); + device_accessible_result_ptr, value_count, final_reducer, + false, std::min(size, wgroup_size)); if (local_id == 0) { - sycl::ext::oneapi::atomic_ref< - unsigned, sycl::ext::oneapi::memory_order::relaxed, - sycl::ext::oneapi::memory_scope::device, - sycl::access::address_space::global_space> + sycl::atomic_ref<unsigned, sycl::memory_order::relaxed, + sycl::memory_scope::device, + sycl::access::address_space::global_space> scratch_flags_ref(*scratch_flags); num_teams_done[0] = ++scratch_flags_ref; } item.barrier(sycl::access::fence_space::local_space); if (num_teams_done[0] == n_wgroups) { if (local_id >= n_wgroups) - ValueInit::init(selected_reducer, - &local_mem[local_id * value_count]); + final_reducer.init(&local_mem[local_id * value_count]); else { - ValueOps::copy(functor, &local_mem[local_id * value_count], - &results_ptr[local_id * value_count]); + final_reducer.copy(&local_mem[local_id * value_count], + &results_ptr[local_id * value_count]); for (unsigned int id = local_id + wgroup_size; id < n_wgroups; id += wgroup_size) { - ValueJoin::join(selected_reducer, - &local_mem[local_id * value_count], - &results_ptr[id * value_count]); + final_reducer.join(&local_mem[local_id * value_count], + &results_ptr[id * value_count]); } } - SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, - WorkTag>( + 
SYCLReduction::workgroup_reduction<>( item, local_mem.get_pointer(), results_ptr, - device_accessible_result_ptr, value_count, - selected_reducer, - static_cast<const FunctorType&>(functor), true, - std::min(n_wgroups, wgroup_size)); + device_accessible_result_ptr, value_count, final_reducer, + true, std::min(n_wgroups, wgroup_size)); } } else { value_type local_value; - reference_type update = - ValueInit::init(selected_reducer, &local_value); + reference_type update = final_reducer.init(&local_value); for (index_type id = global_id; id < upper_bound; id += wgroup_size) { - if constexpr (std::is_same<WorkTag, void>::value) + if constexpr (std::is_void<WorkTag>::value) functor(id + begin, update); else functor(WorkTag(), id + begin, update); } - SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>( + SYCLReduction::workgroup_reduction<>( item, local_mem.get_pointer(), local_value, results_ptr, - device_accessible_result_ptr, selected_reducer, - static_cast<const FunctorType&>(functor), false, + device_accessible_result_ptr, final_reducer, false, std::min(size, wgroup_size)); if (local_id == 0) { - sycl::ext::oneapi::atomic_ref< - unsigned, sycl::ext::oneapi::memory_order::relaxed, - sycl::ext::oneapi::memory_scope::device, - sycl::access::address_space::global_space> + sycl::atomic_ref<unsigned, sycl::memory_order::relaxed, + sycl::memory_scope::device, + sycl::access::address_space::global_space> scratch_flags_ref(*scratch_flags); num_teams_done[0] = ++scratch_flags_ref; } item.barrier(sycl::access::fence_space::local_space); if (num_teams_done[0] == n_wgroups) { if (local_id >= n_wgroups) - ValueInit::init(selected_reducer, &local_value); + final_reducer.init(&local_value); else { local_value = results_ptr[local_id]; for (unsigned int id = local_id + wgroup_size; id < n_wgroups; id += wgroup_size) { - ValueJoin::join(selected_reducer, &local_value, - &results_ptr[id]); + final_reducer.join(&local_value, &results_ptr[id]); } } - 
SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>( + SYCLReduction::workgroup_reduction<>( item, local_mem.get_pointer(), local_value, results_ptr, - device_accessible_result_ptr, selected_reducer, - static_cast<const FunctorType&>(functor), true, + device_accessible_result_ptr, final_reducer, true, std::min(n_wgroups, wgroup_size)); } } @@ -495,9 +455,6 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, Kokkos::Experimental::SYCLDeviceUSMSpace>( space, m_result_ptr, results_ptr, sizeof(*m_result_ptr) * value_count); - space.fence( - "Kokkos::Impl::ParallelReduce::sycl_direct_launch: fence due to " - "inaccessible reducer result location"); } return last_reduction_event; @@ -543,8 +500,12 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, using Policy = Kokkos::MDRangePolicy<Traits...>; private: + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>; using execution_space = typename Analysis::execution_space; using value_type = typename Analysis::value_type; using pointer_type = typename Analysis::pointer_type; @@ -578,9 +539,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, public: // V - View template <typename V> - ParallelReduce( - const FunctorType& f, const Policy& p, const V& v, - typename std::enable_if<Kokkos::is_view<V>::value, void*>::type = nullptr) + ParallelReduce(const FunctorType& f, const Policy& p, const V& v, + std::enable_if_t<Kokkos::is_view<V>::value, void*> = nullptr) : m_functor(f), m_policy(p), m_space(p.space()), @@ -612,23 +572,10 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, const PolicyType& policy, const 
FunctorWrapper& functor_wrapper, const ReducerWrapper& reducer_wrapper, const std::vector<sycl::event>& memcpy_events) const { - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>; - using ValueInit = - Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = - Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; - // Convenience references Kokkos::Experimental::Impl::SYCLInternal& instance = *m_space.impl_internal_space_instance(); - sycl::queue& q = *instance.m_queue; + sycl::queue& q = m_space.sycl_queue(); const typename Policy::index_type nwork = m_policy.m_num_tiles; const typename Policy::index_type block_size = @@ -644,14 +591,13 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, const auto init_size = std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1); const unsigned int value_count = - FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count( - ReducerConditional::select(m_functor, m_reducer)); - // FIXME_SYCL only use the first half - const auto results_ptr = static_cast<pointer_type>(instance.scratch_space( - sizeof(value_type) * std::max(value_count, 1u) * init_size)); - value_type* device_accessible_result_ptr = + Analysis::value_count(ReducerConditional::select(m_functor, m_reducer)); + const auto results_ptr = + static_cast<sycl::device_ptr<value_type>>(instance.scratch_space( + sizeof(value_type) * std::max(value_count, 1u) * init_size)); + sycl::global_ptr<value_type> device_accessible_result_ptr = m_result_ptr_device_accessible ? 
m_result_ptr : nullptr; - auto scratch_flags = static_cast<unsigned int*>( + auto scratch_flags = static_cast<sycl::device_ptr<unsigned int>>( instance.scratch_flags(sizeof(unsigned int))); sycl::event last_reduction_event; @@ -667,8 +613,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, const auto& selected_reducer = ReducerConditional::select( static_cast<const FunctorType&>(functor), static_cast<const ReducerType&>(reducer_wrapper.get_functor())); - reference_type update = - ValueInit::init(selected_reducer, results_ptr); + typename Analysis::Reducer final_reducer(&selected_reducer); + + reference_type update = final_reducer.init(results_ptr); if (size == 1) { Kokkos::Impl::Reduce::DeviceIterateTile< Policy::rank, BarePolicy, FunctorType, @@ -676,12 +623,10 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, policy, functor, update, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}) .exec_range(); } - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, WorkTag>::final( - static_cast<const FunctorType&>(functor), results_ptr); + final_reducer.final(results_ptr); if (device_accessible_result_ptr) - ValueOps::copy(functor, &device_accessible_result_ptr[0], - &results_ptr[0]); + final_reducer.copy(device_accessible_result_ptr.get(), + results_ptr.get()); }); }); q.ext_oneapi_submit_barrier( @@ -714,6 +659,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, const auto& selected_reducer = ReducerConditional::select( static_cast<const FunctorType&>(functor), static_cast<const ReducerType&>(reducer_wrapper.get_functor())); + typename Analysis::Reducer final_reducer(&selected_reducer); // In the first iteration, we call functor to initialize the local // memory. 
Otherwise, the local memory is initialized with the @@ -732,10 +678,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, const index_type n_global_y = 1; const index_type n_global_z = 1; - if constexpr (FunctorValueTraits<ReducerTypeFwd, - WorkTagFwd>::StaticValueSize == 0) { - reference_type update = ValueInit::init( - selected_reducer, &local_mem[local_id * value_count]); + if constexpr (Analysis::StaticValueSize == 0) { + reference_type update = + final_reducer.init(&local_mem[local_id * value_count]); Kokkos::Impl::Reduce::DeviceIterateTile< Policy::rank, BarePolicy, FunctorType, @@ -746,46 +691,40 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, .exec_range(); item.barrier(sycl::access::fence_space::local_space); - SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>( + SYCLReduction::workgroup_reduction<>( item, local_mem.get_pointer(), results_ptr, - device_accessible_result_ptr, value_count, selected_reducer, - static_cast<const FunctorType&>(functor), false, + device_accessible_result_ptr, value_count, final_reducer, false, std::min(size, wgroup_size)); if (local_id == 0) { - sycl::ext::oneapi::atomic_ref< - unsigned, sycl::ext::oneapi::memory_order::relaxed, - sycl::ext::oneapi::memory_scope::device, - sycl::access::address_space::global_space> + sycl::atomic_ref<unsigned, sycl::memory_order::relaxed, + sycl::memory_scope::device, + sycl::access::address_space::global_space> scratch_flags_ref(*scratch_flags); num_teams_done[0] = ++scratch_flags_ref; } item.barrier(sycl::access::fence_space::local_space); if (num_teams_done[0] == n_wgroups) { if (local_id >= n_wgroups) - ValueInit::init(selected_reducer, - &local_mem[local_id * value_count]); + final_reducer.init(&local_mem[local_id * value_count]); else { - ValueOps::copy(functor, &local_mem[local_id * value_count], - &results_ptr[local_id * value_count]); + final_reducer.copy(&local_mem[local_id * value_count], + 
&results_ptr[local_id * value_count]); for (unsigned int id = local_id + wgroup_size; id < n_wgroups; id += wgroup_size) { - ValueJoin::join(selected_reducer, - &local_mem[local_id * value_count], - &results_ptr[id * value_count]); + final_reducer.join(&local_mem[local_id * value_count], + &results_ptr[id * value_count]); } } - SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>( + SYCLReduction::workgroup_reduction<>( item, local_mem.get_pointer(), results_ptr, - device_accessible_result_ptr, value_count, selected_reducer, - static_cast<const FunctorType&>(functor), true, - std::min(n_wgroups, wgroup_size)); + device_accessible_result_ptr, value_count, final_reducer, + true, std::min(n_wgroups, wgroup_size)); } } else { value_type local_value; - reference_type update = - ValueInit::init(selected_reducer, &local_value); + reference_type update = final_reducer.init(&local_value); Kokkos::Impl::Reduce::DeviceIterateTile< Policy::rank, BarePolicy, FunctorType, @@ -795,37 +734,33 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, {global_x, global_y, global_z}, {local_x, local_y, local_z}) .exec_range(); - SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>( + SYCLReduction::workgroup_reduction<>( item, local_mem.get_pointer(), local_value, results_ptr, - device_accessible_result_ptr, selected_reducer, - static_cast<const FunctorType&>(functor), false, + device_accessible_result_ptr, final_reducer, false, std::min(size, wgroup_size)); if (local_id == 0) { - sycl::ext::oneapi::atomic_ref< - unsigned, sycl::ext::oneapi::memory_order::relaxed, - sycl::ext::oneapi::memory_scope::device, - sycl::access::address_space::global_space> + sycl::atomic_ref<unsigned, sycl::memory_order::relaxed, + sycl::memory_scope::device, + sycl::access::address_space::global_space> scratch_flags_ref(*scratch_flags); num_teams_done[0] = ++scratch_flags_ref; } item.barrier(sycl::access::fence_space::local_space); if (num_teams_done[0] == 
n_wgroups) { if (local_id >= n_wgroups) - ValueInit::init(selected_reducer, &local_value); + final_reducer.init(&local_value); else { local_value = results_ptr[local_id]; for (unsigned int id = local_id + wgroup_size; id < n_wgroups; id += wgroup_size) { - ValueJoin::join(selected_reducer, &local_value, - &results_ptr[id]); + final_reducer.join(&local_value, &results_ptr[id]); } } - SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>( + SYCLReduction::workgroup_reduction<>( item, local_mem.get_pointer(), local_value, results_ptr, - device_accessible_result_ptr, selected_reducer, - static_cast<const FunctorType&>(functor), true, + device_accessible_result_ptr, final_reducer, true, std::min(n_wgroups, wgroup_size)); } } @@ -843,9 +778,6 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, Kokkos::Experimental::SYCLDeviceUSMSpace>( m_space, m_result_ptr, results_ptr, sizeof(*m_result_ptr) * value_count); - m_space.fence( - "Kokkos::Impl::ParallelReduce::sycl_direct_launch: fence after deep " - "copying results back"); } return last_reduction_event; diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp index e5992956267f96f816d558621614ecaa6864089e..e2afc9783961cf24bfa6677e7e44a1899b218884 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp @@ -57,9 +57,8 @@ namespace Impl { // At the end of this function, the subgroup scans are stored in the local array // such that the last value (at position n_active_subgroups-1) contains the // total sum. 
-template <class ValueJoin, class ValueInit, int dim, typename ValueType, - typename FunctorType> -void workgroup_scan(sycl::nd_item<dim> item, const FunctorType& functor, +template <int dim, typename ValueType, typename FunctorType> +void workgroup_scan(sycl::nd_item<dim> item, const FunctorType& final_reducer, sycl::local_ptr<ValueType> local_mem, ValueType& local_value, unsigned int global_range) { // subgroup scans @@ -68,7 +67,7 @@ void workgroup_scan(sycl::nd_item<dim> item, const FunctorType& functor, const auto id_in_sg = sg.get_local_id()[0]; for (unsigned int stride = 1; stride < global_range; stride <<= 1) { auto tmp = sg.shuffle_up(local_value, stride); - if (id_in_sg >= stride) ValueJoin::join(functor, &local_value, &tmp); + if (id_in_sg >= stride) final_reducer.join(&local_value, &tmp); } const auto max_subgroup_size = sg.get_max_local_range()[0]; @@ -79,7 +78,7 @@ void workgroup_scan(sycl::nd_item<dim> item, const FunctorType& functor, if (id_in_sg == local_range - 1 && sg_group_id < n_active_subgroups) local_mem[sg_group_id] = local_value; local_value = sg.shuffle_up(local_value, 1); - if (id_in_sg == 0) ValueInit::init(functor, &local_value); + if (id_in_sg == 0) final_reducer.init(&local_value); sycl::group_barrier(item.get_group()); // scan subgroup results using the first subgroup @@ -96,7 +95,7 @@ void workgroup_scan(sycl::nd_item<dim> item, const FunctorType& functor, auto tmp = sg.shuffle_up(local_sg_value, stride); if (id_in_sg >= stride) { if (idx < n_active_subgroups) - ValueJoin::join(functor, &local_sg_value, &tmp); + final_reducer.join(&local_sg_value, &tmp); else local_sg_value = tmp; } @@ -104,8 +103,8 @@ void workgroup_scan(sycl::nd_item<dim> item, const FunctorType& functor, if (idx < n_active_subgroups) { local_mem[idx] = local_sg_value; if (round > 0) - ValueJoin::join(functor, &local_mem[idx], - &local_mem[round * local_range - 1]); + final_reducer.join(&local_mem[idx], + &local_mem[round * local_range - 1]); } if (round + 1 < 
n_rounds) sycl::group_barrier(sg); } @@ -115,7 +114,7 @@ void workgroup_scan(sycl::nd_item<dim> item, const FunctorType& functor, // add results to all subgroups if (sg_group_id > 0) - ValueJoin::join(functor, &local_value, &local_mem[sg_group_id - 1]); + final_reducer.join(&local_value, &local_mem[sg_group_id - 1]); } template <class FunctorType, class... Traits> @@ -129,14 +128,12 @@ class ParallelScanSYCLBase { using WorkRange = typename Policy::WorkRange; using LaunchBounds = typename Policy::launch_bounds; - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - using ValueJoin = Kokkos::Impl::FunctorValueJoin<FunctorType, WorkTag>; - public: - using pointer_type = typename ValueTraits::pointer_type; - using value_type = typename ValueTraits::value_type; - using reference_type = typename ValueTraits::reference_type; + using Analysis = + FunctorAnalysis<FunctorPatternInterface::SCAN, Policy, FunctorType>; + using pointer_type = typename Analysis::pointer_type; + using value_type = typename Analysis::value_type; + using reference_type = typename Analysis::reference_type; using functor_type = FunctorType; using size_type = Kokkos::Experimental::SYCL::size_type; using index_type = typename Policy::index_type; @@ -174,6 +171,9 @@ class ParallelScanSYCLBase { cgh.parallel_for( sycl::nd_range<1>(n_wgroups * wgroup_size, wgroup_size), [=](sycl::nd_item<1> item) { + const FunctorType& functor = functor_wrapper.get_functor(); + typename Analysis::Reducer final_reducer(&functor); + const auto local_id = item.get_local_linear_id(); const auto global_id = item.get_global_linear_id(); @@ -182,11 +182,10 @@ class ParallelScanSYCLBase { if (global_id < size) local_value = global_mem[global_id]; else - ValueInit::init(functor_wrapper.get_functor(), &local_value); + final_reducer.init(&local_value); - workgroup_scan<ValueJoin, ValueInit>( - item, functor_wrapper.get_functor(), 
local_mem.get_pointer(), - local_value, wgroup_size); + workgroup_scan<>(item, final_reducer, local_mem.get_pointer(), + local_value, wgroup_size); if (n_wgroups > 1 && local_id == wgroup_size - 1) group_results[item.get_group_linear_id()] = @@ -204,11 +203,12 @@ class ParallelScanSYCLBase { cgh.parallel_for( sycl::nd_range<1>(n_wgroups * wgroup_size, wgroup_size), [=](sycl::nd_item<1> item) { - const auto global_id = item.get_global_linear_id(); + const auto global_id = item.get_global_linear_id(); + const FunctorType& functor = functor_wrapper.get_functor(); + typename Analysis::Reducer final_reducer(&functor); if (global_id < size) - ValueJoin::join(functor_wrapper.get_functor(), - &global_mem[global_id], - &group_results[item.get_group_linear_id()]); + final_reducer.join(&global_mem[global_id], + &group_results[item.get_group_linear_id()]); }); }); q.ext_oneapi_submit_barrier( @@ -221,9 +221,7 @@ class ParallelScanSYCLBase { sycl::event memcpy_event) const { // Convenience references const Kokkos::Experimental::SYCL& space = m_policy.space(); - Kokkos::Experimental::Impl::SYCLInternal& instance = - *space.impl_internal_space_instance(); - sycl::queue& q = *instance.m_queue; + sycl::queue& q = space.sycl_queue(); const std::size_t len = m_policy.end() - m_policy.begin(); @@ -236,9 +234,12 @@ class ParallelScanSYCLBase { cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) { const typename Policy::index_type id = static_cast<typename Policy::index_type>(item.get_id()) + begin; + const FunctorType& functor = functor_wrapper.get_functor(); + typename Analysis::Reducer final_reducer(&functor); + value_type update{}; - ValueInit::init(functor_wrapper.get_functor(), &update); - if constexpr (std::is_same<WorkTag, void>::value) + final_reducer.init(&update); + if constexpr (std::is_void<WorkTag>::value) functor_wrapper.get_functor()(id, update, false); else functor_wrapper.get_functor()(WorkTag(), id, update, false); @@ -258,7 +259,7 @@ class 
ParallelScanSYCLBase { auto global_id = item.get_id(0); value_type update = global_mem[global_id]; - if constexpr (std::is_same<WorkTag, void>::value) + if constexpr (std::is_void<WorkTag>::value) functor_wrapper.get_functor()(global_id, update, true); else functor_wrapper.get_functor()(WorkTag(), global_id, update, true); @@ -297,8 +298,8 @@ class ParallelScanSYCLBase { // FIXME_SYCL consider only storing one value per block and recreate initial // results in the end before doing the final pass - m_scratch_space = - static_cast<pointer_type>(instance.scratch_space(total_memory)); + m_scratch_space = static_cast<sycl::device_ptr<value_type>>( + instance.scratch_space(total_memory)); Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -346,14 +347,16 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, using Base = ParallelScanSYCLBase<FunctorType, Traits...>; ReturnType& m_returnvalue; + const Kokkos::Experimental::SYCL& m_exec; inline void execute() { Base::impl_execute([&]() { const long long nwork = Base::m_policy.end() - Base::m_policy.begin(); if (nwork > 0) { - const int size = Base::ValueTraits::value_size(Base::m_functor); - DeepCopy<HostSpace, Kokkos::Experimental::SYCLDeviceUSMSpace>( - &m_returnvalue, Base::m_scratch_space + nwork - 1, size); + const int size = Base::Analysis::value_size(Base::m_functor); + DeepCopy<HostSpace, Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL>( + m_exec, &m_returnvalue, Base::m_scratch_space + nwork - 1, size); } }); } @@ -361,7 +364,9 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, ParallelScanWithTotal(const FunctorType& arg_functor, const typename Base::Policy& arg_policy, ReturnType& arg_returnvalue) - : Base(arg_functor, arg_policy), m_returnvalue(arg_returnvalue) {} + : Base(arg_functor, arg_policy), + m_returnvalue(arg_returnvalue), + m_exec(arg_policy.space()) {} }; } // 
namespace Impl diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp index bf37dcb26f3b99b92ab989168c8c0200a29d809b..5ac7d8af3089f0acd4a3923b4811ae0aec080472 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp @@ -70,8 +70,8 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...> int m_league_size; int m_team_size; int m_vector_length; - int m_team_scratch_size[2]; - int m_thread_scratch_size[2]; + size_t m_team_scratch_size[2]; + size_t m_thread_scratch_size[2]; int m_chunk_size; bool m_tune_team_size; bool m_tune_vector_length; @@ -172,15 +172,17 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...> int league_size() const { return m_league_size; } - int scratch_size(int level, int team_size_ = -1) const { + size_t scratch_size(int level, int team_size_ = -1) const { if (team_size_ < 0) team_size_ = m_team_size; return m_team_scratch_size[level] + team_size_ * m_thread_scratch_size[level]; } - int team_scratch_size(int level) const { return m_team_scratch_size[level]; } + size_t team_scratch_size(int level) const { + return m_team_scratch_size[level]; + } - int thread_scratch_size(int level) const { + size_t thread_scratch_size(int level) const { return m_thread_scratch_size[level]; } @@ -408,8 +410,8 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, size_type const m_vector_size; int m_shmem_begin; int m_shmem_size; - char* m_scratch_ptr[2]; - int m_scratch_size[2]; + sycl::device_ptr<char> m_global_scratch_ptr; + size_t m_scratch_size[2]; // Only let one ParallelFor/Reduce modify the team scratch memory. The // constructor acquires the mutex which is released in the destructor. 
std::scoped_lock<std::mutex> m_scratch_lock; @@ -420,9 +422,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, const sycl::event& memcpy_events) const { // Convenience references const Kokkos::Experimental::SYCL& space = policy.space(); - Kokkos::Experimental::Impl::SYCLInternal& instance = - *space.impl_internal_space_instance(); - sycl::queue& q = *instance.m_queue; + sycl::queue& q = space.sycl_queue(); auto parallel_for_event = q.submit([&](sycl::handler& cgh) { // FIXME_SYCL accessors seem to need a size greater than zero at least for @@ -430,20 +430,21 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, sycl::accessor<char, 1, sycl::access::mode::read_write, sycl::access::target::local> team_scratch_memory_L0( - sycl::range<1>(std::max(m_scratch_size[0] + m_shmem_begin, 1)), + sycl::range<1>( + std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))), cgh); // Avoid capturing *this since it might not be trivially copyable - const auto shmem_begin = m_shmem_begin; - const int scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; - char* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]}; + const auto shmem_begin = m_shmem_begin; + const size_t scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; + sycl::device_ptr<char> const global_scratch_ptr = m_global_scratch_ptr; auto lambda = [=](sycl::nd_item<2> item) { const member_type team_member( team_scratch_memory_L0.get_pointer(), shmem_begin, scratch_size[0], - scratch_ptr[1] + item.get_group(1) * scratch_size[1], + global_scratch_ptr + item.get_group(1) * scratch_size[1], scratch_size[1], item); - if constexpr (std::is_same<work_tag, void>::value) + if constexpr (std::is_void<work_tag>::value) functor_wrapper.get_functor()(team_member); else functor_wrapper.get_functor()(work_tag(), team_member); @@ -516,13 +517,12 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, m_scratch_size[0] = m_shmem_size; m_scratch_size[1] = 
m_policy.scratch_size(1, m_team_size); - // FIXME_SYCL so far accessors used instead of these pointers // Functor's reduce memory, team scan memory, and team shared memory depend // upon team size. - auto& space = *m_policy.space().impl_internal_space_instance(); - m_scratch_ptr[0] = nullptr; - m_scratch_ptr[1] = static_cast<char*>(space.resize_team_scratch_space( - static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size)); + auto& space = *m_policy.space().impl_internal_space_instance(); + m_global_scratch_ptr = + static_cast<sycl::device_ptr<char>>(space.resize_team_scratch_space( + static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size)); if (static_cast<int>(space.m_maxShmemPerBlock) < m_shmem_size - m_shmem_begin) { @@ -554,8 +554,12 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, using Policy = TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>; private: + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; using Analysis = - FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>; using member_type = typename Policy::member_type; using WorkTag = typename Policy::work_tag; using launch_bounds = typename Policy::launch_bounds; @@ -574,14 +578,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, const ReducerType m_reducer; const pointer_type m_result_ptr; const bool m_result_ptr_device_accessible; - // FIXME_SYCL avoid reallocating memory for reductions - /* size_type* m_scratch_space; - size_type* m_scratch_flags; - size_type m_team_begin;*/ size_type m_shmem_begin; size_type m_shmem_size; - char* m_scratch_ptr[2]; - int m_scratch_size[2]; + sycl::device_ptr<char> m_global_scratch_ptr; + size_t m_scratch_size[2]; const size_type m_league_size; int m_team_size; const size_type 
m_vector_size; @@ -595,28 +595,14 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, const PolicyType& policy, const FunctorWrapper& functor_wrapper, const ReducerWrapper& reducer_wrapper, const std::vector<sycl::event>& memcpy_events) const { - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>; - using ValueInit = - Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueJoin = - Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; - using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; - // Convenience references const Kokkos::Experimental::SYCL& space = policy.space(); Kokkos::Experimental::Impl::SYCLInternal& instance = *space.impl_internal_space_instance(); - sycl::queue& q = *instance.m_queue; + sycl::queue& q = space.sycl_queue(); const unsigned int value_count = - FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count( - ReducerConditional::select(m_functor, m_reducer)); + Analysis::value_count(ReducerConditional::select(m_functor, m_reducer)); std::size_t size = std::size_t(m_league_size) * m_team_size * m_vector_size; value_type* results_ptr = nullptr; @@ -626,9 +612,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, // working with the global scratch memory but don't copy back to // m_result_ptr yet. if (size <= 1) { - results_ptr = static_cast<pointer_type>(instance.scratch_space( - sizeof(value_type) * std::max(value_count, 1u))); - value_type* device_accessible_result_ptr = + results_ptr = + static_cast<sycl::device_ptr<value_type>>(instance.scratch_space( + sizeof(value_type) * std::max(value_count, 1u))); + sycl::global_ptr<value_type> device_accessible_result_ptr = m_result_ptr_device_accessible ? 
m_result_ptr : nullptr; auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { @@ -637,13 +624,14 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, sycl::accessor<char, 1, sycl::access::mode::read_write, sycl::access::target::local> team_scratch_memory_L0( - sycl::range<1>(std::max(m_scratch_size[0] + m_shmem_begin, 1)), + sycl::range<1>( + std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))), cgh); // Avoid capturing *this since it might not be trivially copyable - const auto shmem_begin = m_shmem_begin; - const int scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; - char* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]}; + const auto shmem_begin = m_shmem_begin; + const size_t scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; + sycl::device_ptr<char> const global_scratch_ptr = m_global_scratch_ptr; cgh.depends_on(memcpy_events); cgh.parallel_for( @@ -654,23 +642,22 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, static_cast<const FunctorType&>(functor), static_cast<const ReducerType&>( reducer_wrapper.get_functor())); - reference_type update = - ValueInit::init(selected_reducer, results_ptr); + typename Analysis::Reducer final_reducer(&selected_reducer); + + reference_type update = final_reducer.init(results_ptr); if (size == 1) { const member_type team_member( team_scratch_memory_L0.get_pointer(), shmem_begin, - scratch_size[0], scratch_ptr[1], scratch_size[1], item); - if constexpr (std::is_same<WorkTag, void>::value) + scratch_size[0], global_scratch_ptr, scratch_size[1], item); + if constexpr (std::is_void<WorkTag>::value) functor(team_member, update); else functor(WorkTag(), team_member, update); } - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) - FunctorFinal<FunctorType, WorkTag>::final( - static_cast<const FunctorType&>(functor), results_ptr); + final_reducer.final(results_ptr); if (device_accessible_result_ptr) - ValueOps::copy(functor, 
device_accessible_result_ptr, - &results_ptr[0]); + final_reducer.copy(device_accessible_result_ptr, + &results_ptr[0]); }); }); q.ext_oneapi_submit_barrier( @@ -682,7 +669,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, // workgroup results back to global memory and recurse until only one // workgroup does the reduction and thus gets the final value. auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { - auto scratch_flags = static_cast<unsigned int*>( + auto scratch_flags = static_cast<sycl::device_ptr<unsigned int>>( instance.scratch_flags(sizeof(unsigned int))); // FIXME_SYCL accessors seem to need a size greater than zero at least @@ -690,145 +677,138 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, sycl::accessor<char, 1, sycl::access::mode::read_write, sycl::access::target::local> team_scratch_memory_L0( - sycl::range<1>(std::max(m_scratch_size[0] + m_shmem_begin, 1)), + sycl::range<1>( + std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))), cgh); // Avoid capturing *this since it might not be trivially copyable - const auto shmem_begin = m_shmem_begin; - const int scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; - char* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]}; - - auto team_reduction_factory = [&](sycl::accessor< - value_type, 1, - sycl::access::mode::read_write, - sycl::access::target::local> - local_mem, - value_type* results_ptr) mutable { - value_type* device_accessible_result_ptr = - m_result_ptr_device_accessible ? 
m_result_ptr : nullptr; - auto lambda = [=](sycl::nd_item<2> item) { - auto n_wgroups = - item.get_group_range()[0] * item.get_group_range()[1]; - auto wgroup_size = - item.get_local_range()[0] * item.get_local_range()[1]; - auto size = n_wgroups * wgroup_size; - - auto& num_teams_done = reinterpret_cast<unsigned int&>( - local_mem[wgroup_size * std::max(value_count, 1u)]); - const auto local_id = item.get_local_linear_id(); - const auto& functor = functor_wrapper.get_functor(); - const auto& selected_reducer = ReducerConditional::select( - static_cast<const FunctorType&>(functor), - static_cast<const ReducerType&>(reducer_wrapper.get_functor())); - - if constexpr (FunctorValueTraits<ReducerTypeFwd, - WorkTagFwd>::StaticValueSize == - 0) { - reference_type update = ValueInit::init( - selected_reducer, &local_mem[local_id * value_count]); - const member_type team_member( - team_scratch_memory_L0.get_pointer(), shmem_begin, - scratch_size[0], - scratch_ptr[1] + item.get_group(1) * scratch_size[1], - scratch_size[1], item); - if constexpr (std::is_same<WorkTag, void>::value) - functor(team_member, update); - else - functor(WorkTag(), team_member, update); - item.barrier(sycl::access::fence_space::local_space); - - SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, WorkTag>( - item, local_mem.get_pointer(), results_ptr, - device_accessible_result_ptr, value_count, selected_reducer, - static_cast<const FunctorType&>(functor), false, - std::min<std::size_t>(size, item.get_local_range()[0] * - item.get_local_range()[1])); - - if (local_id == 0) { - sycl::ext::oneapi::atomic_ref< - unsigned, sycl::ext::oneapi::memory_order::relaxed, - sycl::ext::oneapi::memory_scope::device, - sycl::access::address_space::global_space> - scratch_flags_ref(*scratch_flags); - num_teams_done = ++scratch_flags_ref; - } - sycl::group_barrier(item.get_group()); - if (num_teams_done == n_wgroups) { - if (local_id >= n_wgroups) - ValueInit::init(selected_reducer, - &local_mem[local_id * 
value_count]); - else { - ValueOps::copy(functor, &local_mem[local_id * value_count], - &results_ptr[local_id * value_count]); - for (unsigned int id = local_id + wgroup_size; id < n_wgroups; - id += wgroup_size) { - ValueJoin::join(selected_reducer, - &local_mem[local_id * value_count], - &results_ptr[id * value_count]); + const auto shmem_begin = m_shmem_begin; + const size_t scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; + sycl::device_ptr<char> const global_scratch_ptr = m_global_scratch_ptr; + + auto team_reduction_factory = + [&](sycl::accessor<value_type, 1, sycl::access::mode::read_write, + sycl::access::target::local> + local_mem, + sycl::device_ptr<value_type> results_ptr) mutable { + sycl::global_ptr<value_type> device_accessible_result_ptr = + m_result_ptr_device_accessible ? m_result_ptr : nullptr; + auto lambda = [=](sycl::nd_item<2> item) { + auto n_wgroups = + item.get_group_range()[0] * item.get_group_range()[1]; + auto wgroup_size = + item.get_local_range()[0] * item.get_local_range()[1]; + auto size = n_wgroups * wgroup_size; + + auto& num_teams_done = reinterpret_cast<unsigned int&>( + local_mem[wgroup_size * std::max(value_count, 1u)]); + const auto local_id = item.get_local_linear_id(); + const auto& functor = functor_wrapper.get_functor(); + const auto& selected_reducer = ReducerConditional::select( + static_cast<const FunctorType&>(functor), + static_cast<const ReducerType&>( + reducer_wrapper.get_functor())); + typename Analysis::Reducer final_reducer(&selected_reducer); + + if constexpr (Analysis::StaticValueSize == 0) { + reference_type update = + final_reducer.init(&local_mem[local_id * value_count]); + const member_type team_member( + team_scratch_memory_L0.get_pointer(), shmem_begin, + scratch_size[0], + global_scratch_ptr + item.get_group(1) * scratch_size[1], + scratch_size[1], item); + if constexpr (std::is_void<WorkTag>::value) + functor(team_member, update); + else + functor(WorkTag(), team_member, update); + 
item.barrier(sycl::access::fence_space::local_space); + + SYCLReduction::workgroup_reduction<>( + item, local_mem.get_pointer(), results_ptr, + device_accessible_result_ptr, value_count, + selected_reducer, false, + std::min<std::size_t>(size, + item.get_local_range()[0] * + item.get_local_range()[1])); + + if (local_id == 0) { + sycl::atomic_ref<unsigned, sycl::memory_order::relaxed, + sycl::memory_scope::device, + sycl::access::address_space::global_space> + scratch_flags_ref(*scratch_flags); + num_teams_done = ++scratch_flags_ref; } - } - - SYCLReduction::workgroup_reduction<ValueJoin, ValueOps, - WorkTag>( - item, local_mem.get_pointer(), results_ptr, - device_accessible_result_ptr, value_count, selected_reducer, - static_cast<const FunctorType&>(functor), true, - std::min(n_wgroups, item.get_local_range()[0] * - item.get_local_range()[1])); - } - } else { - value_type local_value; - reference_type update = - ValueInit::init(selected_reducer, &local_value); - const member_type team_member( - team_scratch_memory_L0.get_pointer(), shmem_begin, - scratch_size[0], - scratch_ptr[1] + item.get_group(1) * scratch_size[1], - scratch_size[1], item); - if constexpr (std::is_same<WorkTag, void>::value) - functor(team_member, update); - else - functor(WorkTag(), team_member, update); - - SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>( - item, local_mem.get_pointer(), local_value, results_ptr, - device_accessible_result_ptr, selected_reducer, - static_cast<const FunctorType&>(functor), false, - std::min<std::size_t>(size, item.get_local_range()[0] * - item.get_local_range()[1])); - - if (local_id == 0) { - sycl::ext::oneapi::atomic_ref< - unsigned, sycl::ext::oneapi::memory_order::relaxed, - sycl::ext::oneapi::memory_scope::device, - sycl::access::address_space::global_space> - scratch_flags_ref(*scratch_flags); - num_teams_done = ++scratch_flags_ref; - } - item.barrier(sycl::access::fence_space::local_space); - if (num_teams_done == n_wgroups) { - if (local_id >= 
n_wgroups) - ValueInit::init(selected_reducer, &local_value); - else { - local_value = results_ptr[local_id]; - for (unsigned int id = local_id + wgroup_size; id < n_wgroups; - id += wgroup_size) { - ValueJoin::join(selected_reducer, &local_value, - &results_ptr[id]); + sycl::group_barrier(item.get_group()); + if (num_teams_done == n_wgroups) { + if (local_id >= n_wgroups) + final_reducer.init(&local_mem[local_id * value_count]); + else { + final_reducer.copy(&local_mem[local_id * value_count], + &results_ptr[local_id * value_count]); + for (unsigned int id = local_id + wgroup_size; + id < n_wgroups; id += wgroup_size) { + final_reducer.join(&local_mem[local_id * value_count], + &results_ptr[id * value_count]); + } + } + + SYCLReduction::workgroup_reduction<>( + item, local_mem.get_pointer(), results_ptr, + device_accessible_result_ptr, value_count, + selected_reducer, true, + std::min(n_wgroups, item.get_local_range()[0] * + item.get_local_range()[1])); + } + } else { + value_type local_value; + reference_type update = final_reducer.init(&local_value); + const member_type team_member( + team_scratch_memory_L0.get_pointer(), shmem_begin, + scratch_size[0], + global_scratch_ptr + item.get_group(1) * scratch_size[1], + scratch_size[1], item); + if constexpr (std::is_void<WorkTag>::value) + functor(team_member, update); + else + functor(WorkTag(), team_member, update); + + SYCLReduction::workgroup_reduction<>( + item, local_mem.get_pointer(), local_value, results_ptr, + device_accessible_result_ptr, final_reducer, false, + std::min<std::size_t>(size, + item.get_local_range()[0] * + item.get_local_range()[1])); + + if (local_id == 0) { + sycl::atomic_ref<unsigned, sycl::memory_order::relaxed, + sycl::memory_scope::device, + sycl::access::address_space::global_space> + scratch_flags_ref(*scratch_flags); + num_teams_done = ++scratch_flags_ref; + } + item.barrier(sycl::access::fence_space::local_space); + if (num_teams_done == n_wgroups) { + if (local_id >= n_wgroups) + 
final_reducer.init(&local_value); + else { + local_value = results_ptr[local_id]; + for (unsigned int id = local_id + wgroup_size; + id < n_wgroups; id += wgroup_size) { + final_reducer.join(&local_value, &results_ptr[id]); + } + } + + SYCLReduction::workgroup_reduction<>( + item, local_mem.get_pointer(), local_value, results_ptr, + device_accessible_result_ptr, final_reducer, true, + std::min(n_wgroups, item.get_local_range()[0] * + item.get_local_range()[1])); } } - - SYCLReduction::workgroup_reduction<ValueJoin, WorkTag>( - item, local_mem.get_pointer(), local_value, results_ptr, - device_accessible_result_ptr, selected_reducer, - static_cast<const FunctorType&>(functor), true, - std::min(n_wgroups, item.get_local_range()[0] * - item.get_local_range()[1])); - } - } - }; - return lambda; - }; + }; + return lambda; + }; auto dummy_reduction_lambda = team_reduction_factory({1, cgh}, nullptr); @@ -860,8 +840,9 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, const auto init_size = std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1); - results_ptr = static_cast<pointer_type>(instance.scratch_space( - sizeof(value_type) * std::max(value_count, 1u) * init_size)); + results_ptr = + static_cast<sycl::device_ptr<value_type>>(instance.scratch_space( + sizeof(value_type) * std::max(value_count, 1u) * init_size)); auto reduction_lambda = team_reduction_factory(local_mem, results_ptr); @@ -885,9 +866,6 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, Kokkos::Experimental::SYCLDeviceUSMSpace>( space, m_result_ptr, results_ptr, sizeof(*m_result_ptr) * value_count); - space.fence( - "Kokkos::Impl::ParallelReduce<TeamPolicy,SYCL>: fence because " - "reduction can't access result storage location"); } return last_reduction_event; @@ -935,13 +913,12 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, m_scratch_size[0] = m_shmem_size; m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); - // 
FIXME_SYCL so far accessors used instead of these pointers // Functor's reduce memory, team scan memory, and team shared memory depend // upon team size. - auto& space = *m_policy.space().impl_internal_space_instance(); - m_scratch_ptr[0] = nullptr; - m_scratch_ptr[1] = static_cast<char*>(space.resize_team_scratch_space( - static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size)); + auto& space = *m_policy.space().impl_internal_space_instance(); + m_global_scratch_ptr = + static_cast<sycl::device_ptr<char>>(space.resize_team_scratch_space( + static_cast<ptrdiff_t>(m_scratch_size[1]) * m_league_size)); if (static_cast<int>(space.m_maxShmemPerBlock) < m_shmem_size - m_shmem_begin) { @@ -960,10 +937,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, public: template <class ViewType> - ParallelReduce(FunctorType const& arg_functor, Policy const& arg_policy, - ViewType const& arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value, - void*>::type = nullptr) + ParallelReduce( + FunctorType const& arg_functor, Policy const& arg_policy, + ViewType const& arg_result, + std::enable_if_t<Kokkos::is_view<ViewType>::value, void*> = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp index f9b34dca2fa28f4aa3cc59aad720924b380486e8..07ca907fa5adbaab309e32a36e8b37a97678fe33 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #include <Kokkos_HostSpace.hpp> @@ -63,10 +67,13 @@ void DeepCopySYCL(void* dst, const void* src, size_t n) { void DeepCopyAsyncSYCL(const Kokkos::Experimental::SYCL& instance, void* dst, const void* src, size_t n) { - auto event = - 
instance.impl_internal_space_instance()->m_queue->memcpy(dst, src, n); - instance.impl_internal_space_instance()->m_queue->ext_oneapi_submit_barrier( - std::vector<sycl::event>{event}); + // FIXME_SYCL memcpy doesn't respect submit_barrier which means that we need + // to actually fence the execution space to make sure the memcpy is properly + // enqueued when using out-of-order queues. + sycl::queue& q = *instance.impl_internal_space_instance()->m_queue; + q.wait_and_throw(); + auto event = q.memcpy(dst, src, n); + q.ext_oneapi_submit_barrier(std::vector<sycl::event>{event}); } void DeepCopyAsyncSYCL(void* dst, const void* src, size_t n) { @@ -121,6 +128,23 @@ void* allocate_sycl( return hostPtr; } +void* SYCLDeviceUSMSpace::allocate(const Kokkos::Experimental::SYCL& exec_space, + const size_t arg_alloc_size) const { + return allocate(exec_space, "[unlabeled]", arg_alloc_size); +} + +void* SYCLDeviceUSMSpace::allocate(const Kokkos::Experimental::SYCL& exec_space, + const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return allocate_sycl( + arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocDevice, + sycl::usm::alloc::device, + *exec_space.impl_internal_space_instance()->m_queue); +} + void* SYCLDeviceUSMSpace::allocate(const size_t arg_alloc_size) const { return allocate("[unlabeled]", arg_alloc_size); } @@ -135,6 +159,22 @@ void* SYCLDeviceUSMSpace::allocate(const char* arg_label, sycl::usm::alloc::device, m_queue); } +void* SYCLSharedUSMSpace::allocate(const SYCL& exec_space, + const size_t arg_alloc_size) const { + return allocate(exec_space, "[unlabeled]", arg_alloc_size); +} +void* SYCLSharedUSMSpace::allocate(const SYCL& exec_space, + const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return allocate_sycl( + arg_label, arg_alloc_size, arg_logical_size, + 
Kokkos::Tools::make_space_handle(name()), + RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocShared, + sycl::usm::alloc::shared, + *exec_space.impl_internal_space_instance()->m_queue); +} + void* SYCLSharedUSMSpace::allocate(const size_t arg_alloc_size) const { return allocate("[unlabeled]", arg_alloc_size); } @@ -148,6 +188,21 @@ void* SYCLSharedUSMSpace::allocate(const char* arg_label, sycl::usm::alloc::shared, m_queue); } +void* SYCLHostUSMSpace::allocate(const SYCL& exec_space, + const size_t arg_alloc_size) const { + return allocate(exec_space, "[unlabeled]", arg_alloc_size); +} +void* SYCLHostUSMSpace::allocate(const SYCL& exec_space, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return allocate_sycl( + arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocHost, + sycl::usm::alloc::host, + *exec_space.impl_internal_space_instance()->m_queue); +} + void* SYCLHostUSMSpace::allocate(const size_t arg_alloc_size) const { return allocate("[unlabeled]", arg_alloc_size); } @@ -261,6 +316,56 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: "HostSpace"); } +SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: + SharedAllocationRecord( + const Kokkos::Experimental::SYCL& arg_exec_space, + const Kokkos::Experimental::SYCLDeviceUSMSpace& space, + const std::string& label, const size_t size, + const SharedAllocationRecord<void, void>::function_type dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, + void>::s_root_record, +#endif + Kokkos::Impl::checked_allocation_with_header(arg_exec_space, space, + label, size), + sizeof(SharedAllocationHeader) + size, dealloc, label), + m_space(space) { + 
SharedAllocationHeader header; + + this->base_t::_fill_host_accessible_header_info(header, label); + + // Copy to device memory + Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, HostSpace>( + arg_exec_space, RecordBase::m_alloc_ptr, &header, + sizeof(SharedAllocationHeader)); +} + +SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, void>:: + SharedAllocationRecord( + const Kokkos::Experimental::SYCL& exec_space, + const Kokkos::Experimental::SYCLSharedUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const SharedAllocationRecord<void, void>::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, + void>::s_root_record, +#endif + Impl::checked_allocation_with_header(exec_space, arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_space(arg_space) { + + this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr, + arg_label); +} + SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, void>:: SharedAllocationRecord( const Kokkos::Experimental::SYCLSharedUSMSpace& arg_space, @@ -283,6 +388,29 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, void>:: arg_label); } +SharedAllocationRecord<Kokkos::Experimental::SYCLHostUSMSpace, void>:: + SharedAllocationRecord( + const Kokkos::Experimental::SYCL& exec_space, + const Kokkos::Experimental::SYCLHostUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const SharedAllocationRecord<void, void>::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::Experimental::SYCLHostUSMSpace, + 
void>::s_root_record, +#endif + Impl::checked_allocation_with_header(exec_space, arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_space(arg_space) { + + this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr, + arg_label); +} + SharedAllocationRecord<Kokkos::Experimental::SYCLHostUSMSpace, void>:: SharedAllocationRecord( const Kokkos::Experimental::SYCLHostUSMSpace& arg_space, diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp index bda2dfd0aa9384e51462012418336cb3c629acb5..a8c60412cbca1318e5e3c6654a45cd725b3389f8 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp @@ -65,7 +65,7 @@ class SYCLTeamMember { using scratch_memory_space = execution_space::scratch_memory_space; private: - mutable void* m_team_reduce; + mutable sycl::local_ptr<void> m_team_reduce; scratch_memory_space m_team_shared; int m_team_reduce_size; sycl::nd_item<2> m_item; @@ -109,8 +109,9 @@ class SYCLTeamMember { //-------------------------------------------------------------------------- template <class ValueType> - KOKKOS_INLINE_FUNCTION std::enable_if_t<std::is_arithmetic_v<ValueType>> - team_broadcast(ValueType& val, const int thread_id) const { + KOKKOS_INLINE_FUNCTION + std::enable_if_t<std::is_trivially_copyable_v<ValueType>> + team_broadcast(ValueType& val, const int thread_id) const { val = sycl::group_broadcast(m_item.get_group(), val, sycl::id<2>(thread_id, 0)); } @@ -118,17 +119,18 @@ class SYCLTeamMember { // FIXME_SYCL remove/adapt this overload once the Intel oneAPI implementation // is conforming to the SYCL2020 standard (allowing trivially-copyable types) template <class ValueType> - KOKKOS_INLINE_FUNCTION std::enable_if_t<!std::is_arithmetic_v<ValueType>> - team_broadcast(ValueType& val, const int thread_id) const { + KOKKOS_INLINE_FUNCTION + 
std::enable_if_t<!std::is_trivially_copyable_v<ValueType>> + team_broadcast(ValueType& val, const int thread_id) const { // Wait for shared data write until all threads arrive here sycl::group_barrier(m_item.get_group()); if (m_item.get_local_id(1) == 0 && static_cast<int>(m_item.get_local_id(0)) == thread_id) { - *static_cast<ValueType*>(m_team_reduce) = val; + *static_cast<sycl::local_ptr<ValueType>>(m_team_reduce) = val; } // Wait for shared data read until root thread writes sycl::group_barrier(m_item.get_group()); - val = *(static_cast<ValueType*>(m_team_reduce)); + val = *static_cast<sycl::local_ptr<ValueType>>(m_team_reduce); } template <class Closure, class ValueType> @@ -142,17 +144,15 @@ class SYCLTeamMember { /**\brief Reduction across a team */ template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer) const noexcept { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer) const noexcept { team_reduce(reducer, reducer.reference()); } template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer, - typename ReducerType::value_type& value) const noexcept { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) const noexcept { using value_type = typename ReducerType::value_type; auto sg = m_item.get_sub_group(); @@ -175,8 +175,9 @@ class SYCLTeamMember { const unsigned int maximum_work_range = std::min<int>(m_team_reduce_size / sizeof(value_type), n_subgroups); - const auto id_in_sg = sg.get_local_id()[0]; - auto reduction_array = static_cast<value_type*>(m_team_reduce); + const auto id_in_sg = sg.get_local_id()[0]; + auto reduction_array = + 
static_cast<sycl::local_ptr<value_type>>(m_team_reduce); // Load values into the first maximum_work_range values of the reduction // array in chunks. This means that only sub groups with an id in the @@ -251,7 +252,8 @@ class SYCLTeamMember { } const auto n_active_subgroups = sg.get_group_range()[0]; - const auto base_data = static_cast<Type*>(m_team_reduce); + const auto base_data = + static_cast<sycl::local_ptr<Type>>(m_team_reduce).get(); if (static_cast<int>(n_active_subgroups * sizeof(Type)) > m_team_reduce_size) Kokkos::abort("Not implemented!"); @@ -321,17 +323,15 @@ class SYCLTeamMember { //---------------------------------------- template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - vector_reduce(ReducerType const& reducer) const { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + vector_reduce(ReducerType const& reducer) const { vector_reduce(reducer, reducer.reference()); } template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - vector_reduce(ReducerType const& reducer, - typename ReducerType::value_type& value) const { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + vector_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) const { const auto tidx1 = m_item.get_local_id(1); const auto grange1 = m_item.get_local_range(1); @@ -364,12 +364,13 @@ class SYCLTeamMember { // Private for the driver KOKKOS_INLINE_FUNCTION - SYCLTeamMember(void* shared, const int shared_begin, const int shared_size, - void* scratch_level_1_ptr, const int scratch_level_1_size, - const sycl::nd_item<2> item) + SYCLTeamMember(sycl::local_ptr<void> shared, const int shared_begin, + const int shared_size, + sycl::device_ptr<void> scratch_level_1_ptr, + const int scratch_level_1_size, const sycl::nd_item<2> item) : m_team_reduce(shared), - 
m_team_shared(static_cast<char*>(shared) + shared_begin, shared_size, - scratch_level_1_ptr, scratch_level_1_size), + m_team_shared(static_cast<sycl::local_ptr<char>>(shared) + shared_begin, + shared_size, scratch_level_1_ptr, scratch_level_1_size), m_team_reduce_size(shared_begin), m_item(item) {} @@ -456,9 +457,9 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::SYCLTeamMember> + std::common_type_t<iType1, iType2>, Impl::SYCLTeamMember> TeamThreadRange(const Impl::SYCLTeamMember& thread, iType1 begin, iType2 end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( thread, iType(begin), iType(end)); } @@ -473,10 +474,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::SYCLTeamMember> + std::common_type_t<iType1, iType2>, Impl::SYCLTeamMember> TeamVectorRange(const Impl::SYCLTeamMember& thread, const iType1& begin, const iType2& end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( thread, iType(begin), iType(end)); } @@ -491,10 +492,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Impl::SYCLTeamMember> + std::common_type_t<iType1, iType2>, Impl::SYCLTeamMember> ThreadVectorRange(const Impl::SYCLTeamMember& thread, iType1 arg_begin, iType2 arg_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return 
Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( thread, iType(arg_begin), iType(arg_end)); } @@ -542,11 +543,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * performed and put into result. */ template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::SYCLTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { typename ReducerType::value_type value; reducer.init(value); @@ -569,11 +569,10 @@ KOKKOS_INLINE_FUNCTION * performed and put into result. */ template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::SYCLTeamMember>& loop_boundaries, - const Closure& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { ValueType val; Kokkos::Sum<ValueType> reducer(val); @@ -653,11 +652,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( } template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::SYCLTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION 
std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { typename ReducerType::value_type value; reducer.init(value); @@ -676,11 +674,10 @@ KOKKOS_INLINE_FUNCTION } template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::SYCLTeamMember>& loop_boundaries, - const Closure& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { ValueType val; Kokkos::Sum<ValueType> reducer(val); @@ -744,11 +741,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * constructed value. */ template <typename iType, class Closure, class ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::SYCLTeamMember> const& loop_boundaries, - Closure const& closure, ReducerType const& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> +parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember> const& loop_boundaries, + Closure const& closure, ReducerType const& reducer) { reducer.init(reducer.reference()); const iType tidx1 = loop_boundaries.member.item().get_local_id(1); @@ -773,11 +769,10 @@ KOKKOS_INLINE_FUNCTION * constructed value. 
*/ template <typename iType, class Closure, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!is_reducer<ValueType>::value>::type - parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::SYCLTeamMember> const& loop_boundaries, - Closure const& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!is_reducer<ValueType>::value> +parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember> const& loop_boundaries, + Closure const& closure, ValueType& result) { result = ValueType(); const iType tidx1 = loop_boundaries.member.item().get_local_id(1); @@ -801,11 +796,10 @@ KOKKOS_INLINE_FUNCTION * The last call to closure has final == true. */ template <typename iType, class Closure, typename ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::SYCLTeamMember>& loop_boundaries, - const Closure& closure, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { using value_type = typename Kokkos::Impl::FunctorAnalysis< Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure>::value_type; diff --git a/packages/kokkos/core/src/impl/Kokkos_Serial.cpp b/packages/kokkos/core/src/Serial/Kokkos_Serial.cpp similarity index 82% rename from packages/kokkos/core/src/impl/Kokkos_Serial.cpp rename to packages/kokkos/core/src/Serial/Kokkos_Serial.cpp index e5917eb59d1aba7a1cb9197fe841361ecca4d512..9205e82560a34c0ef383ee1d8ed43c6352fd8e2c 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Serial.cpp +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial.cpp @@ -42,16 +42,20 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE 
+#endif + #include <Kokkos_Core.hpp> -#if defined(KOKKOS_ENABLE_SERIAL) -#include <cstdlib> -#include <sstream> #include <Kokkos_Serial.hpp> #include <impl/Kokkos_Traits.hpp> #include <impl/Kokkos_Error.hpp> - +#include <impl/Kokkos_ExecSpaceManager.hpp> #include <impl/Kokkos_SharedAlloc.hpp> + +#include <cstdlib> +#include <iostream> #include <sstream> /*--------------------------------------------------------------------------*/ @@ -178,11 +182,26 @@ Serial::Serial() } #endif +void Serial::print_configuration(std::ostream& os, bool /*verbose*/) const { + os << "Host Serial Execution Space:\n"; + os << " KOKKOS_ENABLE_SERIAL: yes\n"; + + os << "Serial Atomics:\n"; + os << " KOKKOS_ENABLE_SERIAL_ATOMICS: "; +#ifdef KOKKOS_ENABLE_SERIAL_ATOMICS + os << "yes\n"; +#else + os << "no\n"; +#endif + + os << "\nSerial Runtime Configuration:\n"; +} + bool Serial::impl_is_initialized() { return Impl::SerialInternal::singleton().is_initialized(); } -void Serial::impl_initialize() { +void Serial::impl_initialize(InitializationSettings const&) { Impl::SerialInternal::singleton().initialize(); } @@ -193,44 +212,7 @@ const char* Serial::name() { return "Serial"; } namespace Impl { int g_serial_space_factory_initialized = - initialize_space_factory<SerialSpaceInitializer>("100_Serial"); - -void SerialSpaceInitializer::initialize(const InitArguments& args) { - // Prevent "unused variable" warning for 'args' input struct. If - // Serial::initialize() ever needs to take arguments from the input - // struct, you may remove this line of code. 
- (void)args; - - // Always initialize Serial if it is configure time enabled - Kokkos::Serial::impl_initialize(); -} - -void SerialSpaceInitializer::finalize(const bool) { - if (Kokkos::Serial::impl_is_initialized()) Kokkos::Serial::impl_finalize(); -} - -void SerialSpaceInitializer::fence() { Kokkos::Serial::impl_static_fence(); } -void SerialSpaceInitializer::fence(const std::string& name) { - Kokkos::Serial::impl_static_fence(name); -} - -void SerialSpaceInitializer::print_configuration(std::ostream& msg, - const bool detail) { - msg << "Host Serial Execution Space:" << std::endl; - msg << " KOKKOS_ENABLE_SERIAL: "; - msg << "yes" << std::endl; - - msg << "Serial Atomics:" << std::endl; - msg << " KOKKOS_ENABLE_SERIAL_ATOMICS: "; -#ifdef KOKKOS_ENABLE_SERIAL_ATOMICS - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - - msg << "\nSerial Runtime Configuration:" << std::endl; - Serial::print_configuration(msg, detail); -} + initialize_space_factory<Serial>("100_Serial"); } // namespace Impl @@ -243,7 +225,3 @@ constexpr DeviceType DeviceTypeTraits<Serial>::id; #endif } // namespace Kokkos - -#else -void KOKKOS_CORE_SRC_IMPL_SERIAL_PREVENT_LINK_ERROR() {} -#endif // defined( KOKKOS_ENABLE_SERIAL ) diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d726a86f76493187e193f8687f834fe90df0d8f5 --- /dev/null +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp @@ -0,0 +1,213 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKO_SERIAL_PARALLEL_MDRANGE_HPP +#define KOKKO_SERIAL_PARALLEL_MDRANGE_HPP + +#include <Kokkos_Parallel.hpp> +#include <KokkosExp_MDRangePolicy.hpp> + +namespace Kokkos { +namespace Impl { + +template <class FunctorType, class... 
Traits> +class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, + Kokkos::Serial> { + private: + using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>; + using Policy = typename MDRangePolicy::impl_range_policy; + + using iterate_type = typename Kokkos::Impl::HostIterateTile< + MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void>; + + const FunctorType m_functor; + const MDRangePolicy m_mdr_policy; + const Policy m_policy; + + void exec() const { + const typename Policy::member_type e = m_policy.end(); + for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { + iterate_type(m_mdr_policy, m_functor)(i); + } + } + + public: + inline void execute() const { this->exec(); } + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy&, const Functor&) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. + */ + return 1024; + } + inline ParallelFor(const FunctorType& arg_functor, + const MDRangePolicy& arg_policy) + : m_functor(arg_functor), + m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} +}; + +template <class FunctorType, class ReducerType, class... 
Traits> +class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, + Kokkos::Serial> { + private: + using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>; + using Policy = typename MDRangePolicy::impl_range_policy; + + using WorkTag = typename MDRangePolicy::work_tag; + + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; + + using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE, + MDRangePolicy, ReducerTypeFwd>; + + using pointer_type = typename Analysis::pointer_type; + using value_type = typename Analysis::value_type; + using reference_type = typename Analysis::reference_type; + + using iterate_type = + typename Kokkos::Impl::HostIterateTile<MDRangePolicy, FunctorType, + WorkTag, reference_type>; + + const FunctorType m_functor; + const MDRangePolicy m_mdr_policy; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + + inline void exec(reference_type update) const { + const typename Policy::member_type e = m_policy.end(); + for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { + iterate_type(m_mdr_policy, m_functor, update)(i); + } + } + + public: + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy&, const Functor&) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. 
+ */ + return 1024; + } + inline void execute() const { + const size_t pool_reduce_size = + Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); + const size_t team_reduce_size = 0; // Never shrinks + const size_t team_shared_size = 0; // Never shrinks + const size_t thread_local_size = 0; // Never shrinks + + auto* internal_instance = m_policy.space().impl_internal_space_instance(); + // Need to lock resize_thread_team_data + std::lock_guard<std::mutex> lock( + internal_instance->m_thread_team_data_mutex); + internal_instance->resize_thread_team_data( + pool_reduce_size, team_reduce_size, team_shared_size, + thread_local_size); + + pointer_type ptr = + m_result_ptr + ? m_result_ptr + : pointer_type( + internal_instance->m_thread_team_data.pool_reduce_local()); + + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + reference_type update = final_reducer.init(ptr); + + this->exec(update); + + final_reducer.final(ptr); + } + + template <class HostViewType> + ParallelReduce(const FunctorType& arg_functor, + const MDRangePolicy& arg_policy, + const HostViewType& arg_result_view, + std::enable_if_t<Kokkos::is_view<HostViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void*> = nullptr) + : m_functor(arg_functor), + m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), + m_reducer(InvalidType()), + m_result_ptr(arg_result_view.data()) { + static_assert(Kokkos::is_view<HostViewType>::value, + "Kokkos::Serial reduce result must be a View"); + + static_assert( + Kokkos::Impl::MemorySpaceAccess<typename HostViewType::memory_space, + Kokkos::HostSpace>::accessible, + "Kokkos::Serial reduce result must be a View in HostSpace"); + } + + inline ParallelReduce(const FunctorType& arg_functor, + MDRangePolicy arg_policy, const ReducerType& reducer) + : m_functor(arg_functor), + m_mdr_policy(arg_policy), + m_policy(Policy(0, 
m_mdr_policy.m_num_tiles).set_chunk_size(1)), + m_reducer(reducer), + m_result_ptr(reducer.view().data()) { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" + );*/ + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp new file mode 100644 index 0000000000000000000000000000000000000000..84262227f54ad9dafcdc114417fdde2984313132 --- /dev/null +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp @@ -0,0 +1,337 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKO_SERIAL_PARALLEL_RANGE_HPP +#define KOKKO_SERIAL_PARALLEL_RANGE_HPP + +#include <Kokkos_Parallel.hpp> + +namespace Kokkos { +namespace Impl { + +template <class FunctorType, class... Traits> +class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Serial> { + private: + using Policy = Kokkos::RangePolicy<Traits...>; + + const FunctorType m_functor; + const Policy m_policy; + + template <class TagType> + std::enable_if_t<std::is_void<TagType>::value> exec() const { + const typename Policy::member_type e = m_policy.end(); + for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { + m_functor(i); + } + } + + template <class TagType> + std::enable_if_t<!std::is_void<TagType>::value> exec() const { + const TagType t{}; + const typename Policy::member_type e = m_policy.end(); + for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { + m_functor(t, i); + } + } + + public: + inline void execute() const { + this->template exec<typename Policy::work_tag>(); + } + + inline ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +/*--------------------------------------------------------------------------*/ + +template <class FunctorType, class ReducerType, class... 
Traits> +class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, + Kokkos::Serial> { + private: + using Policy = Kokkos::RangePolicy<Traits...>; + using WorkTag = typename Policy::work_tag; + + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; + + using Analysis = + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>; + + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + + template <class TagType> + inline std::enable_if_t<std::is_void<TagType>::value> exec( + reference_type update) const { + const typename Policy::member_type e = m_policy.end(); + for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { + m_functor(i, update); + } + } + + template <class TagType> + inline std::enable_if_t<!std::is_void<TagType>::value> exec( + reference_type update) const { + const TagType t{}; + + const typename Policy::member_type e = m_policy.end(); + for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) { + m_functor(t, i, update); + } + } + + public: + inline void execute() const { + const size_t pool_reduce_size = + Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); + const size_t team_reduce_size = 0; // Never shrinks + const size_t team_shared_size = 0; // Never shrinks + const size_t thread_local_size = 0; // Never shrinks + + auto* internal_instance = m_policy.space().impl_internal_space_instance(); + // Need to lock resize_thread_team_data + std::lock_guard<std::mutex> lock( + internal_instance->m_thread_team_data_mutex); + 
internal_instance->resize_thread_team_data( + pool_reduce_size, team_reduce_size, team_shared_size, + thread_local_size); + + pointer_type ptr = + m_result_ptr + ? m_result_ptr + : pointer_type( + internal_instance->m_thread_team_data.pool_reduce_local()); + + typename Analysis::Reducer final_reducer( + &ReducerConditional::select(m_functor, m_reducer)); + + reference_type update = final_reducer.init(ptr); + + this->template exec<WorkTag>(update); + + final_reducer.final(ptr); + } + + template <class HostViewType> + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const HostViewType& arg_result_view, + std::enable_if_t<Kokkos::is_view<HostViewType>::value && + !Kokkos::is_reducer<ReducerType>::value, + void*> = nullptr) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(InvalidType()), + m_result_ptr(arg_result_view.data()) { + static_assert(Kokkos::is_view<HostViewType>::value, + "Kokkos::Serial reduce result must be a View"); + + static_assert( + Kokkos::Impl::MemorySpaceAccess<typename HostViewType::memory_space, + Kokkos::HostSpace>::accessible, + "Kokkos::Serial reduce result must be a View in HostSpace"); + } + + inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, + const ReducerType& reducer) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(reducer), + m_result_ptr(reducer.view().data()) { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" + );*/ + } +}; + +/*--------------------------------------------------------------------------*/ + +template <class FunctorType, class... 
Traits>
+// Serial-backend parallel_scan over a RangePolicy. execute() walks the range
+// with one in-order loop and passes `true` as the last functor argument on
+// every call.
+class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
+                   Kokkos::Serial> {
+ private:
+  using Policy = Kokkos::RangePolicy<Traits...>;
+  using WorkTag = typename Policy::work_tag;
+
+  using Analysis =
+      FunctorAnalysis<FunctorPatternInterface::SCAN, Policy, FunctorType>;
+
+  using pointer_type = typename Analysis::pointer_type;
+  using reference_type = typename Analysis::reference_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+
+  // Untagged overload (selected when TagType is void): calls
+  // m_functor(i, update, true) for i in [m_policy.begin(), m_policy.end()).
+  template <class TagType>
+  inline std::enable_if_t<std::is_void<TagType>::value> exec(
+      reference_type update) const {
+    const typename Policy::member_type e = m_policy.end();
+    for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) {
+      m_functor(i, update, true);
+    }
+  }
+
+  // Tagged overload (TagType is not void): same loop, with a
+  // default-constructed tag passed as the first functor argument.
+  template <class TagType>
+  inline std::enable_if_t<!std::is_void<TagType>::value> exec(
+      reference_type update) const {
+    const TagType t{};
+    const typename Policy::member_type e = m_policy.end();
+    for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) {
+      m_functor(t, i, update, true);
+    }
+  }
+
+ public:
+  // Sizes the instance's thread-team data for one scan value, initialises the
+  // accumulator in pool_reduce_local() and runs the loop. The lock_guard is
+  // held until execute() returns, so the loop itself runs under the mutex.
+  inline void execute() const {
+    const size_t pool_reduce_size = Analysis::value_size(m_functor);
+    const size_t team_reduce_size = 0;  // Never shrinks
+    const size_t team_shared_size = 0;  // Never shrinks
+    const size_t thread_local_size = 0;  // Never shrinks
+
+    // Need to lock resize_thread_team_data
+    auto* internal_instance = m_policy.space().impl_internal_space_instance();
+    std::lock_guard<std::mutex> lock(
+        internal_instance->m_thread_team_data_mutex);
+    internal_instance->resize_thread_team_data(
+        pool_reduce_size, team_reduce_size, team_shared_size,
+        thread_local_size);
+
+    typename Analysis::Reducer final_reducer(&m_functor);
+
+    reference_type update = final_reducer.init(pointer_type(
+        internal_instance->m_thread_team_data.pool_reduce_local()));
+
+    this->template exec<WorkTag>(update);
+  }
+
+  // Stores copies of the functor and the policy; no work is done here.
+  inline ParallelScan(const FunctorType& arg_functor, const Policy& arg_policy)
+      : m_functor(arg_functor), m_policy(arg_policy) {}
+};
+
+/*--------------------------------------------------------------------------*/
+// Same scan as ParallelScan above, but after the loop the accumulator is
+// assigned to the caller-provided ReturnType reference.
+template <class FunctorType, class ReturnType, class... Traits>
+class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>,
+                            ReturnType, Kokkos::Serial> {
+ private:
+  using Policy = Kokkos::RangePolicy<Traits...>;
+  using WorkTag = typename Policy::work_tag;
+
+  using Analysis =
+      FunctorAnalysis<FunctorPatternInterface::SCAN, Policy, FunctorType>;
+
+  using pointer_type = typename Analysis::pointer_type;
+  using reference_type = typename Analysis::reference_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+  // Reference to the caller's result object; written at the end of execute().
+  ReturnType& m_returnvalue;
+
+  // Untagged overload (TagType is void).
+  template <class TagType>
+  inline std::enable_if_t<std::is_void<TagType>::value> exec(
+      reference_type update) const {
+    const typename Policy::member_type e = m_policy.end();
+    for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) {
+      m_functor(i, update, true);
+    }
+  }
+
+  // Tagged overload (TagType is not void).
+  template <class TagType>
+  inline std::enable_if_t<!std::is_void<TagType>::value> exec(
+      reference_type update) const {
+    const TagType t{};
+    const typename Policy::member_type e = m_policy.end();
+    for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) {
+      m_functor(t, i, update, true);
+    }
+  }
+
+ public:
+  // Non-const because it writes through m_returnvalue. The mutex taken below
+  // is held for the whole call.
+  inline void execute() {
+    const size_t pool_reduce_size = Analysis::value_size(m_functor);
+    const size_t team_reduce_size = 0;  // Never shrinks
+    const size_t team_shared_size = 0;  // Never shrinks
+    const size_t thread_local_size = 0;  // Never shrinks
+
+    // Need to lock resize_thread_team_data
+    auto* internal_instance = m_policy.space().impl_internal_space_instance();
+    std::lock_guard<std::mutex> lock(
+        internal_instance->m_thread_team_data_mutex);
+    internal_instance->resize_thread_team_data(
+        pool_reduce_size, team_reduce_size, team_shared_size,
+        thread_local_size);
+
+    typename Analysis::Reducer final_reducer(&m_functor);
+
+    reference_type update = final_reducer.init(pointer_type(
+        internal_instance->m_thread_team_data.pool_reduce_local()));
+
+    this->template exec<WorkTag>(update);
+
+    // Publish the accumulated value to the caller.
+    m_returnvalue = update;
+  }
+
+  inline ParallelScanWithTotal(const FunctorType& arg_functor,
+                               const Policy& arg_policy,
+                               ReturnType& arg_returnvalue)
+      : m_functor(arg_functor),
+        m_policy(arg_policy),
+        m_returnvalue(arg_returnvalue) {}
+};
+
+}  // namespace Impl
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..782ae75feb5e195feec743d4ca32523eee1ddd95
--- /dev/null
+++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp
@@ -0,0 +1,424 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 3.0
+//       Copyright (2020) National Technology & Engineering
+//               Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+// NOTE(review): the guard macro is spelled "KOKKO_" (missing 'S'), unlike the
+// other Serial headers in this patch; harmless, but worth normalising upstream.
+#ifndef KOKKO_SERIAL_PARALLEL_TEAM_HPP
+#define KOKKO_SERIAL_PARALLEL_TEAM_HPP
+
+#include <Kokkos_Parallel.hpp>
+
+namespace Kokkos {
+namespace Impl {
+
+/*
+ * < Kokkos::Serial , WorkArgTag >
+ * < WorkArgTag , Impl::enable_if< std::is_same< Kokkos::Serial ,
+ * Kokkos::DefaultExecutionSpace >::value >::type >
+ *
+ */
+// Team policy for the Serial backend. Team size and vector length are fixed
+// at 1; the policy stores only the league size, the chunk size and the
+// per-team / per-thread scratch sizes for scratch levels 0 and 1.
+template <class... Properties>
+class TeamPolicyInternal<Kokkos::Serial, Properties...>
+    : public PolicyTraits<Properties...> {
+ private:
+  size_t m_team_scratch_size[2];    // indexed by scratch level
+  size_t m_thread_scratch_size[2];  // indexed by scratch level
+  int m_league_size;
+  int m_chunk_size;
+
+ public:
+  //! Tag this class as a kokkos execution policy
+  using execution_policy = TeamPolicyInternal;
+
+  using traits = PolicyTraits<Properties...>;
+
+  //! Execution space of this execution policy:
+  using execution_space = Kokkos::Serial;
+
+  // Returns a function-local static execution-space object; the space
+  // argument accepted by the constructors below is not stored.
+  const typename traits::execution_space& space() const {
+    static typename traits::execution_space m_space;
+    return m_space;
+  }
+
+  // Grants the converting constructor access to other specialisations.
+  template <class ExecSpace, class... OtherProperties>
+  friend class TeamPolicyInternal;
+
+  // Converting constructor: copies every stored field from a Serial team
+  // policy that was declared with a different property list.
+  template <class... OtherProperties>
+  TeamPolicyInternal(
+      const TeamPolicyInternal<Kokkos::Serial, OtherProperties...>& p) {
+    m_league_size = p.m_league_size;
+    m_team_scratch_size[0] = p.m_team_scratch_size[0];
+    m_thread_scratch_size[0] = p.m_thread_scratch_size[0];
+    m_team_scratch_size[1] = p.m_team_scratch_size[1];
+    m_thread_scratch_size[1] = p.m_thread_scratch_size[1];
+    m_chunk_size = p.m_chunk_size;
+  }
+
+  //----------------------------------------
+  // Team-size queries: every overload ignores its arguments and returns 1.
+
+  template <class FunctorType>
+  int team_size_max(const FunctorType&, const ParallelForTag&) const {
+    return 1;
+  }
+  template <class FunctorType>
+  int team_size_max(const FunctorType&, const ParallelReduceTag&) const {
+    return 1;
+  }
+  template <class FunctorType, class ReducerType>
+  int team_size_max(const FunctorType&, const ReducerType&,
+                    const ParallelReduceTag&) const {
+    return 1;
+  }
+  template <class FunctorType>
+  int team_size_recommended(const FunctorType&, const ParallelForTag&) const {
+    return 1;
+  }
+  template <class FunctorType>
+  int team_size_recommended(const FunctorType&,
+                            const ParallelReduceTag&) const {
+    return 1;
+  }
+  template <class FunctorType, class ReducerType>
+  int team_size_recommended(const FunctorType&, const ReducerType&,
+                            const ParallelReduceTag&) const {
+    return 1;
+  }
+
+  //----------------------------------------
+
+  inline int team_size() const { return 1; }
+  inline bool impl_auto_team_size() const { return false; }
+  inline bool impl_auto_vector_length() const { return false; }
+  // The two setters are deliberate no-ops: the values are fixed at 1.
+  inline void impl_set_team_size(size_t) {}
+  inline void impl_set_vector_length(size_t) {}
+  inline int league_size() const { return m_league_size; }
+  // Sum of the per-team and per-thread scratch sizes for `level`; the second
+  // (unnamed) argument is ignored. `level` is used as an array index without
+  // a range check.
+  inline size_t scratch_size(const int& level, int = 0) const {
+    return m_team_scratch_size[level] + m_thread_scratch_size[level];
+  }
+
+  inline int impl_vector_length() const { return 1; }
+  inline static int vector_length_max() {
+    return 1024;
+  }  // Use arbitrary large number, is meant as a vectorizable length
+
+  // 32 KiB for level 0, 20 MiB for any other level.
+  inline static int scratch_size_max(int level) {
+    return (level == 0 ? 1024 * 32 : 20 * 1024 * 1024);
+  }
+  /** \brief Specify league size, request team size */
+  // Primary constructor; all other constructors delegate here. Aborts when a
+  // team size larger than 1 is requested. AUTO requests arrive as -1 and pass
+  // the check. The chunk size defaults to 32.
+  TeamPolicyInternal(const execution_space&, int league_size_request,
+                     int team_size_request, int /* vector_length_request */ = 1)
+      : m_team_scratch_size{0, 0},
+        m_thread_scratch_size{0, 0},
+        m_league_size(league_size_request),
+        m_chunk_size(32) {
+    if (team_size_request > 1)
+      Kokkos::abort("Kokkos::abort: Requested Team Size is too large!");
+  }
+
+  // (space, league, AUTO team size, vector length)
+  TeamPolicyInternal(const execution_space& space, int league_size_request,
+                     const Kokkos::AUTO_t& /**team_size_request*/,
+                     int vector_length_request = 1)
+      : TeamPolicyInternal(space, league_size_request, -1,
+                           vector_length_request) {}
+
+  // (space, league, AUTO team size, AUTO vector length)
+  TeamPolicyInternal(const execution_space& space, int league_size_request,
+                     const Kokkos::AUTO_t& /* team_size_request */
+                     ,
+                     const Kokkos::AUTO_t& /* vector_length_request */
+                     )
+      : TeamPolicyInternal(space, league_size_request, -1, -1) {}
+
+  // (space, league, team size, AUTO vector length)
+  TeamPolicyInternal(const execution_space& space, int league_size_request,
+                     int team_size_request,
+                     const Kokkos::AUTO_t& /* vector_length_request */
+                     )
+      : TeamPolicyInternal(space, league_size_request, team_size_request, -1) {}
+
+  // The remaining overloads omit the space argument and forward a
+  // default-constructed execution space to the matching overload above.
+  TeamPolicyInternal(int league_size_request,
+                     const Kokkos::AUTO_t& team_size_request,
+                     int vector_length_request = 1)
+      : TeamPolicyInternal(typename traits::execution_space(),
+                           league_size_request, team_size_request,
+                           vector_length_request) {}
+
+  TeamPolicyInternal(int league_size_request,
+                     const Kokkos::AUTO_t& team_size_request,
+                     const Kokkos::AUTO_t& vector_length_request)
+      : TeamPolicyInternal(typename traits::execution_space(),
+                           league_size_request, team_size_request,
+                           vector_length_request) {}
+  TeamPolicyInternal(int league_size_request, int team_size_request,
+                     const Kokkos::AUTO_t& vector_length_request)
+      : TeamPolicyInternal(typename traits::execution_space(),
+                           league_size_request, team_size_request,
+                           vector_length_request) {}
+
+  TeamPolicyInternal(int league_size_request, int team_size_request,
+                     int vector_length_request = 1)
+      : TeamPolicyInternal(typename traits::execution_space(),
+                           league_size_request, team_size_request,
+                           vector_length_request) {}
+
+  inline int chunk_size() const { return m_chunk_size; }
+
+  /** \brief set chunk_size to a discrete value*/
+  // The value is stored in an int member, so an index_type wider than int is
+  // narrowed here.
+  inline TeamPolicyInternal& set_chunk_size(
+      typename traits::index_type chunk_size_) {
+    m_chunk_size = chunk_size_;
+    return *this;
+  }
+
+  /** \brief set per team scratch size for a specific level of the scratch
+   * hierarchy */
+  inline TeamPolicyInternal& set_scratch_size(const int& level,
+                                              const PerTeamValue& per_team) {
+    m_team_scratch_size[level] = per_team.value;
+    return *this;
+  }
+
+  /** \brief set per thread scratch size for a specific level of the scratch
+   * hierarchy */
+  inline TeamPolicyInternal& set_scratch_size(
+      const int& level, const PerThreadValue& per_thread) {
+    m_thread_scratch_size[level] = per_thread.value;
+    return *this;
+  }
+
+  /** \brief set per thread and per team scratch size for a specific level of
+   * the scratch hierarchy */
+  inline TeamPolicyInternal& set_scratch_size(
+      const int& level, const PerTeamValue& per_team,
+      const PerThreadValue& per_thread) {
+    m_team_scratch_size[level] = per_team.value;
+    m_thread_scratch_size[level] = per_thread.value;
+    return *this;
+  }
+
+  using member_type = Impl::HostThreadTeamMember<Kokkos::Serial>;
+};
+
+template <class FunctorType, class...
Properties>
+// Serial-backend parallel_for over a team policy: the league is executed by a
+// plain loop, one functor call per league rank, all sharing the instance's
+// HostThreadTeamData.
+class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
+                  Kokkos::Serial> {
+ private:
+  // Passed to resize_thread_team_data() as the team-reduce size.
+  enum { TEAM_REDUCE_SIZE = 512 };
+
+  using Policy = TeamPolicyInternal<Kokkos::Serial, Properties...>;
+  using Member = typename Policy::member_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+  const int m_league;     // league size captured at construction
+  const size_t m_shared;  // scratch levels 0 + 1 plus the functor's shmem size
+
+  // Untagged overload (TagType is void): one functor call per league rank,
+  // with a Member built from (data, rank, league size).
+  template <class TagType>
+  inline std::enable_if_t<std::is_void<TagType>::value> exec(
+      HostThreadTeamData& data) const {
+    for (int ileague = 0; ileague < m_league; ++ileague) {
+      m_functor(Member(data, ileague, m_league));
+    }
+  }
+
+  // Tagged overload (TagType is not void): same loop, with a
+  // default-constructed tag passed first.
+  template <class TagType>
+  inline std::enable_if_t<!std::is_void<TagType>::value> exec(
+      HostThreadTeamData& data) const {
+    const TagType t{};
+    for (int ileague = 0; ileague < m_league; ++ileague) {
+      m_functor(t, Member(data, ileague, m_league));
+    }
+  }
+
+ public:
+  // Resizes the instance's thread-team data, then runs the league loop. The
+  // lock_guard lives until execute() returns, so the functor calls happen
+  // while m_thread_team_data_mutex is held.
+  inline void execute() const {
+    const size_t pool_reduce_size = 0;  // Never shrinks
+    const size_t team_reduce_size = TEAM_REDUCE_SIZE;
+    const size_t team_shared_size = m_shared;
+    const size_t thread_local_size = 0;  // Never shrinks
+
+    auto* internal_instance = m_policy.space().impl_internal_space_instance();
+    // Need to lock resize_thread_team_data
+    std::lock_guard<std::mutex> lock(
+        internal_instance->m_thread_team_data_mutex);
+    internal_instance->resize_thread_team_data(
+        pool_reduce_size, team_reduce_size, team_shared_size,
+        thread_local_size);
+
+    this->template exec<typename Policy::work_tag>(
+        internal_instance->m_thread_team_data);
+  }
+
+  // Copies the functor and policy and precomputes the league size and the
+  // total shared-memory request (FunctorTeamShmemSize is queried with 1 as
+  // its second argument).
+  ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy)
+      : m_functor(arg_functor),
+        m_policy(arg_policy),
+        m_league(arg_policy.league_size()),
+        m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) +
+                 FunctorTeamShmemSize<FunctorType>::value(arg_functor, 1)) {}
+};
+
+/*--------------------------------------------------------------------------*/
+
+template <class FunctorType, class ReducerType, class...
Properties>
+// Serial-backend parallel_reduce over a team policy. The league is executed
+// by a plain loop; every functor call accumulates into the same `update`
+// reference.
+class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
+                     ReducerType, Kokkos::Serial> {
+ private:
+  // Passed to resize_thread_team_data() as the team-reduce size.
+  enum { TEAM_REDUCE_SIZE = 512 };
+
+  using Policy = TeamPolicyInternal<Kokkos::Serial, Properties...>;
+
+  using Member = typename Policy::member_type;
+  using WorkTag = typename Policy::work_tag;
+
+  // When ReducerType is InvalidType the functor itself supplies the reduction
+  // (init/join/final); otherwise the reducer object does.
+  using ReducerConditional =
+      Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
+                         FunctorType, ReducerType>;
+  using ReducerTypeFwd = typename ReducerConditional::type;
+  using WorkTagFwd =
+      std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag,
+                         void>;
+
+  using Analysis =
+      FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, ReducerTypeFwd>;
+
+  using pointer_type = typename Analysis::pointer_type;
+  using reference_type = typename Analysis::reference_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+  const int m_league;
+  const ReducerType m_reducer;
+  pointer_type m_result_ptr;  // destination; may be null (see execute())
+  size_t m_shared;            // scratch levels 0 + 1 plus functor shmem size
+
+  // Untagged overload (TagType is void).
+  template <class TagType>
+  inline std::enable_if_t<std::is_void<TagType>::value> exec(
+      HostThreadTeamData& data, reference_type update) const {
+    for (int ileague = 0; ileague < m_league; ++ileague) {
+      m_functor(Member(data, ileague, m_league), update);
+    }
+  }
+
+  // Tagged overload (TagType is not void): passes a default-constructed tag
+  // as the first functor argument.
+  template <class TagType>
+  inline std::enable_if_t<!std::is_void<TagType>::value> exec(
+      HostThreadTeamData& data, reference_type update) const {
+    const TagType t{};
+
+    for (int ileague = 0; ileague < m_league; ++ileague) {
+      m_functor(t, Member(data, ileague, m_league), update);
+    }
+  }
+
+ public:
+  // Resizes the thread-team data, initialises the accumulator, runs the
+  // league loop and finalises the result. The mutex is held for the whole
+  // call.
+  inline void execute() const {
+    const size_t pool_reduce_size =
+        Analysis::value_size(ReducerConditional::select(m_functor, m_reducer));
+
+    const size_t team_reduce_size = TEAM_REDUCE_SIZE;
+    const size_t team_shared_size = m_shared;
+    const size_t thread_local_size = 0;  // Never shrinks
+
+    auto* internal_instance = m_policy.space().impl_internal_space_instance();
+    // Need to lock resize_thread_team_data
+    std::lock_guard<std::mutex> lock(
+        internal_instance->m_thread_team_data_mutex);
+    internal_instance->resize_thread_team_data(
+        pool_reduce_size, team_reduce_size, team_shared_size,
+        thread_local_size);
+
+    // Accumulate straight into the caller's buffer when one was given,
+    // otherwise into the instance's pool_reduce_local() scratch.
+    pointer_type ptr =
+        m_result_ptr
+            ? m_result_ptr
+            : pointer_type(
+                  internal_instance->m_thread_team_data.pool_reduce_local());
+
+    typename Analysis::Reducer final_reducer(
+        &ReducerConditional::select(m_functor, m_reducer));
+
+    reference_type update = final_reducer.init(ptr);
+
+    this->template exec<WorkTag>(internal_instance->m_thread_team_data, update);
+
+    final_reducer.final(ptr);
+  }
+
+  // View-result constructor; participates only when ViewType is a View and
+  // ReducerType is not a reducer. The result View must be accessible from
+  // HostSpace (checked at compile time).
+  template <class ViewType>
+  ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy,
+                 const ViewType& arg_result,
+                 std::enable_if_t<Kokkos::is_view<ViewType>::value &&
+                                      !Kokkos::is_reducer<ReducerType>::value,
+                                  void*> = nullptr)
+      : m_functor(arg_functor),
+        m_policy(arg_policy),
+        m_league(arg_policy.league_size()),
+        m_reducer(InvalidType()),
+        m_result_ptr(arg_result.data()),
+        m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) +
+                 FunctorTeamShmemSize<FunctorType>::value(m_functor, 1)) {
+    static_assert(Kokkos::is_view<ViewType>::value,
+                  "Reduction result on Kokkos::Serial must be a Kokkos::View");
+
+    static_assert(
+        Kokkos::Impl::MemorySpaceAccess<typename ViewType::memory_space,
+                                        Kokkos::HostSpace>::accessible,
+        "Reduction result on Kokkos::Serial must be a Kokkos::View in "
+        "HostSpace");
+  }
+
+  // Reducer constructor: the result pointer comes from reducer.view().
+  inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy,
+                        const ReducerType& reducer)
+      : m_functor(arg_functor),
+        m_policy(arg_policy),
+        m_league(arg_policy.league_size()),
+        m_reducer(reducer),
+        m_result_ptr(reducer.view().data()),
+        m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) +
+                 FunctorTeamShmemSize<FunctorType>::value(arg_functor, 1)) {
+    /*static_assert( std::is_same< typename ViewType::memory_space
+                            , Kokkos::HostSpace >::value
+    , "Reduction result on Kokkos::Serial must be a Kokkos::View in HostSpace"
+    );*/
+  }
+};
+
+}  // namespace Impl
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Task.cpp
similarity index 92%
rename from packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
rename to packages/kokkos/core/src/Serial/Kokkos_Serial_Task.cpp
index 179c55b10dd5357fecc1016dfa239ebd2813149f..468f27eebd4da29fab83bae37beb518e2da2c99d 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
+++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Task.cpp
@@ -42,12 +42,16 @@
 //@HEADER
 */
 
+#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
+#define KOKKOS_IMPL_PUBLIC_INCLUDE
+#endif
+
 #include <Kokkos_Macros.hpp>
-#if defined(KOKKOS_ENABLE_SERIAL) && defined(KOKKOS_ENABLE_TASKDAG)
+#if defined(KOKKOS_ENABLE_TASKDAG)
 
 #include <Kokkos_Core.hpp>
-#include <impl/Kokkos_Serial_Task.hpp>
+#include <Serial/Kokkos_Serial_Task.hpp>
 #include <impl/Kokkos_TaskQueue_impl.hpp>
 
 //----------------------------------------------------------------------------
@@ -63,5 +67,4 @@ template class TaskQueue<Kokkos::Serial, typename Kokkos::Serial::memory_space>;
 
 #else
 void KOKKOS_CORE_SRC_IMPL_SERIAL_TASK_PREVENT_LINK_ERROR() {}
-#endif /* #if defined( KOKKOS_ENABLE_SERIAL ) && defined( \
-          KOKKOS_ENABLE_TASKDAG ) */
+#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
diff --git a/packages/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_Task.hpp
similarity index 98%
rename from packages/kokkos/core/src/impl/Kokkos_Serial_Task.hpp
rename to packages/kokkos/core/src/Serial/Kokkos_Serial_Task.hpp
index be732f4486d4618b4f8601d1859be44ce1a31296..8d8c1d748dade42ddd8a09050a29afbd45800c84 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Serial_Task.hpp
+++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_Task.hpp
@@ -61,7 +61,7 @@ namespace Kokkos {
 namespace Impl {
 
 template <class QueueType>
-class
TaskQueueSpecialization<SimpleTaskScheduler<Kokkos::Serial, QueueType> > { +class TaskQueueSpecialization<SimpleTaskScheduler<Kokkos::Serial, QueueType>> { public: // Note: Scheduler may be an incomplete type at class scope (but not inside // of the methods, obviously) @@ -131,8 +131,8 @@ class TaskQueueSpecialization<SimpleTaskScheduler<Kokkos::Serial, QueueType> > { template <class Scheduler> class TaskQueueSpecializationConstrained< Scheduler, - typename std::enable_if<std::is_same<typename Scheduler::execution_space, - Kokkos::Serial>::value>::type> { + std::enable_if_t<std::is_same<typename Scheduler::execution_space, + Kokkos::Serial>::value>> { public: // Note: Scheduler may be an incomplete type at class scope (but not inside // of the methods, obviously) diff --git a/packages/kokkos/core/src/Serial/Kokkos_Serial_UniqueToken.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_UniqueToken.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cc845f3dca7eac39519a965d2f4be1a19ac55020 --- /dev/null +++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_UniqueToken.hpp @@ -0,0 +1,109 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_SERIAL_UNIQUE_TOKEN_HPP
+#define KOKKOS_SERIAL_UNIQUE_TOKEN_HPP
+
+#include <Kokkos_UniqueToken.hpp>
+
+namespace Kokkos {
+namespace Experimental {
+
+/// \brief Instance-scope unique token for the Serial backend.
+///
+/// The token pool has exactly one entry: size() is 1, acquire() always
+/// returns 0 and release() does nothing.
+template <>
+class UniqueToken<Serial, UniqueTokenScope::Instance> {
+ public:
+  using execution_space = Serial;
+  using size_type = int;
+
+  /// \brief create object sized for the concurrency of the given instance
+  ///
+  /// This object should not be shared between instances
+  UniqueToken(execution_space const& = execution_space()) noexcept {}
+
+  /// \brief create object for a requested size on the given instance
+  ///
+  /// It is the user's responsibility to only acquire size tokens concurrently.
+  /// Both arguments are ignored by this specialization.
+  UniqueToken(size_type, execution_space const& = execution_space()) {}
+
+  /// \brief upper bound for acquired values, i.e. 0 <= value < size()
+  KOKKOS_INLINE_FUNCTION
+  int size() const noexcept { return 1; }
+
+  /// \brief acquire value such that 0 <= value < size()
+  KOKKOS_INLINE_FUNCTION
+  int acquire() const noexcept { return 0; }
+
+  /// \brief release a value previously returned by acquire()
+  KOKKOS_INLINE_FUNCTION
+  void release(int) const noexcept {}
+};
+
+/// \brief Global-scope unique token for the Serial backend.
+///
+/// Same single-token behaviour as the Instance specialization, without the
+/// size-taking constructor.
+template <>
+class UniqueToken<Serial, UniqueTokenScope::Global> {
+ public:
+  using execution_space = Serial;
+  using size_type = int;
+
+  /// \brief create object sized for the concurrency of the given instance
+  ///
+  /// This object should not be shared between instances
+  UniqueToken(execution_space const& = execution_space()) noexcept {}
+
+  /// \brief upper bound for acquired values, i.e. 0 <= value < size()
+  KOKKOS_INLINE_FUNCTION
+  int size() const noexcept { return 1; }
+
+  /// \brief acquire value such that 0 <= value < size()
+  KOKKOS_INLINE_FUNCTION
+  int acquire() const noexcept { return 0; }
+
+  /// \brief release a value previously returned by acquire()
+  KOKKOS_INLINE_FUNCTION
+  void release(int) const noexcept {}
+};
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_Serial_WorkGraphPolicy.hpp b/packages/kokkos/core/src/Serial/Kokkos_Serial_WorkGraphPolicy.hpp
similarity index 95%
rename from packages/kokkos/core/src/impl/Kokkos_Serial_WorkGraphPolicy.hpp
rename to packages/kokkos/core/src/Serial/Kokkos_Serial_WorkGraphPolicy.hpp
index 0f6ad5cb0354a3d467a686603f72869990f8380c..05980170b841daae67941b07c51fca6fa9a03e57 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Serial_WorkGraphPolicy.hpp
+++ b/packages/kokkos/core/src/Serial/Kokkos_Serial_WorkGraphPolicy.hpp
@@ -58,13 +58,13 @@ class ParallelFor<FunctorType, Kokkos::WorkGraphPolicy<Traits...>,
   FunctorType m_functor;
 
   template <class TagType>
-  typename std::enable_if<std::is_same<TagType, void>::value>::type exec_one(
+  std::enable_if_t<std::is_void<TagType>::value> exec_one(
       const std::int32_t w) const noexcept {
m_functor(w);
   }
 
   template <class TagType>
-  typename std::enable_if<!std::is_same<TagType, void>::value>::type exec_one(
+  std::enable_if_t<!std::is_void<TagType>::value> exec_one(
       const std::int32_t w) const noexcept {
     const TagType t{};
     m_functor(t, w);
diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
index 9682564ee0bb339f5e9f412c3d1214e8de94771c..346eb1dc08d6e789385a4a590075f79b4d256d42 100644
--- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
+++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
@@ -42,8 +42,11 @@
 //@HEADER
 */
 
+#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
+#define KOKKOS_IMPL_PUBLIC_INCLUDE
+#endif
+
 #include <Kokkos_Macros.hpp>
-#if defined(KOKKOS_ENABLE_THREADS)
 
 #include <cstdint>
 #include <limits>
@@ -51,12 +54,14 @@
 #include <iostream>
 #include <sstream>
 #include <thread>
+#include <mutex>
 
 #include <Kokkos_Core.hpp>
 
 #include <impl/Kokkos_Error.hpp>
 #include <impl/Kokkos_CPUDiscovery.hpp>
 #include <impl/Kokkos_Tools.hpp>
+#include <impl/Kokkos_ExecSpaceManager.hpp>
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
@@ -64,6 +69,26 @@
 namespace Kokkos {
 namespace Impl {
 namespace {
+// Mutex behind ThreadsExec::global_lock() / global_unlock().
+std::mutex host_internal_cppthread_mutex;
+
+// std::thread compatible driver.
+// Recovery from an exception would require constant intra-thread health
+// verification; which would negatively impact runtime. As such simply
+// abort the process.
+void internal_cppthread_driver() {
+  try {
+    ThreadsExec::driver();
+  } catch (const std::exception &x) {
+    std::cerr << "Exception thrown from worker thread: " << x.what()
+              << std::endl;
+    std::cerr.flush();
+    std::abort();
+  } catch (...) {
+    std::cerr << "Exception thrown from worker thread" << std::endl;
+    std::cerr.flush();
+    std::abort();
+  }
+}
 
 ThreadsExec s_threads_process;
 ThreadsExec *s_threads_exec[ThreadsExec::MAX_THREAD_COUNT] = {nullptr};
@@ -110,6 +135,34 @@ inline unsigned fan_size(const unsigned rank, const unsigned size) {
 namespace Kokkos {
 namespace Impl {
 
+//----------------------------------------------------------------------------
+// Spawn a thread
+
+// Starts a std::thread running internal_cppthread_driver and detaches it, so
+// the thread is never joined through this handle.
+void ThreadsExec::spawn() {
+  std::thread t(internal_cppthread_driver);
+  t.detach();
+}
+
+//----------------------------------------------------------------------------
+
+// Returns true when called from the thread that made the first call to this
+// function: that thread's id is latched in a function-local static.
+bool ThreadsExec::is_process() {
+  static const std::thread::id master_pid = std::this_thread::get_id();
+
+  return master_pid == std::this_thread::get_id();
+}
+
+void ThreadsExec::global_lock() { host_internal_cppthread_mutex.lock(); }
+
+void ThreadsExec::global_unlock() { host_internal_cppthread_mutex.unlock(); }
+
+//----------------------------------------------------------------------------
+
+// Yields the calling thread repeatedly for as long as `flag` still equals
+// `value`; returns once another value is observed.
+void ThreadsExec::wait_yield(volatile int &flag, const int value) {
+  while (value == flag) {
+    std::this_thread::yield();
+  }
+}
+
 void execute_function_noop(ThreadsExec &, const void *) {}
 
 void ThreadsExec::driver() {
@@ -144,11 +197,11 @@ ThreadsExec::ThreadsExec()
     ThreadsExec *const nil = nullptr;
 
     // Which entry in 's_threads_exec', possibly determined from hwloc binding
-    const int entry =
-        ((size_t)s_current_function_arg) < size_t(s_thread_pool_size[0])
-            ? ((size_t)s_current_function_arg)
-            : size_t(Kokkos::hwloc::bind_this_thread(s_thread_pool_size[0],
-                                                     s_threads_coord));
+    const int entry = reinterpret_cast<size_t>(s_current_function_arg) <
+                              size_t(s_thread_pool_size[0])
+                          ?
reinterpret_cast<size_t>(s_current_function_arg) + : size_t(Kokkos::hwloc::bind_this_thread( + s_thread_pool_size[0], s_threads_coord)); // Given a good entry set this thread in the 's_threads_exec' array if (entry < s_thread_pool_size[0] && @@ -297,7 +350,7 @@ void ThreadsExec::fence(const std::string &name) { void ThreadsExec::internal_fence(Impl::fence_is_static is_static) { internal_fence((is_static == Impl::fence_is_static::no) ? "Kokkos::ThreadsExec::fence: Unnamed Instance Fence" - : "Kokkos::ThreadsExec::fence: Unnamed Global Fence", + : "Kokkos::ThreadsExec::fence: Unnamed Static Fence", is_static); } @@ -582,9 +635,12 @@ void ThreadsExec::print_configuration(std::ostream &s, const bool detail) { int ThreadsExec::is_initialized() { return nullptr != s_threads_exec[0]; } -void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count, - unsigned use_cores_per_numa, - bool allow_asynchronous_threadpool) { +void ThreadsExec::initialize(int thread_count_arg) { + // legacy arguments + unsigned thread_count = thread_count_arg == -1 ? 0 : thread_count_arg; + unsigned use_numa_count = 0; + unsigned use_cores_per_numa = 0; + bool allow_asynchronous_threadpool = false; // need to provide an initializer for Intel compilers static const Sentinel sentinel = {}; @@ -637,7 +693,7 @@ void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count, // choose its own entry in 's_threads_coord' // otherwise specify the entry. s_current_function_arg = - (void *)static_cast<uintptr_t>(hwloc_can_bind ? ~0u : ith); + reinterpret_cast<void *>(hwloc_can_bind ? ~0u : ith); // Make sure all outstanding memory writes are complete // before spawning the new thread. 
@@ -804,9 +860,6 @@ void ThreadsExec::finalize() { namespace Kokkos { int Threads::concurrency() { return impl_thread_pool_size(0); } -void Threads::fence() const { - Impl::ThreadsExec::internal_fence(Impl::fence_is_static::no); -} void Threads::fence(const std::string &name) const { Impl::ThreadsExec::internal_fence(name, Impl::fence_is_static::no); } @@ -834,56 +887,7 @@ const char *Threads::name() { return "Threads"; } namespace Impl { int g_threads_space_factory_initialized = - initialize_space_factory<ThreadsSpaceInitializer>("050_Threads"); - -void ThreadsSpaceInitializer::initialize(const InitArguments &args) { - const int num_threads = args.num_threads; - const int use_numa = args.num_numa; - if (std::is_same<Kokkos::Threads, Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::Threads, - Kokkos::HostSpace::execution_space>::value) { - if (num_threads > 0) { - if (use_numa > 0) { - Kokkos::Threads::impl_initialize(num_threads, use_numa); - } else { - Kokkos::Threads::impl_initialize(num_threads); - } - } else { - Kokkos::Threads::impl_initialize(); - } - // std::cout << "Kokkos::initialize() fyi: CppThread enabled and - // initialized" - // << std::endl ; - } else { - // std::cout << "Kokkos::initialize() fyi: CppThread enabled but not - // initialized" << std::endl ; - } -} - -void ThreadsSpaceInitializer::finalize(const bool all_spaces) { - if (std::is_same<Kokkos::Threads, Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::Threads, - Kokkos::HostSpace::execution_space>::value || - all_spaces) { - if (Kokkos::Threads::impl_is_initialized()) - Kokkos::Threads::impl_finalize(); - } -} - -void ThreadsSpaceInitializer::fence() { Kokkos::Threads::impl_static_fence(); } -void ThreadsSpaceInitializer::fence(const std::string &name) { - Kokkos::Threads::impl_static_fence(name); -} - -void ThreadsSpaceInitializer::print_configuration(std::ostream &msg, - const bool detail) { - msg << "Host Parallel Execution Space:" << std::endl; - msg << " 
KOKKOS_ENABLE_THREADS: "; - msg << "yes" << std::endl; - - msg << "\nThreads Runtime Configuration:" << std::endl; - Kokkos::Threads::print_configuration(msg, detail); -} + initialize_space_factory<Threads>("050_Threads"); } // namespace Impl @@ -896,8 +900,3 @@ constexpr DeviceType DeviceTypeTraits<Threads>::id; #endif } /* namespace Kokkos */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -#else -void KOKKOS_CORE_SRC_THREADS_EXEC_PREVENT_LINK_ERROR() {} -#endif /* #if defined( KOKKOS_ENABLE_THREADS ) */ diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index d17f417bbcac13c2542ed583fbd48bccf0dd3f9b..238a7655457944445752b40c7e55f93d76d378d1 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -46,17 +46,14 @@ #define KOKKOS_THREADSEXEC_HPP #include <Kokkos_Macros.hpp> -#if defined(KOKKOS_ENABLE_THREADS) #include <cstdio> #include <utility> #include <impl/Kokkos_Spinwait.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> #include <Kokkos_Atomic.hpp> -#include <Kokkos_UniqueToken.hpp> #include <impl/Kokkos_ConcurrentBitset.hpp> //---------------------------------------------------------------------------- @@ -99,7 +96,7 @@ class ThreadsExec { void *m_scratch; int m_scratch_reduce_end; - int m_scratch_thread_end; + size_t m_scratch_thread_end; int m_numa_rank; int m_numa_core_rank; int m_pool_rank; @@ -167,9 +164,7 @@ class ThreadsExec { static int is_initialized(); - static void initialize(unsigned thread_count, unsigned use_numa_count, - unsigned use_cores_per_numa, - bool allow_asynchronous_threadpool); + static void initialize(int thread_count); static void finalize(); @@ -189,7 +184,7 @@ class ThreadsExec { // Make sure there is enough scratch space: const int rev_rank = m_pool_size - (m_pool_rank + 
1); - *((volatile int *)reduce_memory()) = value; + *static_cast<volatile int *>(reduce_memory()) = value; memory_fence(); @@ -210,11 +205,12 @@ class ThreadsExec { int accum = 0; for (int rank = 0; rank < m_pool_size; ++rank) { - accum += *((volatile int *)get_thread(rank)->reduce_memory()); + accum += + *static_cast<volatile int *>(get_thread(rank)->reduce_memory()); } for (int rank = 0; rank < m_pool_size; ++rank) { - *((volatile int *)get_thread(rank)->reduce_memory()) = accum; + *static_cast<volatile int *>(get_thread(rank)->reduce_memory()) = accum; } memory_fence(); @@ -224,7 +220,7 @@ class ThreadsExec { } } - return *((volatile int *)reduce_memory()); + return *static_cast<volatile int *>(reduce_memory()); } inline void barrier() { @@ -258,11 +254,8 @@ class ThreadsExec { //------------------------------------ // All-thread functions: - template <class FunctorType, class ArgTag> + template <class FunctorType> inline void fan_in_reduce(const FunctorType &f) const { - using Join = Kokkos::Impl::FunctorValueJoin<FunctorType, ArgTag>; - using Final = Kokkos::Impl::FunctorFinal<FunctorType, ArgTag>; - const int rev_rank = m_pool_size - (m_pool_rank + 1); for (int i = 0; i < m_pool_fan_size; ++i) { @@ -270,11 +263,15 @@ class ThreadsExec { Impl::spinwait_while_equal<int>(fan.m_pool_state, ThreadsExec::Active); - Join::join(f, reduce_memory(), fan.reduce_memory()); + f.join( + reinterpret_cast<typename FunctorType::value_type *>(reduce_memory()), + reinterpret_cast<const typename FunctorType::value_type *>( + fan.reduce_memory())); } if (!rev_rank) { - Final::final(f, reduce_memory()); + f.final(reinterpret_cast<typename FunctorType::value_type *>( + reduce_memory())); } // This thread has updated 'reduce_memory()' and upon returning @@ -298,7 +295,7 @@ class ThreadsExec { } } - template <class FunctorType, class ArgTag> + template <class FunctorType> inline void scan_large(const FunctorType &f) { // Sequence of states: // 0) Active : entry and exit state @@ 
-307,14 +304,10 @@ class ThreadsExec { // 3) Rendezvous : All threads inclusive scan value are available // 4) ScanCompleted : exclusive scan value copied - using Traits = Kokkos::Impl::FunctorValueTraits<FunctorType, ArgTag>; - using Join = Kokkos::Impl::FunctorValueJoin<FunctorType, ArgTag>; - using Init = Kokkos::Impl::FunctorValueInit<FunctorType, ArgTag>; - - using scalar_type = typename Traits::value_type; + using scalar_type = typename FunctorType::value_type; const int rev_rank = m_pool_size - (m_pool_rank + 1); - const unsigned count = Traits::value_count(f); + const unsigned count = FunctorType::value_count(f); scalar_type *const work_value = (scalar_type *)reduce_memory(); @@ -325,7 +318,7 @@ class ThreadsExec { // Wait: Active -> ReductionAvailable (or ScanAvailable) Impl::spinwait_while_equal<int>(fan.m_pool_state, ThreadsExec::Active); - Join::join(f, work_value, fan.reduce_memory()); + f.join(work_value, fan.reduce_memory()); } // Copy reduction value to scan value before releasing from this phase. 
@@ -347,8 +340,7 @@ class ThreadsExec { Impl::spinwait_while_equal<int>(th.m_pool_state, ThreadsExec::ReductionAvailable); - Join::join(f, work_value + count, - ((scalar_type *)th.reduce_memory()) + count); + f.join(work_value + count, ((scalar_type *)th.reduce_memory()) + count); } // This thread has completed inclusive scan @@ -388,7 +380,7 @@ class ThreadsExec { work_value[j] = src_value[j]; } } else { - (void)Init::init(f, work_value); + f.init(work_value); } //-------------------------------- @@ -411,16 +403,12 @@ class ThreadsExec { } } - template <class FunctorType, class ArgTag> + template <class FunctorType> inline void scan_small(const FunctorType &f) { - using Traits = Kokkos::Impl::FunctorValueTraits<FunctorType, ArgTag>; - using Join = Kokkos::Impl::FunctorValueJoin<FunctorType, ArgTag>; - using Init = Kokkos::Impl::FunctorValueInit<FunctorType, ArgTag>; - - using scalar_type = typename Traits::value_type; + using scalar_type = typename FunctorType::value_type; const int rev_rank = m_pool_size - (m_pool_rank + 1); - const unsigned count = Traits::value_count(f); + const unsigned count = f.length(); scalar_type *const work_value = (scalar_type *)reduce_memory(); @@ -452,9 +440,9 @@ class ThreadsExec { for (unsigned i = 0; i < count; ++i) { ptr[i] = ptr_prev[i + count]; } - Join::join(f, ptr + count, ptr); + f.join(ptr + count, ptr); } else { - (void)Init::init(f, ptr); + f.init(ptr); } ptr_prev = ptr; } @@ -626,139 +614,24 @@ inline int Threads::impl_is_initialized() { return Impl::ThreadsExec::is_initialized(); } -inline void Threads::impl_initialize(unsigned threads_count, - unsigned use_numa_count, - unsigned use_cores_per_numa, - bool allow_asynchronous_threadpool) { - Impl::ThreadsExec::initialize(threads_count, use_numa_count, - use_cores_per_numa, - allow_asynchronous_threadpool); +inline void Threads::impl_initialize(InitializationSettings const &settings) { + Impl::ThreadsExec::initialize( + settings.has_num_threads() ? 
settings.get_num_threads() : -1); } inline void Threads::impl_finalize() { Impl::ThreadsExec::finalize(); } -inline void Threads::print_configuration(std::ostream &s, const bool detail) { - Impl::ThreadsExec::print_configuration(s, detail); -} +inline void Threads::print_configuration(std::ostream &os, bool verbose) const { + os << "Host Parallel Execution Space:\n"; + os << " KOKKOS_ENABLE_THREADS: yes\n"; -inline void Threads::impl_static_fence() { - Impl::ThreadsExec::internal_fence(Impl::fence_is_static::yes); + os << "\nThreads Runtime Configuration:\n"; + Impl::ThreadsExec::print_configuration(os, verbose); } + inline void Threads::impl_static_fence(const std::string &name) { Impl::ThreadsExec::internal_fence(name, Impl::fence_is_static::yes); } } /* namespace Kokkos */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -template <> -class UniqueToken<Threads, UniqueTokenScope::Instance> { - private: - using buffer_type = Kokkos::View<uint32_t *, Kokkos::HostSpace>; - int m_count; - buffer_type m_buffer_view; - uint32_t volatile *m_buffer; - - public: - using execution_space = Threads; - using size_type = int; - - /// \brief create object size for concurrency on the given instance - /// - /// This object should not be shared between instances - UniqueToken(execution_space const & = execution_space()) noexcept - : m_count(::Kokkos::Threads::impl_thread_pool_size()), - m_buffer_view(buffer_type()), - m_buffer(nullptr) {} - - UniqueToken(size_type max_size, execution_space const & = execution_space()) - : m_count(max_size > ::Kokkos::Threads::impl_thread_pool_size() - ? ::Kokkos::Threads::impl_thread_pool_size() - : max_size), - m_buffer_view( - max_size > ::Kokkos::Threads::impl_thread_pool_size() - ? 
buffer_type() - : buffer_type("UniqueToken::m_buffer_view", - ::Kokkos::Impl::concurrent_bitset::buffer_bound( - m_count))), - m_buffer(m_buffer_view.data()) {} - - /// \brief upper bound for acquired values, i.e. 0 <= value < size() - KOKKOS_INLINE_FUNCTION - int size() const noexcept { return m_count; } - - /// \brief acquire value such that 0 <= value < size() - KOKKOS_INLINE_FUNCTION - int acquire() const noexcept { - KOKKOS_IF_ON_HOST(( - if (m_buffer == nullptr) { - return Threads::impl_thread_pool_rank(); - } else { - const ::Kokkos::pair<int, int> result = - ::Kokkos::Impl::concurrent_bitset::acquire_bounded( - m_buffer, m_count, ::Kokkos::Impl::clock_tic() % m_count); - - if (result.first < 0) { - ::Kokkos::abort( - "UniqueToken<Threads> failure to acquire tokens, no tokens " - "available"); - } - return result.first; - })) - - KOKKOS_IF_ON_DEVICE((return 0;)) - } - - /// \brief release a value acquired by generate - KOKKOS_INLINE_FUNCTION - void release(int i) const noexcept { - KOKKOS_IF_ON_HOST((if (m_buffer != nullptr) { - ::Kokkos::Impl::concurrent_bitset::release(m_buffer, i); - })) - - KOKKOS_IF_ON_DEVICE(((void)i;)) - } -}; - -template <> -class UniqueToken<Threads, UniqueTokenScope::Global> { - public: - using execution_space = Threads; - using size_type = int; - - /// \brief create object size for concurrency on the given instance - /// - /// This object should not be shared between instances - UniqueToken(execution_space const & = execution_space()) noexcept {} - - /// \brief upper bound for acquired values, i.e. 
0 <= value < size() - KOKKOS_INLINE_FUNCTION - int size() const noexcept { - KOKKOS_IF_ON_HOST((return Threads::impl_thread_pool_size();)) - - KOKKOS_IF_ON_DEVICE((return 0;)) - } - - /// \brief acquire value such that 0 <= value < size() - KOKKOS_INLINE_FUNCTION - int acquire() const noexcept { - KOKKOS_IF_ON_HOST((return Threads::impl_thread_pool_rank();)) - - KOKKOS_IF_ON_DEVICE((return 0;)) - } - - /// \brief release a value acquired by generate - KOKKOS_INLINE_FUNCTION - void release(int) const noexcept {} -}; - -} // namespace Experimental -} // namespace Kokkos -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -#endif #endif /* #define KOKKOS_THREADSEXEC_HPP */ diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index 36d6a25b0ddfa6fae9c579485627965180e62daf..02ce9325065e11e311252b2c931245318f289dc4 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -46,13 +46,11 @@ #define KOKKOS_THREADSTEAM_HPP #include <Kokkos_Macros.hpp> -#if defined(KOKKOS_ENABLE_THREADS) #include <cstdio> #include <utility> #include <impl/Kokkos_Spinwait.hpp> -#include <impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_HostThreadTeam.hpp> #include <Kokkos_Atomic.hpp> @@ -82,7 +80,7 @@ class ThreadsExecTeamMember { ThreadsExec* const m_exec; ThreadsExec* const* m_team_base; ///< Base for team fan-in space m_team_shared; - int m_team_shared_size; + size_t m_team_shared_size; int m_team_size; int m_team_rank; int m_team_rank_rev; @@ -97,9 +95,9 @@ class ThreadsExecTeamMember { int m_team_alloc; inline void set_team_shared() { - new (&m_team_shared) - space(((char*)(*m_team_base)->scratch_memory()) + TEAM_REDUCE_SIZE, - m_team_shared_size); + new (&m_team_shared) space( + 
static_cast<char*>((*m_team_base)->scratch_memory()) + TEAM_REDUCE_SIZE, + m_team_shared_size); } public: @@ -208,7 +206,7 @@ class ThreadsExecTeamMember { template <typename Type> KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<Type>::value, Type>::type + std::enable_if_t<!Kokkos::is_reducer<Type>::value, Type> team_reduce(const Type& value) const { KOKKOS_IF_ON_DEVICE((return value;)) @@ -240,61 +238,60 @@ class ThreadsExecTeamMember { } template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer) const noexcept { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer) const noexcept { team_reduce(reducer, reducer.reference()); } template <typename ReducerType> KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type + std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> team_reduce(const ReducerType& reducer, const typename ReducerType::value_type contribution) const { KOKKOS_IF_ON_DEVICE(((void)reducer; (void)contribution;)) - KOKKOS_IF_ON_HOST( - (using value_type = typename ReducerType::value_type; - // Make sure there is enough scratch space: - using type = typename if_c<sizeof(value_type) < TEAM_REDUCE_SIZE, - value_type, void>::type; + KOKKOS_IF_ON_HOST(( + using value_type = typename ReducerType::value_type; + // Make sure there is enough scratch space: + using type = typename if_c<sizeof(value_type) < TEAM_REDUCE_SIZE, + value_type, void>::type; - if (nullptr == m_exec) return; + if (nullptr == m_exec) return; - type* const local_value = ((type*)m_exec->scratch_memory()); + type* const local_value = ((type*)m_exec->scratch_memory()); - // Set this thread's contribution - if (team_rank() != team_size() - 1)* local_value = contribution; + // Set this thread's contribution + if (team_rank() != team_size() - 1) { *local_value = 
contribution; } - // Fence to make sure the base team member has access: - memory_fence(); + // Fence to make sure the base team member has access: + memory_fence(); - if (team_fan_in()) { - // The last thread to synchronize returns true, all other threads - // wait for team_fan_out() - type* const team_value = ((type*)m_team_base[0]->scratch_memory()); + if (team_fan_in()) { + // The last thread to synchronize returns true, all other threads + // wait for team_fan_out() + type* const team_value = ((type*)m_team_base[0]->scratch_memory()); - *team_value = contribution; - // Join to the team value: - for (int i = 1; i < m_team_size; ++i) { - reducer.join(*team_value, - *((type*)m_team_base[i]->scratch_memory())); - } + *team_value = contribution; + // Join to the team value: + for (int i = 1; i < m_team_size; ++i) { + reducer.join(*team_value, + *((type*)m_team_base[i]->scratch_memory())); + } - // Team base thread may "lap" member threads so copy out to their - // local value. - for (int i = 1; i < m_team_size; ++i) { - *((type*)m_team_base[i]->scratch_memory()) = *team_value; - } + // Team base thread may "lap" member threads so copy out to their + // local value. 
+ for (int i = 1; i < m_team_size; ++i) { + *((type*)m_team_base[i]->scratch_memory()) = *team_value; + } - // Fence to make sure all team members have access - memory_fence(); - } + // Fence to make sure all team members have access + memory_fence(); + } - team_fan_out(); + team_fan_out(); - // Value was changed by the team base - reducer.reference() = *((type volatile const*)local_value);)) + // Value was changed by the team base + reducer.reference() = *local_value;)) } /** \brief Intra-team exclusive prefix sum with team_rank() ordering @@ -374,7 +371,7 @@ class ThreadsExecTeamMember { ThreadsExecTeamMember( Impl::ThreadsExec* exec, const TeamPolicyInternal<Kokkos::Threads, Properties...>& team, - const int shared_size) + const size_t shared_size) : m_exec(exec), m_team_base(nullptr), m_team_shared(nullptr, 0), @@ -415,7 +412,7 @@ class ThreadsExecTeamMember { if (league_iter_end > team.league_size()) league_iter_end = team.league_size(); - if ((team.team_alloc() > m_team_size) + if ((team.team_alloc() > size_t(m_team_size)) ? 
(team_rank_rev >= m_team_size) : (m_exec->pool_size() - pool_num_teams * m_team_size > m_exec->pool_rank())) @@ -525,7 +522,7 @@ class ThreadsExecTeamMember { } void set_league_shmem(const int arg_league_rank, const int arg_league_size, - const int arg_shmem_size) { + const size_t arg_shmem_size) { m_league_rank = arg_league_rank; m_league_size = arg_league_size; m_team_shared_size = arg_shmem_size; @@ -666,7 +663,7 @@ class TeamPolicyInternal<Kokkos::Threads, Properties...> inline int team_size() const { return m_team_size; } inline int impl_vector_length() const { return 1; } - inline int team_alloc() const { return m_team_alloc; } + inline size_t team_alloc() const { return m_team_alloc; } inline int league_size() const { return m_league_size; } inline bool impl_auto_team_size() const { return m_tune_team_size; } @@ -828,11 +825,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, - Impl::ThreadsExecTeamMember> + std::common_type_t<iType1, iType2>, Impl::ThreadsExecTeamMember> TeamThreadRange(const Impl::ThreadsExecTeamMember& thread, const iType1& begin, const iType2& end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::ThreadsExecTeamMember>( thread, iType(begin), iType(end)); @@ -850,11 +846,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, - Impl::ThreadsExecTeamMember> + std::common_type_t<iType1, iType2>, Impl::ThreadsExecTeamMember> TeamVectorRange(const Impl::ThreadsExecTeamMember& thread, const iType1& begin, const iType2& end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return 
Impl::TeamThreadRangeBoundariesStruct<iType, Impl::ThreadsExecTeamMember>( thread, iType(begin), iType(end)); @@ -872,11 +867,10 @@ KOKKOS_INLINE_FUNCTION template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, - Impl::ThreadsExecTeamMember> + std::common_type_t<iType1, iType2>, Impl::ThreadsExecTeamMember> ThreadVectorRange(const Impl::ThreadsExecTeamMember& thread, const iType1& arg_begin, const iType2& arg_end) { - using iType = typename std::common_type<iType1, iType2>::type; + using iType = std::common_type_t<iType1, iType2>; return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::ThreadsExecTeamMember>( thread, iType(arg_begin), iType(arg_end)); @@ -919,11 +913,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * and a summation of val is performed and put into result. */ template <typename iType, class Lambda, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::ThreadsExecTeamMember>& loop_boundaries, - const Lambda& lambda, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::ThreadsExecTeamMember>& loop_boundaries, + const Lambda& lambda, ValueType& result) { ValueType intermediate; Sum<ValueType> sum(intermediate); sum.init(intermediate); @@ -940,11 +933,10 @@ KOKKOS_INLINE_FUNCTION } template <typename iType, class Lambda, typename ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::ThreadsExecTeamMember>& loop_boundaries, - const Lambda& lambda, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> 
+parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::ThreadsExecTeamMember>& loop_boundaries, + const Lambda& lambda, const ReducerType& reducer) { typename ReducerType::value_type value; reducer.init(value); @@ -984,11 +976,10 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * and a summation of val is performed and put into result. */ template <typename iType, class Lambda, typename ValueType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type - parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::ThreadsExecTeamMember>& loop_boundaries, - const Lambda& lambda, ValueType& result) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<!Kokkos::is_reducer<ValueType>::value> +parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::ThreadsExecTeamMember>& loop_boundaries, + const Lambda& lambda, ValueType& result) { result = ValueType(); for (iType i = loop_boundaries.start; i < loop_boundaries.end; i += loop_boundaries.increment) { @@ -997,11 +988,10 @@ KOKKOS_INLINE_FUNCTION } template <typename iType, class Lambda, typename ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::ThreadsExecTeamMember>& loop_boundaries, - const Lambda& lambda, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::ThreadsExecTeamMember>& loop_boundaries, + const Lambda& lambda, const ReducerType& reducer) { reducer.init(reducer.reference()); for (iType i = loop_boundaries.start; i < loop_boundaries.end; i += loop_boundaries.increment) { @@ -1062,8 +1052,10 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( const Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember>& loop_boundaries, const FunctorType& 
lambda) { - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = typename ValueTraits::value_type; + using value_type = + typename Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN, + TeamPolicy<Threads>, + FunctorType>::value_type; value_type scan_val = value_type(); @@ -1080,11 +1072,10 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( * */ template <typename iType, class FunctorType, typename ReducerType> -KOKKOS_INLINE_FUNCTION - typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type - parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::ThreadsExecTeamMember>& loop_boundaries, - const FunctorType& lambda, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION std::enable_if_t<Kokkos::is_reducer<ReducerType>::value> +parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::ThreadsExecTeamMember>& loop_boundaries, + const FunctorType& lambda, const ReducerType& reducer) { typename ReducerType::value_type scan_val; reducer.init(scan_val); @@ -1137,5 +1128,4 @@ KOKKOS_INLINE_FUNCTION void single( //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#endif #endif /* #define KOKKOS_THREADSTEAM_HPP */ diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp deleted file mode 100644 index 88dc670fa43ff5a1e969857808bd38cafd1544c4..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp +++ /dev/null @@ -1,1034 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. 
Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_THREADS_PARALLEL_HPP -#define KOKKOS_THREADS_PARALLEL_HPP - -#include <Kokkos_Macros.hpp> -#if defined(KOKKOS_ENABLE_THREADS) - -#include <Kokkos_Parallel.hpp> - -#include <impl/Kokkos_FunctorAdapter.hpp> - -#include <KokkosExp_MDRangePolicy.hpp> - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -/* ParallelFor Kokkos::Threads with RangePolicy */ - -template <class FunctorType, class... Traits> -class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, - Kokkos::Threads> { - private: - using Policy = Kokkos::RangePolicy<Traits...>; - using WorkTag = typename Policy::work_tag; - using WorkRange = typename Policy::WorkRange; - using Member = typename Policy::member_type; - - const FunctorType m_functor; - const Policy m_policy; - - template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const FunctorType &functor, const Member ibeg, - const Member iend) { -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - functor(i); - } - } - - template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const FunctorType &functor, const Member ibeg, - const Member iend) { - const TagType t{}; -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - functor(t, i); - } - } - - static void exec(ThreadsExec &exec, const void *arg) { - exec_schedule<typename 
Policy::schedule_type::type>(exec, arg); - } - - template <class Schedule> - static typename std::enable_if< - std::is_same<Schedule, Kokkos::Static>::value>::type - exec_schedule(ThreadsExec &exec, const void *arg) { - const ParallelFor &self = *((const ParallelFor *)arg); - - WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - ParallelFor::template exec_range<WorkTag>(self.m_functor, range.begin(), - range.end()); - - exec.fan_in(); - } - - template <class Schedule> - static typename std::enable_if< - std::is_same<Schedule, Kokkos::Dynamic>::value>::type - exec_schedule(ThreadsExec &exec, const void *arg) { - const ParallelFor &self = *((const ParallelFor *)arg); - - WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - exec.set_work_range(range.begin() - self.m_policy.begin(), - range.end() - self.m_policy.begin(), - self.m_policy.chunk_size()); - exec.reset_steal_target(); - exec.barrier(); - - long work_index = exec.get_work_index(); - - while (work_index != -1) { - const Member begin = - static_cast<Member>(work_index) * self.m_policy.chunk_size() + - self.m_policy.begin(); - const Member end = - begin + self.m_policy.chunk_size() < self.m_policy.end() - ? begin + self.m_policy.chunk_size() - : self.m_policy.end(); - ParallelFor::template exec_range<WorkTag>(self.m_functor, begin, end); - work_index = exec.get_work_index(); - } - - exec.fan_in(); - } - - public: - inline void execute() const { - ThreadsExec::start(&ParallelFor::exec, this); - ThreadsExec::fence(); - } - - ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} -}; - -// MDRangePolicy impl -template <class FunctorType, class... 
Traits> -class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, - Kokkos::Threads> { - private: - using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>; - using Policy = typename MDRangePolicy::impl_range_policy; - - using WorkTag = typename MDRangePolicy::work_tag; - - using WorkRange = typename Policy::WorkRange; - using Member = typename Policy::member_type; - - using iterate_type = typename Kokkos::Impl::HostIterateTile< - MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void>; - - const FunctorType m_functor; - const MDRangePolicy m_mdr_policy; - const Policy m_policy; // construct as RangePolicy( 0, num_tiles - // ).set_chunk_size(1) in ctor - - inline static void exec_range(const MDRangePolicy &mdr_policy, - const FunctorType &functor, const Member ibeg, - const Member iend) { -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - iterate_type(mdr_policy, functor)(i); - } - } - - static void exec(ThreadsExec &exec, const void *arg) { - exec_schedule<typename Policy::schedule_type::type>(exec, arg); - } - - template <class Schedule> - static typename std::enable_if< - std::is_same<Schedule, Kokkos::Static>::value>::type - exec_schedule(ThreadsExec &exec, const void *arg) { - const ParallelFor &self = *((const ParallelFor *)arg); - - WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - ParallelFor::exec_range(self.m_mdr_policy, self.m_functor, range.begin(), - range.end()); - - exec.fan_in(); - } - - template <class Schedule> - static typename std::enable_if< - std::is_same<Schedule, Kokkos::Dynamic>::value>::type - exec_schedule(ThreadsExec &exec, const void *arg) { - const ParallelFor &self = *((const ParallelFor *)arg); - - WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - exec.set_work_range(range.begin(), range.end(), self.m_policy.chunk_size()); - exec.reset_steal_target(); - 
exec.barrier(); - - long work_index = exec.get_work_index(); - - while (work_index != -1) { - const Member begin = - static_cast<Member>(work_index) * self.m_policy.chunk_size(); - const Member end = - begin + self.m_policy.chunk_size() < self.m_policy.end() - ? begin + self.m_policy.chunk_size() - : self.m_policy.end(); - - ParallelFor::exec_range(self.m_mdr_policy, self.m_functor, begin, end); - work_index = exec.get_work_index(); - } - - exec.fan_in(); - } - - public: - inline void execute() const { - ThreadsExec::start(&ParallelFor::exec, this); - ThreadsExec::fence(); - } - - ParallelFor(const FunctorType &arg_functor, const MDRangePolicy &arg_policy) - : m_functor(arg_functor), - m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} - - template <typename Policy, typename Functor> - static int max_tile_size_product(const Policy &, const Functor &) { - /** - * 1024 here is just our guess for a reasonable max tile size, - * it isn't a hardware constraint. If people see a use for larger - * tile size products, we're happy to change this. - */ - return 1024; - } -}; - -//---------------------------------------------------------------------------- -/* ParallelFor Kokkos::Threads with TeamPolicy */ - -template <class FunctorType, class... 
Properties> -class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, - Kokkos::Threads> { - private: - using Policy = - Kokkos::Impl::TeamPolicyInternal<Kokkos::Threads, Properties...>; - using WorkTag = typename Policy::work_tag; - using Member = typename Policy::member_type; - - const FunctorType m_functor; - const Policy m_policy; - const int m_shared; - - template <class TagType, class Schedule> - inline static typename std::enable_if< - std::is_same<TagType, void>::value && - std::is_same<Schedule, Kokkos::Static>::value>::type - exec_team(const FunctorType &functor, Member member) { - for (; member.valid_static(); member.next_static()) { - functor(member); - } - } - - template <class TagType, class Schedule> - inline static typename std::enable_if< - !std::is_same<TagType, void>::value && - std::is_same<Schedule, Kokkos::Static>::value>::type - exec_team(const FunctorType &functor, Member member) { - const TagType t{}; - for (; member.valid_static(); member.next_static()) { - functor(t, member); - } - } - - template <class TagType, class Schedule> - inline static typename std::enable_if< - std::is_same<TagType, void>::value && - std::is_same<Schedule, Kokkos::Dynamic>::value>::type - exec_team(const FunctorType &functor, Member member) { - for (; member.valid_dynamic(); member.next_dynamic()) { - functor(member); - } - } - - template <class TagType, class Schedule> - inline static typename std::enable_if< - !std::is_same<TagType, void>::value && - std::is_same<Schedule, Kokkos::Dynamic>::value>::type - exec_team(const FunctorType &functor, Member member) { - const TagType t{}; - for (; member.valid_dynamic(); member.next_dynamic()) { - functor(t, member); - } - } - - static void exec(ThreadsExec &exec, const void *arg) { - const ParallelFor &self = *((const ParallelFor *)arg); - - ParallelFor::exec_team<WorkTag, typename Policy::schedule_type::type>( - self.m_functor, Member(&exec, self.m_policy, self.m_shared)); - - exec.barrier(); - exec.fan_in(); 
- } - template <typename Policy> - Policy fix_policy(Policy policy) { - if (policy.impl_vector_length() < 0) { - policy.impl_set_vector_length(1); - } - if (policy.team_size() < 0) { - policy.impl_set_team_size( - policy.team_size_recommended(m_functor, ParallelForTag{})); - } - return policy; - } - - public: - inline void execute() const { - ThreadsExec::resize_scratch( - 0, Policy::member_type::team_reduce_size() + m_shared); - - ThreadsExec::start(&ParallelFor::exec, this); - - ThreadsExec::fence(); - } - - ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy) - : m_functor(arg_functor), - m_policy(fix_policy(arg_policy)), - m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) + - FunctorTeamShmemSize<FunctorType>::value( - arg_functor, m_policy.team_size())) {} -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -/* ParallelReduce with Kokkos::Threads and RangePolicy */ - -template <class FunctorType, class ReducerType, class... 
Traits> -class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, - Kokkos::Threads> { - private: - using Policy = Kokkos::RangePolicy<Traits...>; - - using WorkTag = typename Policy::work_tag; - using WorkRange = typename Policy::WorkRange; - using Member = typename Policy::member_type; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueFinal = Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - - const FunctorType m_functor; - const Policy m_policy; - const ReducerType m_reducer; - const pointer_type m_result_ptr; - - template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const FunctorType &functor, const Member &ibeg, - const Member &iend, reference_type update) { -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - functor(i, update); - } - } - - template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const FunctorType &functor, const Member &ibeg, - const Member &iend, reference_type update) { - const TagType t{}; -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - functor(t, i, update); - } - } - - static void exec(ThreadsExec 
&exec, const void *arg) { - exec_schedule<typename Policy::schedule_type::type>(exec, arg); - } - - template <class Schedule> - static typename std::enable_if< - std::is_same<Schedule, Kokkos::Static>::value>::type - exec_schedule(ThreadsExec &exec, const void *arg) { - const ParallelReduce &self = *((const ParallelReduce *)arg); - const WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - ParallelReduce::template exec_range<WorkTag>( - self.m_functor, range.begin(), range.end(), - ValueInit::init( - ReducerConditional::select(self.m_functor, self.m_reducer), - exec.reduce_memory())); - - exec.template fan_in_reduce<ReducerTypeFwd, WorkTagFwd>( - ReducerConditional::select(self.m_functor, self.m_reducer)); - } - - template <class Schedule> - static typename std::enable_if< - std::is_same<Schedule, Kokkos::Dynamic>::value>::type - exec_schedule(ThreadsExec &exec, const void *arg) { - const ParallelReduce &self = *((const ParallelReduce *)arg); - const WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - exec.set_work_range(range.begin() - self.m_policy.begin(), - range.end() - self.m_policy.begin(), - self.m_policy.chunk_size()); - exec.reset_steal_target(); - exec.barrier(); - - long work_index = exec.get_work_index(); - reference_type update = ValueInit::init( - ReducerConditional::select(self.m_functor, self.m_reducer), - exec.reduce_memory()); - while (work_index != -1) { - const Member begin = - static_cast<Member>(work_index) * self.m_policy.chunk_size() + - self.m_policy.begin(); - const Member end = - begin + self.m_policy.chunk_size() < self.m_policy.end() - ? 
begin + self.m_policy.chunk_size() - : self.m_policy.end(); - ParallelReduce::template exec_range<WorkTag>(self.m_functor, begin, end, - update); - work_index = exec.get_work_index(); - } - - exec.template fan_in_reduce<ReducerTypeFwd, WorkTagFwd>( - ReducerConditional::select(self.m_functor, self.m_reducer)); - } - - public: - inline void execute() const { - if (m_policy.end() <= m_policy.begin()) { - if (m_result_ptr) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - ValueFinal::final(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - } - } else { - ThreadsExec::resize_scratch( - ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)), - 0); - - ThreadsExec::start(&ParallelReduce::exec, this); - - ThreadsExec::fence(); - - if (m_result_ptr) { - const pointer_type data = - (pointer_type)ThreadsExec::root_reduce_scratch(); - - const unsigned n = ValueTraits::value_count( - ReducerConditional::select(m_functor, m_reducer)); - for (unsigned i = 0; i < n; ++i) { - m_result_ptr[i] = data[i]; - } - } - } - } - - template <class HostViewType> - ParallelReduce( - const FunctorType &arg_functor, const Policy &arg_policy, - const HostViewType &arg_result_view, - typename std::enable_if<Kokkos::is_view<HostViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void *>::type = nullptr) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(InvalidType()), - m_result_ptr(arg_result_view.data()) { - static_assert(Kokkos::is_view<HostViewType>::value, - "Kokkos::Threads reduce result must be a View"); - - static_assert( - std::is_same<typename HostViewType::memory_space, HostSpace>::value, - "Kokkos::Threads reduce result must be a View in HostSpace"); - } - - inline ParallelReduce(const FunctorType &arg_functor, Policy arg_policy, - const ReducerType &reducer) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(reducer), - m_result_ptr(reducer.view().data()) { - 
/*static_assert( std::is_same< typename ViewType::memory_space - , Kokkos::HostSpace >::value - , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" - );*/ - } -}; - -// MDRangePolicy impl -template <class FunctorType, class ReducerType, class... Traits> -class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, - Kokkos::Threads> { - private: - using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>; - using Policy = typename MDRangePolicy::impl_range_policy; - - using WorkTag = typename MDRangePolicy::work_tag; - using WorkRange = typename Policy::WorkRange; - using Member = typename Policy::member_type; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename ValueTraits::pointer_type; - using value_type = typename ValueTraits::value_type; - using reference_type = typename ValueTraits::reference_type; - - using iterate_type = - typename Kokkos::Impl::HostIterateTile<MDRangePolicy, FunctorType, - WorkTag, reference_type>; - - const FunctorType m_functor; - const MDRangePolicy m_mdr_policy; - const Policy m_policy; // construct as RangePolicy( 0, num_tiles - // ).set_chunk_size(1) in ctor - const ReducerType m_reducer; - const pointer_type m_result_ptr; - - inline static void exec_range(const MDRangePolicy &mdr_policy, - const FunctorType &functor, const Member &ibeg, - const Member &iend, reference_type update) { -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - 
iterate_type(mdr_policy, functor, update)(i); - } - } - - static void exec(ThreadsExec &exec, const void *arg) { - exec_schedule<typename Policy::schedule_type::type>(exec, arg); - } - - template <class Schedule> - static typename std::enable_if< - std::is_same<Schedule, Kokkos::Static>::value>::type - exec_schedule(ThreadsExec &exec, const void *arg) { - const ParallelReduce &self = *((const ParallelReduce *)arg); - const WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - ParallelReduce::exec_range( - self.m_mdr_policy, self.m_functor, range.begin(), range.end(), - ValueInit::init( - ReducerConditional::select(self.m_functor, self.m_reducer), - exec.reduce_memory())); - - exec.template fan_in_reduce<ReducerTypeFwd, WorkTagFwd>( - ReducerConditional::select(self.m_functor, self.m_reducer)); - } - - template <class Schedule> - static typename std::enable_if< - std::is_same<Schedule, Kokkos::Dynamic>::value>::type - exec_schedule(ThreadsExec &exec, const void *arg) { - const ParallelReduce &self = *((const ParallelReduce *)arg); - const WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - exec.set_work_range(range.begin(), range.end(), self.m_policy.chunk_size()); - exec.reset_steal_target(); - exec.barrier(); - - long work_index = exec.get_work_index(); - reference_type update = ValueInit::init( - ReducerConditional::select(self.m_functor, self.m_reducer), - exec.reduce_memory()); - while (work_index != -1) { - const Member begin = - static_cast<Member>(work_index) * self.m_policy.chunk_size(); - const Member end = - begin + self.m_policy.chunk_size() < self.m_policy.end() - ? 
begin + self.m_policy.chunk_size() - : self.m_policy.end(); - ParallelReduce::exec_range(self.m_mdr_policy, self.m_functor, begin, end, - update); - work_index = exec.get_work_index(); - } - - exec.template fan_in_reduce<ReducerTypeFwd, WorkTagFwd>( - ReducerConditional::select(self.m_functor, self.m_reducer)); - } - - public: - inline void execute() const { - ThreadsExec::resize_scratch( - ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)), - 0); - - ThreadsExec::start(&ParallelReduce::exec, this); - - ThreadsExec::fence(); - - if (m_result_ptr) { - const pointer_type data = - (pointer_type)ThreadsExec::root_reduce_scratch(); - - const unsigned n = ValueTraits::value_count( - ReducerConditional::select(m_functor, m_reducer)); - for (unsigned i = 0; i < n; ++i) { - m_result_ptr[i] = data[i]; - } - } - } - - template <class HostViewType> - ParallelReduce( - const FunctorType &arg_functor, const MDRangePolicy &arg_policy, - const HostViewType &arg_result_view, - typename std::enable_if<Kokkos::is_view<HostViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void *>::type = nullptr) - : m_functor(arg_functor), - m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), - m_reducer(InvalidType()), - m_result_ptr(arg_result_view.data()) { - static_assert(Kokkos::is_view<HostViewType>::value, - "Kokkos::Threads reduce result must be a View"); - - static_assert( - std::is_same<typename HostViewType::memory_space, HostSpace>::value, - "Kokkos::Threads reduce result must be a View in HostSpace"); - } - - inline ParallelReduce(const FunctorType &arg_functor, - MDRangePolicy arg_policy, const ReducerType &reducer) - : m_functor(arg_functor), - m_mdr_policy(arg_policy), - m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), - m_reducer(reducer), - m_result_ptr(reducer.view().data()) { - /*static_assert( std::is_same< typename ViewType::memory_space - , Kokkos::HostSpace >::value - , 
"Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" - );*/ - } - - template <typename Policy, typename Functor> - static int max_tile_size_product(const Policy &, const Functor &) { - /** - * 1024 here is just our guess for a reasonable max tile size, - * it isn't a hardware constraint. If people see a use for larger - * tile size products, we're happy to change this. - */ - return 1024; - } -}; - -//---------------------------------------------------------------------------- -/* ParallelReduce with Kokkos::Threads and TeamPolicy */ - -template <class FunctorType, class ReducerType, class... Properties> -class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, - ReducerType, Kokkos::Threads> { - private: - using Policy = - Kokkos::Impl::TeamPolicyInternal<Kokkos::Threads, Properties...>; - using WorkTag = typename Policy::work_tag; - using Member = typename Policy::member_type; - - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using ReducerTypeFwd = typename ReducerConditional::type; - using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; - - using ValueTraits = - Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; - using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; - using ValueFinal = Kokkos::Impl::FunctorFinal<ReducerTypeFwd, WorkTagFwd>; - - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - - const FunctorType m_functor; - const Policy m_policy; - const ReducerType m_reducer; - const pointer_type m_result_ptr; - const int m_shared; - - template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_team(const FunctorType &functor, Member member, - reference_type update) { - for (; member.valid_static(); member.next_static()) { 
- functor(member, update); - } - } - - template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_team(const FunctorType &functor, Member member, - reference_type update) { - const TagType t{}; - for (; member.valid_static(); member.next_static()) { - functor(t, member, update); - } - } - - static void exec(ThreadsExec &exec, const void *arg) { - const ParallelReduce &self = *((const ParallelReduce *)arg); - - ParallelReduce::template exec_team<WorkTag>( - self.m_functor, Member(&exec, self.m_policy, self.m_shared), - ValueInit::init( - ReducerConditional::select(self.m_functor, self.m_reducer), - exec.reduce_memory())); - - exec.template fan_in_reduce<ReducerTypeFwd, WorkTagFwd>( - ReducerConditional::select(self.m_functor, self.m_reducer)); - } - - public: - inline void execute() const { - if (m_policy.league_size() * m_policy.team_size() == 0) { - if (m_result_ptr) { - ValueInit::init(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - ValueFinal::final(ReducerConditional::select(m_functor, m_reducer), - m_result_ptr); - } - } else { - ThreadsExec::resize_scratch( - ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)), - Policy::member_type::team_reduce_size() + m_shared); - - ThreadsExec::start(&ParallelReduce::exec, this); - - ThreadsExec::fence(); - - if (m_result_ptr) { - const pointer_type data = - (pointer_type)ThreadsExec::root_reduce_scratch(); - - const unsigned n = ValueTraits::value_count( - ReducerConditional::select(m_functor, m_reducer)); - for (unsigned i = 0; i < n; ++i) { - m_result_ptr[i] = data[i]; - } - } - } - } - - template <typename Policy> - Policy fix_policy(Policy policy) { - if (policy.impl_vector_length() < 0) { - policy.impl_set_vector_length(1); - } - if (policy.team_size() < 0) { - policy.impl_set_team_size(policy.team_size_recommended( - m_functor, m_reducer, ParallelReduceTag{})); - } - return policy; - } - - template <class 
ViewType> - inline ParallelReduce( - const FunctorType &arg_functor, const Policy &arg_policy, - const ViewType &arg_result, - typename std::enable_if<Kokkos::is_view<ViewType>::value && - !Kokkos::is_reducer_type<ReducerType>::value, - void *>::type = nullptr) - : m_functor(arg_functor), - m_policy(fix_policy(arg_policy)), - m_reducer(InvalidType()), - m_result_ptr(arg_result.data()), - m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) + - FunctorTeamShmemSize<FunctorType>::value( - arg_functor, m_policy.team_size())) {} - - inline ParallelReduce(const FunctorType &arg_functor, Policy arg_policy, - const ReducerType &reducer) - : m_functor(arg_functor), - m_policy(fix_policy(arg_policy)), - m_reducer(reducer), - m_result_ptr(reducer.view().data()), - m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) + - FunctorTeamShmemSize<FunctorType>::value( - arg_functor, m_policy.team_size())) { - /*static_assert( std::is_same< typename ViewType::memory_space - , Kokkos::HostSpace >::value - , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" - );*/ - } -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -/* ParallelScan with Kokkos::Threads and RangePolicy */ - -template <class FunctorType, class... 
Traits> -class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, - Kokkos::Threads> { - private: - using Policy = Kokkos::RangePolicy<Traits...>; - using WorkRange = typename Policy::WorkRange; - using WorkTag = typename Policy::work_tag; - using Member = typename Policy::member_type; - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - - const FunctorType m_functor; - const Policy m_policy; - - template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const FunctorType &functor, const Member &ibeg, - const Member &iend, reference_type update, const bool final) { -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - functor(i, update, final); - } - } - - template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const FunctorType &functor, const Member &ibeg, - const Member &iend, reference_type update, const bool final) { - const TagType t{}; -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - functor(t, i, update, final); - } - } - - static void exec(ThreadsExec &exec, const void *arg) { - const ParallelScan &self = *((const ParallelScan *)arg); - - const WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - reference_type update = - ValueInit::init(self.m_functor, exec.reduce_memory()); - - ParallelScan::template exec_range<WorkTag>(self.m_functor, range.begin(), - range.end(), update, false); - - // exec.template scan_large<FunctorType,WorkTag>( self.m_functor ); - 
exec.template scan_small<FunctorType, WorkTag>(self.m_functor); - - ParallelScan::template exec_range<WorkTag>(self.m_functor, range.begin(), - range.end(), update, true); - - exec.fan_in(); - } - - public: - inline void execute() const { - ThreadsExec::resize_scratch(2 * ValueTraits::value_size(m_functor), 0); - ThreadsExec::start(&ParallelScan::exec, this); - ThreadsExec::fence(); - } - - ParallelScan(const FunctorType &arg_functor, const Policy &arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} -}; - -template <class FunctorType, class ReturnType, class... Traits> -class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, - ReturnType, Kokkos::Threads> { - private: - using Policy = Kokkos::RangePolicy<Traits...>; - using WorkRange = typename Policy::WorkRange; - using WorkTag = typename Policy::work_tag; - using Member = typename Policy::member_type; - using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>; - using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; - - using pointer_type = typename ValueTraits::pointer_type; - using reference_type = typename ValueTraits::reference_type; - - const FunctorType m_functor; - const Policy m_policy; - ReturnType &m_returnvalue; - - template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - exec_range(const FunctorType &functor, const Member &ibeg, - const Member &iend, reference_type update, const bool final) { -#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - functor(i, update, final); - } - } - - template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - exec_range(const FunctorType &functor, const Member &ibeg, - const Member &iend, reference_type update, const bool final) { - const TagType t{}; -#if 
defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ - defined(KOKKOS_ENABLE_PRAGMA_IVDEP) -#pragma ivdep -#endif - for (Member i = ibeg; i < iend; ++i) { - functor(t, i, update, final); - } - } - - static void exec(ThreadsExec &exec, const void *arg) { - const ParallelScanWithTotal &self = *((const ParallelScanWithTotal *)arg); - - const WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); - - reference_type update = - ValueInit::init(self.m_functor, exec.reduce_memory()); - - ParallelScanWithTotal::template exec_range<WorkTag>( - self.m_functor, range.begin(), range.end(), update, false); - - // exec.template scan_large<FunctorType,WorkTag>( self.m_functor ); - exec.template scan_small<FunctorType, WorkTag>(self.m_functor); - - ParallelScanWithTotal::template exec_range<WorkTag>( - self.m_functor, range.begin(), range.end(), update, true); - - exec.fan_in(); - - if (exec.pool_rank() == exec.pool_size() - 1) { - self.m_returnvalue = update; - } - } - - public: - inline void execute() const { - ThreadsExec::resize_scratch(2 * ValueTraits::value_size(m_functor), 0); - ThreadsExec::start(&ParallelScanWithTotal::exec, this); - ThreadsExec::fence(); - } - - ParallelScanWithTotal(const FunctorType &arg_functor, - const Policy &arg_policy, ReturnType &arg_returnvalue) - : m_functor(arg_functor), - m_policy(arg_policy), - m_returnvalue(arg_returnvalue) {} -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif -#endif /* #define KOKKOS_THREADS_PARALLEL_HPP */ diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_MDRange.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_MDRange.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6d1a38da1da1ae79334ff2fbd248c86bbed83437 --- /dev/null +++ 
b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_MDRange.hpp @@ -0,0 +1,322 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_THREADS_PARALLEL_MDRANGE_HPP +#define KOKKOS_THREADS_PARALLEL_MDRANGE_HPP + +#include <Kokkos_Parallel.hpp> + +#include <KokkosExp_MDRangePolicy.hpp> + +namespace Kokkos { +namespace Impl { + +template <class FunctorType, class... Traits> +class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, + Kokkos::Threads> { + private: + using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>; + using Policy = typename MDRangePolicy::impl_range_policy; + + using WorkTag = typename MDRangePolicy::work_tag; + + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + + using iterate_type = typename Kokkos::Impl::HostIterateTile< + MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void>; + + const FunctorType m_functor; + const MDRangePolicy m_mdr_policy; + const Policy m_policy; // construct as RangePolicy( 0, num_tiles + // ).set_chunk_size(1) in ctor + + inline static void exec_range(const MDRangePolicy &mdr_policy, + const FunctorType &functor, const Member ibeg, + const Member iend) { +#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \ + defined(KOKKOS_ENABLE_PRAGMA_IVDEP) +#pragma ivdep +#endif + for (Member i = ibeg; i < iend; ++i) { + iterate_type(mdr_policy, functor)(i); + } + } + + static void exec(ThreadsExec &exec, const void *arg) { + exec_schedule<typename Policy::schedule_type::type>(exec, arg); + } + + template <class Schedule> + static std::enable_if_t<std::is_same<Schedule, Kokkos::Static>::value> + exec_schedule(ThreadsExec &exec, const void *arg) { + const ParallelFor &self = *((const ParallelFor *)arg); + + WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); + + ParallelFor::exec_range(self.m_mdr_policy, self.m_functor, range.begin(), + range.end()); + + exec.fan_in(); + } + + template <class Schedule> + static 
std::enable_if_t<std::is_same<Schedule, Kokkos::Dynamic>::value> + exec_schedule(ThreadsExec &exec, const void *arg) { + const ParallelFor &self = *((const ParallelFor *)arg); + + WorkRange range(self.m_policy, exec.pool_rank(), exec.pool_size()); + + exec.set_work_range(range.begin(), range.end(), self.m_policy.chunk_size()); + exec.reset_steal_target(); + exec.barrier(); + + long work_index = exec.get_work_index(); + + while (work_index != -1) { + const Member begin = + static_cast<Member>(work_index) * self.m_policy.chunk_size(); + const Member end = + begin + self.m_policy.chunk_size() < self.m_policy.end() + ? begin + self.m_policy.chunk_size() + : self.m_policy.end(); + + ParallelFor::exec_range(self.m_mdr_policy, self.m_functor, begin, end); + work_index = exec.get_work_index(); + } + + exec.fan_in(); + } + + public: + inline void execute() const { + ThreadsExec::start(&ParallelFor::exec, this); + ThreadsExec::fence(); + } + + ParallelFor(const FunctorType &arg_functor, const MDRangePolicy &arg_policy) + : m_functor(arg_functor), + m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} + + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy &, const Functor &) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. + */ + return 1024; + } +}; + +template <class FunctorType, class ReducerType, class... 
Traits>
+// ParallelReduce specialization: MDRangePolicy executed by the Threads
+// backend. Work is distributed over a flat range policy [0, m_num_tiles)
+// with chunk size 1; each thread accumulates into the memory returned by
+// its executor's reduce_memory() and the partial results are combined by
+// ThreadsExec::fan_in_reduce().
+class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
+                     Kokkos::Threads> {
+ private:
+  using MDRangePolicy = Kokkos::MDRangePolicy<Traits...>;
+  using Policy        = typename MDRangePolicy::impl_range_policy;
+  using WorkTag       = typename MDRangePolicy::work_tag;
+  using WorkRange     = typename Policy::WorkRange;
+  using Member        = typename Policy::member_type;
+
+  // Chooses between the functor and the reducer depending on whether
+  // ReducerType is InvalidType (presumably: functor when no reducer was
+  // supplied -- confirm against Kokkos::Impl::if_c).
+  using ReducerConditional =
+      Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
+                         FunctorType, ReducerType>;
+  using ReducerTypeFwd = typename ReducerConditional::type;
+  using WorkTagFwd =
+      typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
+                                  WorkTag, void>::type;
+
+  using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,
+                                         MDRangePolicy, ReducerTypeFwd>;
+  using pointer_type   = typename Analysis::pointer_type;
+  using value_type     = typename Analysis::value_type;
+  using reference_type = typename Analysis::reference_type;
+
+  using iterate_type =
+      typename Kokkos::Impl::HostIterateTile<MDRangePolicy, FunctorType,
+                                             WorkTag, reference_type>;
+
+  const FunctorType m_functor;
+  const MDRangePolicy m_mdr_policy;
+  // Flat policy; built in the constructors as
+  // RangePolicy(0, num_tiles).set_chunk_size(1).
+  const Policy m_policy;
+  const ReducerType m_reducer;
+  const pointer_type m_result_ptr;
+
+  // Passes every flat index in [ibeg, iend) to a freshly constructed
+  // iterate_type that accumulates into `update`.
+  inline static void exec_range(const MDRangePolicy &mdr_policy,
+                                const FunctorType &functor, const Member &ibeg,
+                                const Member &iend, reference_type update) {
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member tile = ibeg; tile < iend; ++tile) {
+      iterate_type(mdr_policy, functor, update)(tile);
+    }
+  }
+
+  // Callback given to ThreadsExec::start(); forwards to the exec_schedule
+  // overload that matches the flat policy's schedule type.
+  static void exec(ThreadsExec &exec, const void *arg) {
+    using schedule = typename Policy::schedule_type::type;
+    exec_schedule<schedule>(exec, arg);
+  }
+
+  // Static schedule: reduce this thread's WorkRange into the accumulator
+  // initialized in exec.reduce_memory(), then join the reducing fan-in.
+  template <class Schedule>
+  static std::enable_if_t<std::is_same<Schedule, Kokkos::Static>::value>
+  exec_schedule(ThreadsExec &exec, const void *arg) {
+    const ParallelReduce &op = *static_cast<const ParallelReduce *>(arg);
+    const WorkRange my_part(op.m_policy, exec.pool_rank(), exec.pool_size());
+
+    typename Analysis::Reducer reducer(
+        &ReducerConditional::select(op.m_functor, op.m_reducer));
+
+    reference_type update =
+        reducer.init(static_cast<pointer_type>(exec.reduce_memory()));
+
+    ParallelReduce::exec_range(op.m_mdr_policy, op.m_functor, my_part.begin(),
+                               my_part.end(), update);
+
+    exec.fan_in_reduce(reducer);
+  }
+
+  // Dynamic schedule: register the initial work range, then keep fetching
+  // chunk indices until get_work_index() yields -1. The first index is
+  // fetched before the accumulator is initialized, as in the original code.
+  template <class Schedule>
+  static std::enable_if_t<std::is_same<Schedule, Kokkos::Dynamic>::value>
+  exec_schedule(ThreadsExec &exec, const void *arg) {
+    const ParallelReduce &op = *static_cast<const ParallelReduce *>(arg);
+    const auto chunk         = op.m_policy.chunk_size();
+    const WorkRange my_part(op.m_policy, exec.pool_rank(), exec.pool_size());
+
+    exec.set_work_range(my_part.begin(), my_part.end(), chunk);
+    exec.reset_steal_target();
+    exec.barrier();
+
+    long chunk_id = exec.get_work_index();
+    typename Analysis::Reducer reducer(
+        &ReducerConditional::select(op.m_functor, op.m_reducer));
+
+    reference_type update =
+        reducer.init(static_cast<pointer_type>(exec.reduce_memory()));
+    for (; chunk_id != -1; chunk_id = exec.get_work_index()) {
+      const Member first = static_cast<Member>(chunk_id) * chunk;
+      // Clamp the last chunk to the end of the flat policy.
+      const Member last = first + chunk < op.m_policy.end()
+                              ? first + chunk
+                              : op.m_policy.end();
+      ParallelReduce::exec_range(op.m_mdr_policy, op.m_functor, first, last,
+                                 update);
+    }
+
+    exec.fan_in_reduce(reducer);
+  }
+
+ public:
+  // Sizes the reduction scratch, runs exec() on the pool, and afterwards
+  // copies value_count entries from the root reduce scratch into
+  // m_result_ptr when a result pointer was provided.
+  inline void execute() const {
+    ThreadsExec::resize_scratch(
+        Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)),
+        0);
+
+    ThreadsExec::start(&ParallelReduce::exec, this);
+
+    ThreadsExec::fence();
+
+    if (!m_result_ptr) return;
+
+    const pointer_type reduced =
+        (pointer_type)ThreadsExec::root_reduce_scratch();
+
+    const unsigned count = Analysis::value_count(
+        ReducerConditional::select(m_functor, m_reducer));
+    for (unsigned k = 0; k < count; ++k) {
+      m_result_ptr[k] = reduced[k];
+    }
+  }
+
+  // Functor-based reduction writing into a host View. Only participates in
+  // overload resolution when the result is a View and ReducerType is not a
+  // reducer.
+  template <class HostViewType>
+  ParallelReduce(const FunctorType &arg_functor,
+                 const MDRangePolicy &arg_policy,
+                 const HostViewType &arg_result_view,
+                 std::enable_if_t<Kokkos::is_view<HostViewType>::value &&
+                                      !Kokkos::is_reducer<ReducerType>::value,
+                                  void *> = nullptr)
+      : m_functor(arg_functor),
+        m_mdr_policy(arg_policy),
+        m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)),
+        m_reducer(InvalidType()),
+        m_result_ptr(arg_result_view.data()) {
+    static_assert(Kokkos::is_view<HostViewType>::value,
+                  "Kokkos::Threads reduce result must be a View");
+
+    static_assert(
+        std::is_same<typename HostViewType::memory_space, HostSpace>::value,
+        "Kokkos::Threads reduce result must be a View in HostSpace");
+  }
+
+  // Reducer-based reduction; the result pointer is taken from the reducer's
+  // view.
+  inline ParallelReduce(const FunctorType &arg_functor,
+                        MDRangePolicy arg_policy, const ReducerType &reducer)
+      : m_functor(arg_functor),
+        m_mdr_policy(arg_policy),
+        m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)),
+        m_reducer(reducer),
+        m_result_ptr(reducer.view().data()) {
+    // Disabled check, kept for reference:
+    /*static_assert( std::is_same< typename ViewType::memory_space
+                            , Kokkos::HostSpace >::value
+    , "Reduction result on Kokkos::Threads must be a Kokkos::View in HostSpace"
+    );*/
+  }
+
+  // Upper bound on the product of tile extents; both arguments are unused.
+  template <typename Policy, typename Functor>
+  static int max_tile_size_product(const Policy &, const Functor &) {
+    /**
+     * 1024 here is just our guess for a reasonable max tile size,
+     * it isn't a hardware constraint. If people see a use for larger
+     * tile size products, we're happy to change this.
+     */
+    return 1024;
+  }
+};
+
+}  // namespace Impl
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_Range.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_Range.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..971a0bb9c2bd2d1f4005fddf58bc47041df07a93
--- /dev/null
+++ b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_Range.hpp
@@ -0,0 +1,485 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_THREADS_PARALLEL_RANGE_HPP
+#define KOKKOS_THREADS_PARALLEL_RANGE_HPP
+
+#include <Kokkos_Parallel.hpp>
+
+namespace Kokkos {
+namespace Impl {
+
+// ParallelFor specialization: RangePolicy executed by the Threads backend.
+template <class FunctorType, class... Traits>
+class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>,
+                  Kokkos::Threads> {
+ private:
+  using Policy    = Kokkos::RangePolicy<Traits...>;
+  using WorkTag   = typename Policy::work_tag;
+  using WorkRange = typename Policy::WorkRange;
+  using Member    = typename Policy::member_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+
+  // Untagged variant (TagType is void): calls functor(i) for every i in
+  // [ibeg, iend).
+  template <class TagType>
+  inline static std::enable_if_t<std::is_void<TagType>::value> exec_range(
+      const FunctorType &functor, const Member ibeg, const Member iend) {
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member idx = ibeg; idx < iend; ++idx) {
+      functor(idx);
+    }
+  }
+
+  // Tagged variant: calls functor(tag, i) with a value-initialized tag for
+  // every i in [ibeg, iend).
+  template <class TagType>
+  inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range(
+      const FunctorType &functor, const Member ibeg, const Member iend) {
+    const TagType tag{};
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member idx = ibeg; idx < iend; ++idx) {
+      functor(tag, idx);
+    }
+  }
+
+  // Callback given to ThreadsExec::start(); forwards to the exec_schedule
+  // overload that matches the policy's schedule type.
+  static void exec(ThreadsExec &exec, const void *arg) {
+    using schedule = typename Policy::schedule_type::type;
+    exec_schedule<schedule>(exec, arg);
+  }
+
+  // Static schedule: this thread processes the WorkRange computed from its
+  // pool rank and the pool size, then joins the fan-in.
+  template <class Schedule>
+  static std::enable_if_t<std::is_same<Schedule, Kokkos::Static>::value>
+  exec_schedule(ThreadsExec &exec, const void *arg) {
+    const ParallelFor &op = *static_cast<const ParallelFor *>(arg);
+
+    const WorkRange my_part(op.m_policy, exec.pool_rank(), exec.pool_size());
+
+    ParallelFor::template exec_range<WorkTag>(op.m_functor, my_part.begin(),
+                                              my_part.end());
+
+    exec.fan_in();
+  }
+
+  // Dynamic schedule: the work range handed to the executor is expressed
+  // relative to the policy's begin(); chunk indices obtained from
+  // get_work_index() are mapped back by adding begin() again. The loop ends
+  // once get_work_index() yields -1.
+  template <class Schedule>
+  static std::enable_if_t<std::is_same<Schedule, Kokkos::Dynamic>::value>
+  exec_schedule(ThreadsExec &exec, const void *arg) {
+    const ParallelFor &op = *static_cast<const ParallelFor *>(arg);
+    const auto chunk      = op.m_policy.chunk_size();
+
+    const WorkRange my_part(op.m_policy, exec.pool_rank(), exec.pool_size());
+
+    exec.set_work_range(my_part.begin() - op.m_policy.begin(),
+                        my_part.end() - op.m_policy.begin(), chunk);
+    exec.reset_steal_target();
+    exec.barrier();
+
+    for (long chunk_id = exec.get_work_index(); chunk_id != -1;
+         chunk_id      = exec.get_work_index()) {
+      const Member first =
+          static_cast<Member>(chunk_id) * chunk + op.m_policy.begin();
+      // Clamp the last chunk to the end of the policy.
+      const Member last = first + chunk < op.m_policy.end()
+                              ? first + chunk
+                              : op.m_policy.end();
+      ParallelFor::template exec_range<WorkTag>(op.m_functor, first, last);
+    }
+
+    exec.fan_in();
+  }
+
+ public:
+  // Starts exec() through ThreadsExec::start() and then calls
+  // ThreadsExec::fence().
+  inline void execute() const {
+    ThreadsExec::start(&ParallelFor::exec, this);
+    ThreadsExec::fence();
+  }
+
+  // Stores copies of the functor and the policy.
+  ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy)
+      : m_functor(arg_functor), m_policy(arg_policy) {}
+};
+
+template <class FunctorType, class ReducerType, class...
Traits>
+// ParallelReduce specialization: RangePolicy executed by the Threads
+// backend. Each thread accumulates into the memory returned by its
+// executor's reduce_memory(); partial results are combined by
+// ThreadsExec::fan_in_reduce().
+class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
+                     Kokkos::Threads> {
+ private:
+  using Policy    = Kokkos::RangePolicy<Traits...>;
+  using WorkTag   = typename Policy::work_tag;
+  using WorkRange = typename Policy::WorkRange;
+  using Member    = typename Policy::member_type;
+
+  // Chooses between the functor and the reducer depending on whether
+  // ReducerType is InvalidType (presumably: functor when no reducer was
+  // supplied -- confirm against Kokkos::Impl::if_c).
+  using ReducerConditional =
+      Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
+                         FunctorType, ReducerType>;
+  using ReducerTypeFwd = typename ReducerConditional::type;
+  using WorkTagFwd =
+      typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
+                                  WorkTag, void>::type;
+
+  using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,
+                                         Policy, ReducerTypeFwd>;
+
+  using pointer_type   = typename Analysis::pointer_type;
+  using reference_type = typename Analysis::reference_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+  const ReducerType m_reducer;
+  const pointer_type m_result_ptr;
+
+  // Untagged variant (TagType is void): calls functor(i, update) for every
+  // i in [ibeg, iend).
+  template <class TagType>
+  inline static std::enable_if_t<std::is_void<TagType>::value> exec_range(
+      const FunctorType &functor, const Member &ibeg, const Member &iend,
+      reference_type update) {
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member idx = ibeg; idx < iend; ++idx) {
+      functor(idx, update);
+    }
+  }
+
+  // Tagged variant: calls functor(tag, i, update) with a value-initialized
+  // tag for every i in [ibeg, iend).
+  template <class TagType>
+  inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range(
+      const FunctorType &functor, const Member &ibeg, const Member &iend,
+      reference_type update) {
+    const TagType tag{};
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member idx = ibeg; idx < iend; ++idx) {
+      functor(tag, idx, update);
+    }
+  }
+
+  // Callback given to ThreadsExec::start(); forwards to the exec_schedule
+  // overload that matches the policy's schedule type.
+  static void exec(ThreadsExec &exec, const void *arg) {
+    using schedule = typename Policy::schedule_type::type;
+    exec_schedule<schedule>(exec, arg);
+  }
+
+  // Static schedule: reduce this thread's WorkRange into the accumulator
+  // initialized in exec.reduce_memory(), then join the reducing fan-in.
+  template <class Schedule>
+  static std::enable_if_t<std::is_same<Schedule, Kokkos::Static>::value>
+  exec_schedule(ThreadsExec &exec, const void *arg) {
+    const ParallelReduce &op = *static_cast<const ParallelReduce *>(arg);
+    const WorkRange my_part(op.m_policy, exec.pool_rank(), exec.pool_size());
+
+    typename Analysis::Reducer reducer(
+        &ReducerConditional::select(op.m_functor, op.m_reducer));
+
+    reference_type update =
+        reducer.init(static_cast<pointer_type>(exec.reduce_memory()));
+
+    ParallelReduce::template exec_range<WorkTag>(
+        op.m_functor, my_part.begin(), my_part.end(), update);
+
+    exec.fan_in_reduce(reducer);
+  }
+
+  // Dynamic schedule: the work range handed to the executor is expressed
+  // relative to the policy's begin(); chunk indices are mapped back by
+  // adding begin() again. The first index is fetched before the accumulator
+  // is initialized, as in the original code; the loop ends once
+  // get_work_index() yields -1.
+  template <class Schedule>
+  static std::enable_if_t<std::is_same<Schedule, Kokkos::Dynamic>::value>
+  exec_schedule(ThreadsExec &exec, const void *arg) {
+    const ParallelReduce &op = *static_cast<const ParallelReduce *>(arg);
+    const auto chunk         = op.m_policy.chunk_size();
+    const WorkRange my_part(op.m_policy, exec.pool_rank(), exec.pool_size());
+
+    exec.set_work_range(my_part.begin() - op.m_policy.begin(),
+                        my_part.end() - op.m_policy.begin(), chunk);
+    exec.reset_steal_target();
+    exec.barrier();
+
+    long chunk_id = exec.get_work_index();
+    typename Analysis::Reducer reducer(
+        &ReducerConditional::select(op.m_functor, op.m_reducer));
+
+    reference_type update =
+        reducer.init(static_cast<pointer_type>(exec.reduce_memory()));
+    for (; chunk_id != -1; chunk_id = exec.get_work_index()) {
+      const Member first =
+          static_cast<Member>(chunk_id) * chunk + op.m_policy.begin();
+      // Clamp the last chunk to the end of the policy.
+      const Member last = first + chunk < op.m_policy.end()
+                              ? first + chunk
+                              : op.m_policy.end();
+      ParallelReduce::template exec_range<WorkTag>(op.m_functor, first, last,
+                                                   update);
+    }
+
+    exec.fan_in_reduce(reducer);
+  }
+
+ public:
+  // Runs the reduction. For an empty range (end() <= begin()) no threads
+  // are started: the result, if requested, is just init() followed by
+  // final() on m_result_ptr. Otherwise the scratch is sized, exec() runs on
+  // the pool, and value_count entries are copied from the root reduce
+  // scratch into m_result_ptr when a result pointer was provided.
+  inline void execute() const {
+    if (m_policy.end() <= m_policy.begin()) {
+      if (m_result_ptr) {
+        typename Analysis::Reducer final_reducer(
+            &ReducerConditional::select(m_functor, m_reducer));
+        final_reducer.init(m_result_ptr);
+        final_reducer.final(m_result_ptr);
+      }
+      return;
+    }
+
+    ThreadsExec::resize_scratch(
+        Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)),
+        0);
+
+    ThreadsExec::start(&ParallelReduce::exec, this);
+
+    ThreadsExec::fence();
+
+    if (!m_result_ptr) return;
+
+    const pointer_type reduced =
+        (pointer_type)ThreadsExec::root_reduce_scratch();
+
+    const unsigned count = Analysis::value_count(
+        ReducerConditional::select(m_functor, m_reducer));
+    for (unsigned k = 0; k < count; ++k) {
+      m_result_ptr[k] = reduced[k];
+    }
+  }
+
+  // Functor-based reduction writing into a host View. Only participates in
+  // overload resolution when the result is a View and ReducerType is not a
+  // reducer.
+  template <class HostViewType>
+  ParallelReduce(const FunctorType &arg_functor, const Policy &arg_policy,
+                 const HostViewType &arg_result_view,
+                 std::enable_if_t<Kokkos::is_view<HostViewType>::value &&
+                                      !Kokkos::is_reducer<ReducerType>::value,
+                                  void *> = nullptr)
+      : m_functor(arg_functor),
+        m_policy(arg_policy),
+        m_reducer(InvalidType()),
+        m_result_ptr(arg_result_view.data()) {
+    static_assert(Kokkos::is_view<HostViewType>::value,
+                  "Kokkos::Threads reduce result must be a View");
+
+    static_assert(
+        std::is_same<typename HostViewType::memory_space, HostSpace>::value,
+        "Kokkos::Threads reduce result must be a View in HostSpace");
+  }
+
+  // Reducer-based reduction; the result pointer is taken from the reducer's
+  // view.
+  inline ParallelReduce(const FunctorType &arg_functor, Policy arg_policy,
+                        const ReducerType &reducer)
+      : m_functor(arg_functor),
+        m_policy(arg_policy),
+        m_reducer(reducer),
+        m_result_ptr(reducer.view().data()) {
+    /*static_assert( std::is_same< typename ViewType::memory_space
+                            , Kokkos::HostSpace >::value
+    , "Reduction result on
Kokkos::Threads must be a Kokkos::View in HostSpace"
+    );*/
+  }
+};
+
+// ParallelScan specialization: RangePolicy executed by the Threads backend.
+// Every thread sweeps its WorkRange twice: first with final == false, then,
+// after ThreadsExec::scan_small(), with final == true.
+template <class FunctorType, class... Traits>
+class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
+                   Kokkos::Threads> {
+ private:
+  using Policy    = Kokkos::RangePolicy<Traits...>;
+  using WorkRange = typename Policy::WorkRange;
+  using WorkTag   = typename Policy::work_tag;
+  using Member    = typename Policy::member_type;
+  using Analysis  = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN,
+                                         Policy, FunctorType>;
+  using pointer_type   = typename Analysis::pointer_type;
+  using reference_type = typename Analysis::reference_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+
+  // Untagged variant (TagType is void): calls functor(i, update, final) for
+  // every i in [ibeg, iend).
+  template <class TagType>
+  inline static std::enable_if_t<std::is_void<TagType>::value> exec_range(
+      const FunctorType &functor, const Member &ibeg, const Member &iend,
+      reference_type update, const bool final) {
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member idx = ibeg; idx < iend; ++idx) {
+      functor(idx, update, final);
+    }
+  }
+
+  // Tagged variant: calls functor(tag, i, update, final) with a
+  // value-initialized tag for every i in [ibeg, iend).
+  template <class TagType>
+  inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range(
+      const FunctorType &functor, const Member &ibeg, const Member &iend,
+      reference_type update, const bool final) {
+    const TagType tag{};
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member idx = ibeg; idx < iend; ++idx) {
+      functor(tag, idx, update, final);
+    }
+  }
+
+  // Callback given to ThreadsExec::start(). The order
+  // non-final pass -> scan_small -> final pass -> fan_in is kept exactly as
+  // in the original code.
+  static void exec(ThreadsExec &exec, const void *arg) {
+    const ParallelScan &op = *static_cast<const ParallelScan *>(arg);
+
+    const WorkRange my_part(op.m_policy, exec.pool_rank(), exec.pool_size());
+
+    typename Analysis::Reducer final_reducer(&op.m_functor);
+
+    reference_type update =
+        final_reducer.init(static_cast<pointer_type>(exec.reduce_memory()));
+
+    // First sweep over this thread's range (final == false).
+    ParallelScan::template exec_range<WorkTag>(op.m_functor, my_part.begin(),
+                                               my_part.end(), update, false);
+
+    //  exec.template scan_large( final_reducer );
+    exec.scan_small(final_reducer);
+
+    // Second sweep over the same range (final == true).
+    ParallelScan::template exec_range<WorkTag>(op.m_functor, my_part.begin(),
+                                               my_part.end(), update, true);
+
+    exec.fan_in();
+  }
+
+ public:
+  // Requests reduce scratch of twice the functor's value size, runs exec()
+  // on the pool and then calls ThreadsExec::fence().
+  inline void execute() const {
+    ThreadsExec::resize_scratch(2 * Analysis::value_size(m_functor), 0);
+    ThreadsExec::start(&ParallelScan::exec, this);
+    ThreadsExec::fence();
+  }
+
+  // Stores copies of the functor and the policy.
+  ParallelScan(const FunctorType &arg_functor, const Policy &arg_policy)
+      : m_functor(arg_functor), m_policy(arg_policy) {}
+};
+
+// ParallelScanWithTotal specialization: same two-sweep scheme as
+// ParallelScan, and additionally the thread with the highest pool rank
+// stores its `update` value into the caller-provided return value.
+template <class FunctorType, class ReturnType, class... Traits>
+class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>,
+                            ReturnType, Kokkos::Threads> {
+ private:
+  using Policy    = Kokkos::RangePolicy<Traits...>;
+  using WorkRange = typename Policy::WorkRange;
+  using WorkTag   = typename Policy::work_tag;
+  using Member    = typename Policy::member_type;
+
+  using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::SCAN,
+                                         Policy, FunctorType>;
+
+  using pointer_type   = typename Analysis::pointer_type;
+  using reference_type = typename Analysis::reference_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+  // Reference to the caller's result object; written from exec().
+  ReturnType &m_returnvalue;
+
+  // Untagged variant (TagType is void): calls functor(i, update, final) for
+  // every i in [ibeg, iend).
+  template <class TagType>
+  inline static std::enable_if_t<std::is_void<TagType>::value> exec_range(
+      const FunctorType &functor, const Member &ibeg, const Member &iend,
+      reference_type update, const bool final) {
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member idx = ibeg; idx < iend; ++idx) {
+      functor(idx, update, final);
+    }
+  }
+
+  // Tagged variant: calls functor(tag, i, update, final) with a
+  // value-initialized tag for every i in [ibeg, iend).
+  template <class TagType>
+  inline static std::enable_if_t<!std::is_void<TagType>::value> exec_range(
+      const FunctorType &functor, const Member &ibeg, const Member &iend,
+      reference_type update, const bool final) {
+    const TagType tag{};
+#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && \
+    defined(KOKKOS_ENABLE_PRAGMA_IVDEP)
+#pragma ivdep
+#endif
+    for (Member idx = ibeg; idx < iend; ++idx) {
+      functor(tag, idx, update, final);
+    }
+  }
+
+  // Callback given to ThreadsExec::start(). The order
+  // non-final pass -> scan_small -> final pass -> fan_in -> store total is
+  // kept exactly as in the original code.
+  static void exec(ThreadsExec &exec, const void *arg) {
+    const ParallelScanWithTotal &op =
+        *static_cast<const ParallelScanWithTotal *>(arg);
+
+    const WorkRange my_part(op.m_policy, exec.pool_rank(), exec.pool_size());
+
+    typename Analysis::Reducer final_reducer(&op.m_functor);
+
+    reference_type update =
+        final_reducer.init(static_cast<pointer_type>(exec.reduce_memory()));
+
+    // First sweep over this thread's range (final == false).
+    ParallelScanWithTotal::template exec_range<WorkTag>(
+        op.m_functor, my_part.begin(), my_part.end(), update, false);
+
+    //  exec.template scan_large(final_reducer);
+    exec.scan_small(final_reducer);
+
+    // Second sweep over the same range (final == true).
+    ParallelScanWithTotal::template exec_range<WorkTag>(
+        op.m_functor, my_part.begin(), my_part.end(), update, true);
+
+    exec.fan_in();
+
+    // Only the thread with the highest pool rank publishes its value.
+    const bool is_last_rank = exec.pool_rank() == exec.pool_size() - 1;
+    if (is_last_rank) {
+      op.m_returnvalue = update;
+    }
+  }
+
+ public:
+  // Requests reduce scratch of twice the functor's value size, runs exec()
+  // on the pool and then calls ThreadsExec::fence().
+  inline void execute() const {
+    ThreadsExec::resize_scratch(2 * Analysis::value_size(m_functor), 0);
+    ThreadsExec::start(&ParallelScanWithTotal::exec, this);
+    ThreadsExec::fence();
+  }
+
+  // Stores copies of the functor and policy and binds the output reference.
+  ParallelScanWithTotal(const FunctorType &arg_functor,
+                        const Policy &arg_policy, ReturnType &arg_returnvalue)
+      : m_functor(arg_functor),
+        m_policy(arg_policy),
+        m_returnvalue(arg_returnvalue) {}
+};
+
+}  // namespace Impl
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_Team.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_Team.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bdda110d37cef39df4d9aa977d9acf7b25fc2fb0
--- /dev/null
+++ b/packages/kokkos/core/src/Threads/Kokkos_Threads_Parallel_Team.hpp
@@ -0,0 +1,279 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S.
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_THREADS_PARALLEL_TEAM_HPP +#define KOKKOS_THREADS_PARALLEL_TEAM_HPP + +#include <Kokkos_Parallel.hpp> + +namespace Kokkos { +namespace Impl { + +template <class FunctorType, class... 
Properties>
+// ParallelFor specialization: TeamPolicy executed by the Threads backend.
+// The team member object drives the iteration itself through its
+// valid_*/next_* methods.
+class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
+                  Kokkos::Threads> {
+ private:
+  using Policy =
+      Kokkos::Impl::TeamPolicyInternal<Kokkos::Threads, Properties...>;
+  using WorkTag = typename Policy::work_tag;
+  using Member  = typename Policy::member_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+  // Scratch sizes of levels 0 and 1 plus the functor's team shmem size; see
+  // the constructor.
+  const size_t m_shared;
+
+  // Untagged, static schedule: calls functor(member) while
+  // member.valid_static() holds, advancing with next_static().
+  template <class TagType, class Schedule>
+  inline static std::enable_if_t<std::is_void<TagType>::value &&
+                                 std::is_same<Schedule, Kokkos::Static>::value>
+  exec_team(const FunctorType &functor, Member member) {
+    while (member.valid_static()) {
+      functor(member);
+      member.next_static();
+    }
+  }
+
+  // Tagged, static schedule: as above, passing a value-initialized tag.
+  template <class TagType, class Schedule>
+  inline static std::enable_if_t<!std::is_void<TagType>::value &&
+                                 std::is_same<Schedule, Kokkos::Static>::value>
+  exec_team(const FunctorType &functor, Member member) {
+    const TagType tag{};
+    while (member.valid_static()) {
+      functor(tag, member);
+      member.next_static();
+    }
+  }
+
+  // Untagged, dynamic schedule: calls functor(member) while
+  // member.valid_dynamic() holds, advancing with next_dynamic().
+  template <class TagType, class Schedule>
+  inline static std::enable_if_t<std::is_void<TagType>::value &&
+                                 std::is_same<Schedule, Kokkos::Dynamic>::value>
+  exec_team(const FunctorType &functor, Member member) {
+    while (member.valid_dynamic()) {
+      functor(member);
+      member.next_dynamic();
+    }
+  }
+
+  // Tagged, dynamic schedule: as above, passing a value-initialized tag.
+  template <class TagType, class Schedule>
+  inline static std::enable_if_t<!std::is_void<TagType>::value &&
+                                 std::is_same<Schedule, Kokkos::Dynamic>::value>
+  exec_team(const FunctorType &functor, Member member) {
+    const TagType tag{};
+    while (member.valid_dynamic()) {
+      functor(tag, member);
+      member.next_dynamic();
+    }
+  }
+
+  // Callback given to ThreadsExec::start(): builds the team member for this
+  // thread, runs the matching exec_team overload, then barrier() followed by
+  // fan_in() (order kept as in the original code).
+  static void exec(ThreadsExec &exec, const void *arg) {
+    const ParallelFor &op = *static_cast<const ParallelFor *>(arg);
+    using schedule        = typename Policy::schedule_type::type;
+
+    ParallelFor::exec_team<WorkTag, schedule>(
+        op.m_functor, Member(&exec, op.m_policy, op.m_shared));
+
+    exec.barrier();
+    exec.fan_in();
+  }
+
+  // Returns a copy of `policy` in which a negative vector length is
+  // replaced by 1 and a negative team size by the size recommended for
+  // m_functor with ParallelForTag.
+  template <typename Policy>
+  Policy fix_policy(Policy policy) {
+    const bool vector_length_unset = policy.impl_vector_length() < 0;
+    if (vector_length_unset) {
+      policy.impl_set_vector_length(1);
+    }
+    const bool team_size_unset = policy.team_size() < 0;
+    if (team_size_unset) {
+      policy.impl_set_team_size(
+          policy.team_size_recommended(m_functor, ParallelForTag{}));
+    }
+    return policy;
+  }
+
+ public:
+  // Sizes the shared scratch (team reduce size + m_shared), runs exec() on
+  // the pool and then calls ThreadsExec::fence().
+  inline void execute() const {
+    ThreadsExec::resize_scratch(
+        0, Policy::member_type::team_reduce_size() + m_shared);
+
+    ThreadsExec::start(&ParallelFor::exec, this);
+
+    ThreadsExec::fence();
+  }
+
+  // m_policy is the fixed-up copy of arg_policy; m_shared is computed from
+  // that fixed-up policy, so it must stay after m_policy in the initializer
+  // order.
+  ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy)
+      : m_functor(arg_functor),
+        m_policy(fix_policy(arg_policy)),
+        m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) +
+                 FunctorTeamShmemSize<FunctorType>::value(
+                     arg_functor, m_policy.team_size())) {}
+};
+
+// ParallelReduce specialization: TeamPolicy executed by the Threads backend.
+template <class FunctorType, class ReducerType, class... Properties>
+class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
+                     ReducerType, Kokkos::Threads> {
+ private:
+  using Policy =
+      Kokkos::Impl::TeamPolicyInternal<Kokkos::Threads, Properties...>;
+  using WorkTag = typename Policy::work_tag;
+  using Member  = typename Policy::member_type;
+
+  // Chooses between the functor and the reducer depending on whether
+  // ReducerType is InvalidType (presumably: functor when no reducer was
+  // supplied -- confirm against Kokkos::Impl::if_c).
+  using ReducerConditional =
+      Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
+                         FunctorType, ReducerType>;
+  using ReducerTypeFwd = typename ReducerConditional::type;
+  using WorkTagFwd =
+      typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value,
+                                  WorkTag, void>::type;
+
+  using Analysis = Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,
+                                         Policy, ReducerTypeFwd>;
+  using pointer_type   = typename Analysis::pointer_type;
+  using reference_type = typename Analysis::reference_type;
+
+  const FunctorType m_functor;
+  const Policy m_policy;
+  const ReducerType m_reducer;
+  const pointer_type m_result_ptr;
+  const size_t m_shared;
+
+  // Untagged variant (TagType is void): calls functor(member, update) while
+  // member.valid_static() holds, advancing with next_static().
+  template <class TagType>
+  inline static std::enable_if_t<std::is_void<TagType>::value> exec_team(
+      const FunctorType &functor, Member member, reference_type update) {
+    for (; member.valid_static();
member.next_static()) {
+      functor(member, update);
+    }
+  }
+
+  // Tagged variant: calls functor(tag, member, update) with a
+  // value-initialized tag while member.valid_static() holds, advancing with
+  // next_static().
+  template <class TagType>
+  inline static std::enable_if_t<!std::is_void<TagType>::value> exec_team(
+      const FunctorType &functor, Member member, reference_type update) {
+    const TagType tag{};
+    while (member.valid_static()) {
+      functor(tag, member, update);
+      member.next_static();
+    }
+  }
+
+  // Callback given to ThreadsExec::start(): builds the team member for this
+  // thread, reduces into the accumulator initialized in
+  // exec.reduce_memory(), then joins the reducing fan-in.
+  static void exec(ThreadsExec &exec, const void *arg) {
+    const ParallelReduce &op = *static_cast<const ParallelReduce *>(arg);
+
+    typename Analysis::Reducer reducer(
+        &ReducerConditional::select(op.m_functor, op.m_reducer));
+
+    ParallelReduce::template exec_team<WorkTag>(
+        op.m_functor, Member(&exec, op.m_policy, op.m_shared),
+        reducer.init(static_cast<pointer_type>(exec.reduce_memory())));
+
+    exec.fan_in_reduce(reducer);
+  }
+
+ public:
+  // Runs the reduction. When league_size() * team_size() is zero no threads
+  // are started: the result, if requested, is just init() followed by
+  // final() on m_result_ptr. Otherwise the scratch is sized, exec() runs on
+  // the pool, and value_count entries are copied from the root reduce
+  // scratch into m_result_ptr when a result pointer was provided.
+  inline void execute() const {
+    if (m_policy.league_size() * m_policy.team_size() == 0) {
+      if (m_result_ptr) {
+        typename Analysis::Reducer final_reducer(
+            &ReducerConditional::select(m_functor, m_reducer));
+        final_reducer.init(m_result_ptr);
+        final_reducer.final(m_result_ptr);
+      }
+      return;
+    }
+
+    ThreadsExec::resize_scratch(
+        Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)),
+        Policy::member_type::team_reduce_size() + m_shared);
+
+    ThreadsExec::start(&ParallelReduce::exec, this);
+
+    ThreadsExec::fence();
+
+    if (!m_result_ptr) return;
+
+    const pointer_type reduced =
+        (pointer_type)ThreadsExec::root_reduce_scratch();
+
+    const unsigned count = Analysis::value_count(
+        ReducerConditional::select(m_functor, m_reducer));
+    for (unsigned k = 0; k < count; ++k) {
+      m_result_ptr[k] = reduced[k];
+    }
+  }
+
+  // Returns a copy of `policy` in which a negative vector length is
+  // replaced by 1 and a negative team size by the size recommended for
+  // m_functor / m_reducer with ParallelReduceTag.
+  //
+  // NOTE(review): both constructors call this while initializing m_policy,
+  // and m_reducer is declared (hence constructed) after m_policy. The
+  // reducer is therefore named here before its constructor has run --
+  // confirm that team_size_recommended() never reads it.
+  template <typename Policy>
+  Policy fix_policy(Policy policy) {
+    const bool vector_length_unset = policy.impl_vector_length() < 0;
+    if (vector_length_unset) {
+      policy.impl_set_vector_length(1);
+    }
+    const bool team_size_unset = policy.team_size() < 0;
+    if (team_size_unset) {
+      policy.impl_set_team_size(policy.team_size_recommended(
+          m_functor, m_reducer, ParallelReduceTag{}));
+    }
+    return policy;
+  }
+
+  // Functor-based reduction writing into a View. Only participates in
+  // overload resolution when the result is a View and ReducerType is not a
+  // reducer. m_shared is computed from the fixed-up m_policy.
+  template <class ViewType>
+  inline ParallelReduce(
+      const FunctorType &arg_functor, const Policy &arg_policy,
+      const ViewType &arg_result,
+      std::enable_if_t<Kokkos::is_view<ViewType>::value &&
+                           !Kokkos::is_reducer<ReducerType>::value,
+                       void *> = nullptr)
+      : m_functor(arg_functor),
+        m_policy(fix_policy(arg_policy)),
+        m_reducer(InvalidType()),
+        m_result_ptr(arg_result.data()),
+        m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) +
+                 FunctorTeamShmemSize<FunctorType>::value(
+                     arg_functor, m_policy.team_size())) {}
+
+  // Reducer-based reduction; the result pointer is taken from the reducer's
+  // view. m_shared is computed from the fixed-up m_policy.
+  inline ParallelReduce(const FunctorType &arg_functor, Policy arg_policy,
+                        const ReducerType &reducer)
+      : m_functor(arg_functor),
+        m_policy(fix_policy(arg_policy)),
+        m_reducer(reducer),
+        m_result_ptr(reducer.view().data()),
+        m_shared(m_policy.scratch_size(0) + m_policy.scratch_size(1) +
+                 FunctorTeamShmemSize<FunctorType>::value(
+                     arg_functor, m_policy.team_size())) {
+    // Disabled check, kept for reference:
+    /*static_assert( std::is_same< typename ViewType::memory_space
+                            , Kokkos::HostSpace >::value
+    , "Reduction result on Kokkos::Threads must be a Kokkos::View in HostSpace"
+    );*/
+  }
+};
+
+}  // namespace Impl
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_UniqueToken.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_UniqueToken.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f9901198f7db469b17514dc89283fb8fbcc6b21e
--- /dev/null
+++ b/packages/kokkos/core/src/Threads/Kokkos_Threads_UniqueToken.hpp
@@ -0,0 +1,157 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1.
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_THREADS_UNIQUETOKEN_HPP +#define KOKKOS_THREADS_UNIQUETOKEN_HPP + +#include <Kokkos_UniqueToken.hpp> + +namespace Kokkos { +namespace Experimental { + +template <> +class UniqueToken<Threads, UniqueTokenScope::Instance> { + private: + using buffer_type = Kokkos::View<uint32_t *, Kokkos::HostSpace>; + int m_count; + buffer_type m_buffer_view; + uint32_t volatile *m_buffer; + + public: + using execution_space = Threads; + using size_type = int; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken(execution_space const & = execution_space()) noexcept + : m_count(::Kokkos::Threads::impl_thread_pool_size()), + m_buffer_view(buffer_type()), + m_buffer(nullptr) {} + + UniqueToken(size_type max_size, execution_space const & = execution_space()) + : m_count(max_size > ::Kokkos::Threads::impl_thread_pool_size() + ? ::Kokkos::Threads::impl_thread_pool_size() + : max_size), + m_buffer_view( + max_size > ::Kokkos::Threads::impl_thread_pool_size() + ? buffer_type() + : buffer_type("UniqueToken::m_buffer_view", + ::Kokkos::Impl::concurrent_bitset::buffer_bound( + m_count))), + m_buffer(m_buffer_view.data()) {} + + /// \brief upper bound for acquired values, i.e. 
0 <= value < size() + KOKKOS_INLINE_FUNCTION + int size() const noexcept { return m_count; } + + /// \brief acquire value such that 0 <= value < size() + KOKKOS_INLINE_FUNCTION + int acquire() const noexcept { + KOKKOS_IF_ON_HOST(( + if (m_buffer == nullptr) { + return Threads::impl_thread_pool_rank(); + } else { + const ::Kokkos::pair<int, int> result = + ::Kokkos::Impl::concurrent_bitset::acquire_bounded( + m_buffer, m_count, ::Kokkos::Impl::clock_tic() % m_count); + + if (result.first < 0) { + ::Kokkos::abort( + "UniqueToken<Threads> failure to acquire tokens, no tokens " + "available"); + } + return result.first; + })) + + KOKKOS_IF_ON_DEVICE((return 0;)) + } + + /// \brief release a value acquired by generate + KOKKOS_INLINE_FUNCTION + void release(int i) const noexcept { + KOKKOS_IF_ON_HOST((if (m_buffer != nullptr) { + ::Kokkos::Impl::concurrent_bitset::release(m_buffer, i); + })) + + KOKKOS_IF_ON_DEVICE(((void)i;)) + } +}; + +template <> +class UniqueToken<Threads, UniqueTokenScope::Global> { + public: + using execution_space = Threads; + using size_type = int; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken(execution_space const & = execution_space()) noexcept {} + + /// \brief upper bound for acquired values, i.e. 
0 <= value < size() + KOKKOS_INLINE_FUNCTION + int size() const noexcept { + KOKKOS_IF_ON_HOST((return Threads::impl_thread_pool_size();)) + + KOKKOS_IF_ON_DEVICE((return 0;)) + } + + /// \brief acquire value such that 0 <= value < size() + KOKKOS_INLINE_FUNCTION + int acquire() const noexcept { + KOKKOS_IF_ON_HOST((return Threads::impl_thread_pool_rank();)) + + KOKKOS_IF_ON_DEVICE((return 0;)) + } + + /// \brief release a value acquired by generate + KOKKOS_INLINE_FUNCTION + void release(int) const noexcept {} +}; + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp b/packages/kokkos/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp index 401f3c0b1a08bb6fab05d2988e7da92460571905..5e8ac4604c044e5f37544bdf84ec7a0f155f43ce 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp +++ b/packages/kokkos/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp @@ -64,13 +64,13 @@ class ParallelFor<FunctorType, Kokkos::WorkGraphPolicy<Traits...>, FunctorType m_functor; template <class TagType> - typename std::enable_if<std::is_same<TagType, void>::value>::type exec_one( + std::enable_if_t<std::is_void<TagType>::value> exec_one( const std::int32_t w) const noexcept { m_functor(w); } template <class TagType> - typename std::enable_if<!std::is_same<TagType, void>::value>::type exec_one( + std::enable_if_t<!std::is_void<TagType>::value> exec_one( const std::int32_t w) const noexcept { const TagType t{}; m_functor(t, w); diff --git a/packages/kokkos/core/src/View/Hooks/Kokkos_ViewHooks.hpp b/packages/kokkos/core/src/View/Hooks/Kokkos_ViewHooks.hpp new file mode 100644 index 0000000000000000000000000000000000000000..77b2730b1b6b2db13397d58d16fd8101e88e2ef8 --- /dev/null +++ b/packages/kokkos/core/src/View/Hooks/Kokkos_ViewHooks.hpp @@ -0,0 +1,151 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXPERIMENTAL_VIEWHOOKS_HPP +#define KOKKOS_EXPERIMENTAL_VIEWHOOKS_HPP + +namespace Kokkos { +namespace Experimental { + +namespace Impl { +template <typename View> +using copy_subscription_function_type = void (*)(View &, const View &); + +template <template <typename> class Invoker, typename... Subscribers> +struct invoke_subscriber_impl; + +template <template <typename> class Invoker> +struct invoke_subscriber_impl<Invoker> { + template <typename ViewType> + static void invoke(ViewType &, const ViewType &) {} +}; + +template <template <typename> class Invoker, typename Subscriber, + typename... RemSubscribers> +struct invoke_subscriber_impl<Invoker, Subscriber, RemSubscribers...> { + template <typename ViewType> + static void invoke(ViewType &self, const ViewType &other) { + Invoker<Subscriber>::call(self, other); + invoke_subscriber_impl<Invoker, RemSubscribers...>::invoke(self, other); + } +}; + +template <typename Subscriber> +struct copy_constructor_invoker { + template <typename View> + static void call(View &self, const View &other) { + Subscriber::copy_constructed(self, other); + } +}; + +template <typename Subscriber> +struct move_constructor_invoker { + template <typename View> + static void call(View &self, const View &other) { + Subscriber::move_constructed(self, other); + } +}; + +template <typename Subscriber> +struct copy_assignment_operator_invoker { + template <typename View> + static void call(View &self, const View &other) { + Subscriber::copy_assigned(self, other); + } +}; + +template <typename Subscriber> +struct move_assignment_operator_invoker { + template <typename View> + static void call(View &self, const View &other) { + Subscriber::move_assigned(self, other); + } +}; +} // namespace Impl + +struct EmptyViewHooks { + using hooks_policy = EmptyViewHooks; + + template <typename View> + static void 
copy_construct(View &, const View &) {} + template <typename View> + static void copy_assign(View &, const View &) {} + template <typename View> + static void move_construct(View &, const View &) {} + template <typename View> + static void move_assign(View &, const View &) {} +}; + +template <class... Subscribers> +struct SubscribableViewHooks { + using hooks_policy = SubscribableViewHooks<Subscribers...>; + + template <typename View> + static void copy_construct(View &self, const View &other) { + Impl::invoke_subscriber_impl<Impl::copy_constructor_invoker, + Subscribers...>::invoke(self, other); + } + template <typename View> + static void copy_assign(View &self, const View &other) { + Impl::invoke_subscriber_impl<Impl::copy_assignment_operator_invoker, + Subscribers...>::invoke(self, other); + } + template <typename View> + static void move_construct(View &self, const View &other) { + Impl::invoke_subscriber_impl<Impl::move_constructor_invoker, + Subscribers...>::invoke(self, other); + } + template <typename View> + static void move_assign(View &self, const View &other) { + Impl::invoke_subscriber_impl<Impl::move_assignment_operator_invoker, + Subscribers...>::invoke(self, other); + } +}; + +using DefaultViewHooks = EmptyViewHooks; + +} // namespace Experimental +} // namespace Kokkos + +#endif // KOKKOS_EXPERIMENTAL_VIEWHOOKS_HPP diff --git a/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp b/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp index 2a06cb65e56cc800867d88663ac7e1ad9e58d801..aedb8d035cd8945fffad4332fd0bea8874a61b0a 100644 --- a/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp +++ b/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp @@ -49,12 +49,13 @@ #include <Kokkos_Cuda.hpp> #include <Cuda/Kokkos_Cuda_Half_Impl_Type.hpp> #include <Cuda/Kokkos_Cuda_Half_Conversion.hpp> -#include <Cuda/Kokkos_Cuda_Parallel.hpp> +#include <Cuda/Kokkos_Cuda_Parallel_MDRange.hpp> +#include <Cuda/Kokkos_Cuda_Parallel_Range.hpp> +#include 
<Cuda/Kokkos_Cuda_Parallel_Team.hpp> #include <Cuda/Kokkos_Cuda_KernelLaunch.hpp> #include <Cuda/Kokkos_Cuda_Instance.hpp> #include <Cuda/Kokkos_Cuda_View.hpp> #include <Cuda/Kokkos_Cuda_Team.hpp> -#include <Cuda/Kokkos_Cuda_Parallel.hpp> #include <Cuda/Kokkos_Cuda_Task.hpp> #include <Cuda/Kokkos_Cuda_MDRangePolicy.hpp> #include <Cuda/Kokkos_Cuda_UniqueToken.hpp> diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingSequenceOperations.hpp b/packages/kokkos/core/src/decl/Kokkos_Declare_OPENACC.hpp similarity index 89% rename from packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingSequenceOperations.hpp rename to packages/kokkos/core/src/decl/Kokkos_Declare_OPENACC.hpp index dacb82bfc2a6028157c877de9c42c3efd2ca5e85..5c09b7a3b65b3a65b9331476fec317108a739827 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingSequenceOperations.hpp +++ b/packages/kokkos/core/src/decl/Kokkos_Declare_OPENACC.hpp @@ -42,10 +42,13 @@ //@HEADER */ -#ifndef KOKKOS_STD_MOD_SEQ_OPS_INC_ALL_HPP -#define KOKKOS_STD_MOD_SEQ_OPS_INC_ALL_HPP +#ifndef KOKKOS_DECLARE_OPENACC_HPP +#define KOKKOS_DECLARE_OPENACC_HPP -#include "./modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp" -#include "./modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp" +#if defined(KOKKOS_ENABLE_OPENACC) +#include <OpenACC/Kokkos_OpenACC.hpp> +#include <OpenACC/Kokkos_OpenACCSpace.hpp> +#include <OpenACC/Kokkos_OpenACC_Traits.hpp> +#endif #endif diff --git a/packages/kokkos/core/src/desul/atomics/CUDA.hpp b/packages/kokkos/core/src/desul/atomics/CUDA.hpp deleted file mode 100644 index be308a23228e3054b0d9d038c076657a2a478491..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/src/desul/atomics/CUDA.hpp +++ /dev/null @@ -1,541 +0,0 @@ -/* -Copyright (c) 2019, Lawrence Livermore National Security, LLC -and DESUL project contributors. See the COPYRIGHT file for details. 
-Source: https://github.com/desul/desul - -SPDX-License-Identifier: (BSD-3-Clause) -*/ -#ifndef DESUL_ATOMICS_CUDA_HPP_ -#define DESUL_ATOMICS_CUDA_HPP_ - -#ifdef DESUL_HAVE_CUDA_ATOMICS -// When building with Clang we need to include the device functions always since Clang -// must see a consistent overload set in both device and host compilation, but that -// means we need to know on the host what to make visible, i.e. we need a host side -// compile knowledge of architecture. -#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700)) || \ - (!defined(__NVCC__) && !defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA)) -#define DESUL_HAVE_CUDA_ATOMICS_ASM -#include <desul/atomics/cuda/CUDA_asm.hpp> -#endif - -#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700)) || \ - (!defined(__NVCC__) && !defined(DESUL_HAVE_CUDA_ATOMICS_ASM)) -namespace desul { -namespace Impl { -template<class T> -struct is_cuda_atomic_integer_type { - static constexpr bool value = std::is_same<T,int>::value || - std::is_same<T,unsigned int>::value || - std::is_same<T,unsigned long long int>::value; -}; - -template<class T> -struct is_cuda_atomic_add_type { - static constexpr bool value = is_cuda_atomic_integer_type<T>::value || -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 600) - std::is_same<T,double>::value || -#endif - std::is_same<T,float>::value; -}; - -template<class T> -struct is_cuda_atomic_sub_type { - static constexpr bool value = std::is_same<T,int>::value || - std::is_same<T,unsigned int>::value; -}; -} // Impl - -// Atomic Add -template<class T> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value,T>::type -atomic_fetch_add(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicAdd(dest,val); -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value,T>::type -atomic_fetch_add(T* dest, T val, MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = 
atomicAdd(dest,val); - __threadfence(); - return return_val; -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value,T>::type -atomic_fetch_add(T* dest, T val, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_add(dest,val,MemoryOrder(),MemoryScopeDevice()); -} - - -// Atomic Sub -template<class T> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value,T>::type -atomic_fetch_sub(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicSub(dest,val); -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value,T>::type -atomic_fetch_sub(T* dest, T val, MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = atomicSub(dest,val); - __threadfence(); - return return_val; -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value,T>::type -atomic_fetch_sub(T* dest, T val, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_sub(dest,val,MemoryOrder(),MemoryScopeDevice()); -} - -// Wrap around atomic add -__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest, - unsigned int val, - MemoryOrderRelaxed, - MemoryScopeDevice) { - return atomicInc(dest, val); -} - -template <typename MemoryOrder> -__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest, - unsigned int val, - MemoryOrder, - MemoryScopeDevice) { - __threadfence(); - unsigned int return_val = atomicInc(dest, val); - __threadfence(); - return return_val; -} - -template <typename MemoryOrder> -__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest, - unsigned int val, - MemoryOrder, - MemoryScopeCore) { - return atomic_fetch_inc_mod(dest, val, MemoryOrder(), MemoryScopeDevice()); -} - -// Wrap around atomic sub -__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest, - unsigned int val, - 
MemoryOrderRelaxed, - MemoryScopeDevice) { - return atomicDec(dest, val); -} - -template <typename MemoryOrder> -__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest, - unsigned int val, - MemoryOrder, - MemoryScopeDevice) { - __threadfence(); - unsigned int return_val = atomicDec(dest, val); - __threadfence(); - return return_val; -} - -template <typename MemoryOrder> -__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest, - unsigned int val, - MemoryOrder, - MemoryScopeCore) { - return atomic_fetch_dec_mod(dest, val, MemoryOrder(), MemoryScopeDevice()); -} - -// Atomic Inc -template <typename T> -__device__ inline - typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value, T>::type - atomic_fetch_inc(T* dest, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicAdd(dest, T(1)); -} - -template <typename T, typename MemoryOrder> -__device__ inline - typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value, T>::type - atomic_fetch_inc(T* dest, MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = atomicAdd(dest, T(1)); - __threadfence(); - - return return_val; -} - -template <typename T, typename MemoryOrder> -__device__ inline - typename std::enable_if<Impl::is_cuda_atomic_add_type<T>::value, T>::type - atomic_fetch_inc(T* dest, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_add(dest, T(1), MemoryOrder(), MemoryScopeDevice()); -} - -// Atomic Dec -template <typename T> -__device__ inline - typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value, T>::type - atomic_fetch_dec(T* dest, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicSub(dest, T(1)); -} - -template <typename T, typename MemoryOrder> -__device__ inline - typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value, T>::type - atomic_fetch_dec(T* dest, MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = atomicSub(dest, T(1)); - __threadfence(); - return return_val; -} - -template <typename T, 
typename MemoryOrder> -__device__ inline - typename std::enable_if<Impl::is_cuda_atomic_sub_type<T>::value, T>::type - atomic_fetch_dec(T* dest, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_sub(dest, T(1), MemoryOrder(), MemoryScopeDevice()); -} - -// Atomic Max -template<class T> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_max(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicMax(dest,val); -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_max(T* dest, T val, MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = atomicMax(dest,val); - __threadfence(); - return return_val; -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_max(T* dest, T val, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_max(dest,val,MemoryOrder(),MemoryScopeDevice()); -} - -// Atomic Min -template<class T> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_min(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicMin(dest,val); -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_min(T* dest, T val, MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = atomicMin(dest,val); - __threadfence(); - return return_val; -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_min(T* dest, T val, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_min(dest,val,MemoryOrder(),MemoryScopeDevice()); -} - -// Atomic And -template<class T> -__device__ inline -typename 
std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_and(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicAnd(dest,val); -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_and(T* dest, T val, MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = atomicAnd(dest,val); - __threadfence(); - return return_val; -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_and(T* dest, T val, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_and(dest,val,MemoryOrder(),MemoryScopeDevice()); -} - -// Atomic XOR -template<class T> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_xor(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicXor(dest,val); -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_xor(T* dest, T val, MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = atomicXor(dest,val); - __threadfence(); - return return_val; -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_xor(T* dest, T val, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_xor(dest,val,MemoryOrder(),MemoryScopeDevice()); -} - -// Atomic OR -template<class T> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_or(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { - return atomicOr(dest,val); -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_or(T* dest, T val, 
MemoryOrder, MemoryScopeDevice) { - __threadfence(); - T return_val = atomicOr(dest,val); - __threadfence(); - return return_val; -} - -template<class T, class MemoryOrder> -__device__ inline -typename std::enable_if<Impl::is_cuda_atomic_integer_type<T>::value,T>::type -atomic_fetch_or(T* dest, T val, MemoryOrder, MemoryScopeCore) { - return atomic_fetch_or(dest,val,MemoryOrder(),MemoryScopeDevice()); -} -} // desul -#endif - -#if !defined(__NVCC__) -// Functions defined as device functions in CUDA which don't exist in the GCC overload set -namespace desul { - -#if defined(DESUL_HAVE_CUDA_ATOMICS_ASM) - #define DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(TYPE,ORDER,SCOPE) \ - inline void atomic_add(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ - (void) atomic_fetch_add(dest, val, order, scope); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(int32_t,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(long,MemoryOrderRelaxed,MemoryScopeDevice); // only for ASM? - DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(unsigned long long,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(float,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(double,MemoryOrderRelaxed,MemoryScopeDevice); - - #define DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(TYPE,ORDER,SCOPE) \ - inline void atomic_sub(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ - (void) atomic_fetch_sub(dest, val, order, scope); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(int32_t,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(long,MemoryOrderRelaxed,MemoryScopeDevice); // only for ASM? 
- DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(float,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(double,MemoryOrderRelaxed,MemoryScopeDevice); - - #define DESUL_IMPL_CUDA_HOST_ATOMIC_INC(TYPE,ORDER,SCOPE) \ - inline void atomic_inc(TYPE* const dest, ORDER order, SCOPE scope) { \ - (void) atomic_fetch_inc(dest, order, scope); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_INC(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice); // only for ASM? - - #define DESUL_IMPL_CUDA_HOST_ATOMIC_DEC(TYPE,ORDER,SCOPE) \ - inline void atomic_dec(TYPE* const dest, ORDER order, SCOPE scope) { \ - (void) atomic_fetch_dec(dest, order, scope); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_DEC(unsigned,MemoryOrderRelaxed,MemoryScopeDevice); // only for ASM? - -#endif // DESUL_HAVE_CUDA_ATOMICS_ASM - -#define DESUL_IMPL_CUDA_HOST_ATOMIC_INC_MOD(TYPE,ORDER,SCOPE) \ - inline TYPE atomic_fetch_inc_mod(TYPE* dest, TYPE val, ORDER order, SCOPE scope) { \ - using cas_t = typename Impl::atomic_compare_exchange_type<sizeof(TYPE)>::type; \ - cas_t oldval = reinterpret_cast<cas_t&>(*dest); \ - cas_t assume = oldval; \ - do { \ - assume = oldval; \ - TYPE newval = (reinterpret_cast<TYPE&>(assume) >= val) ? 
static_cast<TYPE>(0) : reinterpret_cast<TYPE&>(assume) + static_cast<TYPE>(1); \ - oldval = desul::atomic_compare_exchange(reinterpret_cast<cas_t*>(dest), assume, reinterpret_cast<cas_t&>(newval), order, scope); \ - } while (assume != oldval); \ - return reinterpret_cast<TYPE&>(oldval); \ -} -DESUL_IMPL_CUDA_HOST_ATOMIC_INC_MOD(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice); -#define DESUL_IMPL_CUDA_HOST_ATOMIC_DEC_MOD(TYPE,ORDER,SCOPE) \ - inline TYPE atomic_fetch_dec_mod(TYPE* dest, TYPE val, ORDER order, SCOPE scope) { \ - using cas_t = typename Impl::atomic_compare_exchange_type<sizeof(TYPE)>::type; \ - cas_t oldval = reinterpret_cast<cas_t&>(*dest); \ - cas_t assume = oldval; \ - do { \ - assume = oldval; \ - TYPE newval = ((reinterpret_cast<TYPE&>(assume) == static_cast<TYPE>(0)) | (reinterpret_cast<TYPE&>(assume) > val)) ? val : reinterpret_cast<TYPE&>(assume) - static_cast<TYPE>(1); \ - oldval = desul::atomic_compare_exchange(reinterpret_cast<cas_t*>(dest), assume, reinterpret_cast<cas_t&>(newval), order, scope); \ - } while (assume != oldval); \ - return reinterpret_cast<TYPE&>(oldval); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_DEC_MOD(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice); - - #define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_ADD(TYPE,ORDER,SCOPE) \ - inline TYPE atomic_fetch_add(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ - return Impl::atomic_fetch_oper(Impl::AddOper<TYPE, const TYPE>(),dest, val, order, scope); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_ADD(float,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_ADD(double,MemoryOrderRelaxed,MemoryScopeDevice); - - #define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_SUB(TYPE,ORDER,SCOPE) \ - inline TYPE atomic_fetch_sub(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ - return Impl::atomic_fetch_oper(Impl::SubOper<TYPE, const TYPE>(),dest, val, order, scope); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_SUB(float,MemoryOrderRelaxed,MemoryScopeDevice); - 
DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_SUB(double,MemoryOrderRelaxed,MemoryScopeDevice); - - - #define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(TYPE,ORDER,SCOPE) \ - inline TYPE atomic_fetch_max(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ - return Impl::atomic_fetch_oper(Impl::MaxOper<TYPE, const TYPE>(), dest, val, order, scope); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(int,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(long,MemoryOrderRelaxed,MemoryScopeDevice); // only for ASM? - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(unsigned long,MemoryOrderRelaxed,MemoryScopeDevice); -// DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(unsigned long long,MemoryOrderRelaxed,MemoryScopeDevice); - - #define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(TYPE,ORDER,SCOPE) \ - inline TYPE atomic_fetch_min(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ - return Impl::atomic_fetch_oper(Impl::MinOper<TYPE, const TYPE>(), dest, val, order, scope); \ - } - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(int,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(long,MemoryOrderRelaxed,MemoryScopeDevice); // only for ASM? 
- DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(unsigned int,MemoryOrderRelaxed,MemoryScopeDevice); - DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(unsigned long,MemoryOrderRelaxed,MemoryScopeDevice); -// DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(unsigned long long,MemoryOrderRelaxed,MemoryScopeDevice); -// inline void atomic_fetch_max(int32_t* const dest, int32_t val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - -} // namespace desul - -// Functions defined int the GCC overload set but not in the device overload set -namespace desul { - __device__ inline - unsigned long long atomic_fetch_add(unsigned long long* const dest, unsigned long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::AddOper<unsigned long long, const unsigned long long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_fetch_add(long long* const dest, long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::AddOper<long long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_fetch_add(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::AddOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_fetch_sub(long long* const dest, long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::SubOper<long long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_fetch_sub(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::SubOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long atomic_fetch_max(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::MaxOper<long, const long>(), dest, val, order, scope); - } - 
__device__ inline - long atomic_fetch_min(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::MinOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long atomic_fetch_or(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::OrOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_fetch_or(long long* const dest, long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::OrOper<long long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_fetch_xor(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::XorOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_fetch_xor(long long* const dest, long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::XorOper<long long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_fetch_and(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::AndOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_fetch_and(long long* const dest, long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_fetch_oper(Impl::AndOper<long long, const long long>(), dest, val, order, scope); - } - - - __device__ inline - unsigned long long atomic_add_fetch(unsigned long long* const dest, unsigned long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::AddOper<unsigned long long, const unsigned long long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_add_fetch(long long* const dest, 
long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::AddOper<long long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_add_fetch(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::AddOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_sub_fetch(long long* const dest, long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::SubOper<long long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_sub_fetch(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::SubOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_or_fetch(long long* const dest, long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::OrOper<long long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_or_fetch(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::OrOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_xor_fetch(long long* const dest, long long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::XorOper<long long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_xor_fetch(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::XorOper<long, const long>(), dest, val, order, scope); - } - __device__ inline - long long atomic_and_fetch(long long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::AndOper<long 
long, const long long>(), dest, val, order, scope); - } - __device__ inline - long atomic_and_fetch(long* const dest, long val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { - return Impl::atomic_oper_fetch(Impl::AndOper<long, const long>(), dest, val, order, scope); - } -} // namespace desul -#endif - -#endif // DESUL_HAVE_CUDA_ATOMICS -#endif diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_SYCL.hpp b/packages/kokkos/core/src/desul/atomics/Compare_Exchange_SYCL.hpp deleted file mode 100644 index 14e0ab4cff97e0060582a55bafb7bbffa50d78c7..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_SYCL.hpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2019, Lawrence Livermore National Security, LLC -and DESUL project contributors. See the COPYRIGHT file for details. -Source: https://github.com/desul/desul - -SPDX-License-Identifier: (BSD-3-Clause) -*/ - -#ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_SYCL_HPP_ -#define DESUL_ATOMICS_COMPARE_EXCHANGE_SYCL_HPP_ - -// clang-format off -#include "desul/atomics/SYCLConversions.hpp" -#include "desul/atomics/Common.hpp" - -#include <CL/sycl.hpp> -// clang-format on - -#ifdef DESUL_HAVE_SYCL_ATOMICS - -namespace desul { - -template <class MemoryOrder, class MemoryScope> -inline void atomic_thread_fence(MemoryOrder, MemoryScope) { - sycl::atomic_fence( - Impl::DesulToSYCLMemoryOrder<MemoryOrder, /*extended namespace*/ false>::value, - Impl::DesulToSYCLMemoryScope<MemoryScope, /*extended namespace*/ false>::value); -} - -// FIXME_SYCL We need to either use generic_space or figure out how to check for the -// correct adress space in a SYCL-portable way. 
-#ifndef __NVPTX__ -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<sizeof(T) == 4, T>::type atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrder, MemoryScope) { - static_assert(sizeof(unsigned int) == 4, - "this function assumes an unsigned int is 32-bit"); - auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<unsigned int>(dest); - if (l) { - Impl::sycl_atomic_ref<unsigned int, - MemoryOrder, - MemoryScopeDevice, - sycl::access::address_space::local_space> - dest_ref(*reinterpret_cast<unsigned int*>(dest)); - dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned int*>(&compare), - *reinterpret_cast<unsigned int*>(&value)); - return compare; - } else { - Impl::sycl_atomic_ref<unsigned int, - MemoryOrder, - MemoryScopeDevice, - sycl::access::address_space::global_space> - dest_ref(*reinterpret_cast<unsigned int*>(dest)); - dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned int*>(&compare), - *reinterpret_cast<unsigned int*>(&value)); - return compare; - } -} -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrder, MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, - "this function assumes an unsigned long long is 64-bit"); - auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<unsigned long long int>(dest); - if (l) { - Impl::sycl_atomic_ref<unsigned long long int, - MemoryOrder, - MemoryScopeDevice, - sycl::access::address_space::local_space> - dest_ref(*reinterpret_cast<unsigned long long int*>(dest)); - dest_ref.compare_exchange_strong( - *reinterpret_cast<unsigned long long int*>(&compare), - *reinterpret_cast<unsigned long long int*>(&value)); - return compare; - } else { - Impl::sycl_atomic_ref<unsigned long long int, - MemoryOrder, - MemoryScopeDevice, - sycl::access::address_space::global_space> - dest_ref(*reinterpret_cast<unsigned long long 
int*>(dest)); - dest_ref.compare_exchange_strong( - *reinterpret_cast<unsigned long long int*>(&compare), - *reinterpret_cast<unsigned long long int*>(&value)); - return compare; - } -} - -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(T* const dest, - T value, - MemoryOrder, - MemoryScope) { - static_assert(sizeof(unsigned int) == 4, - "this function assumes an unsigned int is 32-bit"); - auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<unsigned int>(dest); - if (l) { - Impl::sycl_atomic_ref<unsigned int, - MemoryOrder, - MemoryScopeDevice, - sycl::access::address_space::local_space> - dest_ref(*reinterpret_cast<unsigned int*>(dest)); - unsigned int return_val = - dest_ref.exchange(*reinterpret_cast<unsigned int*>(&value)); - return reinterpret_cast<T&>(return_val); - } else { - Impl::sycl_atomic_ref<unsigned int, - MemoryOrder, - MemoryScopeDevice, - sycl::access::address_space::global_space> - dest_ref(*reinterpret_cast<unsigned int*>(dest)); - unsigned int return_val = - dest_ref.exchange(*reinterpret_cast<unsigned int*>(&value)); - return reinterpret_cast<T&>(return_val); - } -} -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(T* const dest, - T value, - MemoryOrder, - MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, - "this function assumes an unsigned long long is 64-bit"); - auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<unsigned long long int>(dest); - if (l) { - Impl::sycl_atomic_ref<unsigned long long int, - MemoryOrder, - MemoryScopeDevice, - sycl::access::address_space::local_space> - dest_ref(*reinterpret_cast<unsigned long long int*>(dest)); - unsigned long long int return_val = - dest_ref.exchange(*reinterpret_cast<unsigned long long int*>(&value)); - return reinterpret_cast<T&>(return_val); - } else { - Impl::sycl_atomic_ref<unsigned long long int, - MemoryOrder, - 
MemoryScopeDevice, - sycl::access::address_space::global_space> - dest_ref(*reinterpret_cast<unsigned long long int*>(dest)); - unsigned long long int return_val = - dest_ref.exchange(*reinterpret_cast<unsigned long long int*>(&value)); - return reinterpret_cast<T&>(return_val); - } -} -#else -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<sizeof(T) == 4, T>::type atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrder, MemoryScope) { - static_assert(sizeof(unsigned int) == 4, - "this function assumes an unsigned int is 32-bit"); - Impl::sycl_atomic_ref<unsigned int, - MemoryOrder, - MemoryScope, - sycl::access::address_space::global_space> - dest_ref(*reinterpret_cast<unsigned int*>(dest)); - dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned int*>(&compare), - *reinterpret_cast<unsigned int*>(&value)); - return compare; -} -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrder, MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, - "this function assumes an unsigned long long is 64-bit"); - Impl::sycl_atomic_ref<unsigned long long int, - MemoryOrder, - MemoryScope, - sycl::access::address_space::global_space> - dest_ref(*reinterpret_cast<unsigned long long int*>(dest)); - dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned long long int*>(&compare), - *reinterpret_cast<unsigned long long int*>(&value)); - return compare; -} - -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(T* const dest, - T value, - MemoryOrder, - MemoryScope) { - static_assert(sizeof(unsigned int) == 4, - "this function assumes an unsigned int is 32-bit"); - Impl::sycl_atomic_ref<unsigned int, - MemoryOrder, - MemoryScope, - sycl::access::address_space::global_space> - 
dest_ref(*reinterpret_cast<unsigned int*>(dest)); - unsigned int return_val = dest_ref.exchange(*reinterpret_cast<unsigned int*>(&value)); - return reinterpret_cast<T&>(return_val); -} -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(T* const dest, - T value, - MemoryOrder, - MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, - "this function assumes an unsigned long long is 64-bit"); - Impl::sycl_atomic_ref<unsigned long long int, - MemoryOrder, - MemoryScope, - sycl::access::address_space::global_space> - dest_ref(*reinterpret_cast<unsigned long long int*>(dest)); - unsigned long long int return_val = - dest_ref.exchange(reinterpret_cast<unsigned long long int&>(value)); - return reinterpret_cast<T&>(return_val); -} -#endif - -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type -atomic_compare_exchange( - T* const /*dest*/, T compare, T /*value*/, MemoryOrder, MemoryScope) { - // FIXME_SYCL not implemented - assert(false); - return compare; -} - -template <typename T, class MemoryOrder, class MemoryScope> -typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type atomic_exchange( - T* const /*dest*/, T value, MemoryOrder, MemoryScope) { - // FIXME_SYCL not implemented - assert(false); - return value; -} - -} // namespace desul - -#endif -#endif diff --git a/packages/kokkos/core/src/desul/atomics/SYCL.hpp b/packages/kokkos/core/src/desul/atomics/SYCL.hpp deleted file mode 100644 index 852559101740c6d52a1ce1db85207417fa570d4f..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/src/desul/atomics/SYCL.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/* -Copyright (c) 2019, Lawrence Livermore National Security, LLC -and DESUL project contributors. See the COPYRIGHT file for details. 
-Source: https://github.com/desul/desul - -SPDX-License-Identifier: (BSD-3-Clause) -*/ -#ifndef DESUL_ATOMICS_SYCL_HPP_ -#define DESUL_ATOMICS_SYCL_HPP_ - -#ifdef DESUL_HAVE_SYCL_ATOMICS - -// clang-format off -#include "desul/atomics/SYCLConversions.hpp" -#include "desul/atomics/Common.hpp" -// clang-format on - -namespace desul { - -// FIXME_SYCL We need to either use generic_space or figure out how to check for the -// correct adress space in a SYCL-portable way. -#ifndef __NVPTX__ -#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, TYPE) \ - template <class MemoryOrder> \ - TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeDevice) { \ - auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<TYPE>(dest); \ - if (l) { \ - Impl::sycl_atomic_ref<TYPE, \ - MemoryOrder, \ - MemoryScopeDevice, \ - sycl::access::address_space::local_space> \ - dest_ref(*dest); \ - return dest_ref.fetch_##OPER(val); \ - } else { \ - Impl::sycl_atomic_ref<TYPE, \ - MemoryOrder, \ - MemoryScopeDevice, \ - sycl::access::address_space::global_space> \ - dest_ref(*dest); \ - return dest_ref.fetch_##OPER(val); \ - } \ - } \ - template <class MemoryOrder> \ - TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeCore) { \ - auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<TYPE>(dest); \ - if (l) { \ - Impl::sycl_atomic_ref<TYPE, \ - MemoryOrder, \ - MemoryScopeDevice, \ - sycl::access::address_space::local_space> \ - dest_ref(*dest); \ - return dest_ref.fetch_##OPER(val); \ - } else { \ - Impl::sycl_atomic_ref<TYPE, \ - MemoryOrder, \ - MemoryScopeDevice, \ - sycl::access::address_space::global_space> \ - dest_ref(*dest); \ - return dest_ref.fetch_##OPER(val); \ - } \ - } -#else -#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, TYPE) \ - template <class MemoryOrder> \ - TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeDevice) { \ - Impl::sycl_atomic_ref<TYPE, \ - MemoryOrder, \ - MemoryScopeDevice, \ - sycl::access::address_space::global_space> 
\ - dest_ref(*dest); \ - return dest_ref.fetch_##OPER(val); \ - } \ - template <class MemoryOrder> \ - TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeCore) { \ - Impl::sycl_atomic_ref<TYPE, \ - MemoryOrder, \ - MemoryScopeCore, \ - sycl::access::address_space::global_space> \ - dest_ref(*dest); \ - return dest_ref.fetch_##OPER(val); \ - } -#endif - -#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(OPER) \ - DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, int) \ - DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned int) \ - DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, long) \ - DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned long) \ - DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, long long) \ - DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned long long) - -#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(OPER) \ - DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, float) \ - DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, double) - -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(add) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(sub) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(and) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(or) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(xor) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(min) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(max) - -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(add) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(sub) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(min) -DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(max) - -#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT -#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL -#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER - -} // namespace desul - -#endif // DESUL_HAVE_SYCL_ATOMICS -#endif // DESUL_ATOMICS_SYCL_HPP_ diff --git a/packages/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp b/packages/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp index 1a4e7b482c44b93f87ed981682e3895cf5a534ff..27540865a5950ba10096fadfb57b752e6fdef0a0 100644 --- 
a/packages/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp +++ b/packages/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp @@ -50,6 +50,7 @@ namespace Kokkos { namespace Experimental { class HIPSpace; ///< Memory space on HIP GPU class HIPHostPinnedSpace; ///< Memory space on Host accessible to HIP GPU +class HIPManagedSpace; ///< Memory migratable between Host and HIP GPU class HIP; ///< Execution space for HIP GPU } // namespace Experimental } // namespace Kokkos diff --git a/packages/kokkos/core/src/fwd/Kokkos_Fwd_OPENACC.hpp b/packages/kokkos/core/src/fwd/Kokkos_Fwd_OPENACC.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d733f993d023d8247c14bc3a0594068b95b64786 --- /dev/null +++ b/packages/kokkos/core/src/fwd/Kokkos_Fwd_OPENACC.hpp @@ -0,0 +1,56 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENACC_FWD_HPP_ +#define KOKKOS_OPENACC_FWD_HPP_ + +#if defined(KOKKOS_ENABLE_OPENACC) +namespace Kokkos { +namespace Experimental { +class OpenACC; ///< OpenACC execution space. 
+class OpenACCSpace; +} // namespace Experimental +} // namespace Kokkos +#endif +#endif diff --git a/packages/kokkos/core/src/impl/CMakeLists.txt b/packages/kokkos/core/src/impl/CMakeLists.txt index 9ff02a2eae84ba86f5c825b9054798f0b7218f61..203fd4a3a44adbc41e2615bd73e91a3bfaaec0f4 100644 --- a/packages/kokkos/core/src/impl/CMakeLists.txt +++ b/packages/kokkos/core/src/impl/CMakeLists.txt @@ -12,7 +12,7 @@ TRIBITS_ADD_LIBRARY( DEPLIBS ) -SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) +SET(TRILINOS_INCDIR ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) INSTALL(FILES ${HEADERS} DESTINATION ${TRILINOS_INCDIR}/impl/) diff --git a/packages/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp b/packages/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp index 5167c9ed65b42b1e567286849f37e89616e0e980..2b2120ce49b30da1db2c07b9aa6cd8a3ca1be321 100644 --- a/packages/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp +++ b/packages/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp @@ -1366,9 +1366,8 @@ struct Tile_Loop_Type<8, IsLeft, IType, void, void> { // tagged versions template <bool IsLeft, typename IType, typename Tagged> -struct Tile_Loop_Type< - 1, IsLeft, IType, Tagged, - typename std::enable_if<!std::is_same<Tagged, void>::value>::type> { +struct Tile_Loop_Type<1, IsLeft, IType, Tagged, + std::enable_if_t<!std::is_void<Tagged>::value>> { template <typename Func, typename Offset, typename ExtentA, typename ExtentB> static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) { @@ -1385,9 +1384,8 @@ struct Tile_Loop_Type< }; template <bool IsLeft, typename IType, typename Tagged> -struct Tile_Loop_Type< - 2, IsLeft, IType, Tagged, - typename std::enable_if<!std::is_same<Tagged, void>::value>::type> { +struct Tile_Loop_Type<2, IsLeft, IType, Tagged, + std::enable_if_t<!std::is_void<Tagged>::value>> { template <typename Func, typename Offset, typename ExtentA, typename ExtentB> static void 
apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) { @@ -1404,9 +1402,8 @@ struct Tile_Loop_Type< }; template <bool IsLeft, typename IType, typename Tagged> -struct Tile_Loop_Type< - 3, IsLeft, IType, Tagged, - typename std::enable_if<!std::is_same<Tagged, void>::value>::type> { +struct Tile_Loop_Type<3, IsLeft, IType, Tagged, + std::enable_if_t<!std::is_void<Tagged>::value>> { template <typename Func, typename Offset, typename ExtentA, typename ExtentB> static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) { @@ -1423,9 +1420,8 @@ struct Tile_Loop_Type< }; template <bool IsLeft, typename IType, typename Tagged> -struct Tile_Loop_Type< - 4, IsLeft, IType, Tagged, - typename std::enable_if<!std::is_same<Tagged, void>::value>::type> { +struct Tile_Loop_Type<4, IsLeft, IType, Tagged, + std::enable_if_t<!std::is_void<Tagged>::value>> { template <typename Func, typename Offset, typename ExtentA, typename ExtentB> static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) { @@ -1442,9 +1438,8 @@ struct Tile_Loop_Type< }; template <bool IsLeft, typename IType, typename Tagged> -struct Tile_Loop_Type< - 5, IsLeft, IType, Tagged, - typename std::enable_if<!std::is_same<Tagged, void>::value>::type> { +struct Tile_Loop_Type<5, IsLeft, IType, Tagged, + std::enable_if_t<!std::is_void<Tagged>::value>> { template <typename Func, typename Offset, typename ExtentA, typename ExtentB> static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) { @@ -1461,9 +1456,8 @@ struct Tile_Loop_Type< }; template <bool IsLeft, typename IType, typename Tagged> -struct Tile_Loop_Type< - 6, IsLeft, IType, Tagged, - typename std::enable_if<!std::is_same<Tagged, void>::value>::type> { +struct Tile_Loop_Type<6, IsLeft, IType, Tagged, + std::enable_if_t<!std::is_void<Tagged>::value>> { template <typename Func, typename 
Offset, typename ExtentA, typename ExtentB> static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) { @@ -1480,9 +1474,8 @@ struct Tile_Loop_Type< }; template <bool IsLeft, typename IType, typename Tagged> -struct Tile_Loop_Type< - 7, IsLeft, IType, Tagged, - typename std::enable_if<!std::is_same<Tagged, void>::value>::type> { +struct Tile_Loop_Type<7, IsLeft, IType, Tagged, + std::enable_if_t<!std::is_void<Tagged>::value>> { template <typename Func, typename Offset, typename ExtentA, typename ExtentB> static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) { @@ -1499,9 +1492,8 @@ struct Tile_Loop_Type< }; template <bool IsLeft, typename IType, typename Tagged> -struct Tile_Loop_Type< - 8, IsLeft, IType, Tagged, - typename std::enable_if<!std::is_same<Tagged, void>::value>::type> { +struct Tile_Loop_Type<8, IsLeft, IType, Tagged, + std::enable_if_t<!std::is_void<Tagged>::value>> { template <typename Func, typename Offset, typename ExtentA, typename ExtentB> static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) { @@ -1909,25 +1901,22 @@ struct HostIterateTile<RP, Functor, Tag, ValueType, #endif template <typename... Args> - typename std::enable_if<(sizeof...(Args) == RP::rank && - std::is_same<Tag, void>::value), - void>::type + std::enable_if_t<(sizeof...(Args) == RP::rank && std::is_void<Tag>::value), + void> apply(Args&&... args) const { m_func(args...); } template <typename... Args> - typename std::enable_if<(sizeof...(Args) == RP::rank && - !std::is_same<Tag, void>::value), - void>::type + std::enable_if_t<(sizeof...(Args) == RP::rank && !std::is_void<Tag>::value), + void> apply(Args&&... 
args) const { m_func(m_tag, args...); } RP const& m_rp; Functor const& m_func; - typename std::conditional<std::is_same<Tag, void>::value, int, Tag>::type - m_tag; + std::conditional_t<std::is_void<Tag>::value, int, Tag> m_tag; }; // For ParallelReduce @@ -2329,17 +2318,15 @@ struct HostIterateTile<RP, Functor, Tag, ValueType, #endif template <typename... Args> - typename std::enable_if<(sizeof...(Args) == RP::rank && - std::is_same<Tag, void>::value), - void>::type + std::enable_if_t<(sizeof...(Args) == RP::rank && std::is_void<Tag>::value), + void> apply(Args&&... args) const { m_func(args..., m_v); } template <typename... Args> - typename std::enable_if<(sizeof...(Args) == RP::rank && - !std::is_same<Tag, void>::value), - void>::type + std::enable_if_t<(sizeof...(Args) == RP::rank && !std::is_void<Tag>::value), + void> apply(Args&&... args) const { m_func(m_tag, args..., m_v); } @@ -2347,8 +2334,7 @@ struct HostIterateTile<RP, Functor, Tag, ValueType, RP const& m_rp; Functor const& m_func; value_type& m_v; - typename std::conditional<std::is_same<Tag, void>::value, int, Tag>::type - m_tag; + std::conditional_t<std::is_void<Tag>::value, int, Tag> m_tag; }; // For ParallelReduce @@ -2751,17 +2737,15 @@ struct HostIterateTile<RP, Functor, Tag, ValueType, #endif template <typename... Args> - typename std::enable_if<(sizeof...(Args) == RP::rank && - std::is_same<Tag, void>::value), - void>::type + std::enable_if_t<(sizeof...(Args) == RP::rank && std::is_void<Tag>::value), + void> apply(Args&&... args) const { m_func(args..., m_v); } template <typename... Args> - typename std::enable_if<(sizeof...(Args) == RP::rank && - !std::is_same<Tag, void>::value), - void>::type + std::enable_if_t<(sizeof...(Args) == RP::rank && !std::is_void<Tag>::value), + void> apply(Args&&... 
args) const { m_func(m_tag, args..., m_v); } @@ -2769,8 +2753,7 @@ struct HostIterateTile<RP, Functor, Tag, ValueType, RP const& m_rp; Functor const& m_func; value_type* m_v; - typename std::conditional<std::is_same<Tag, void>::value, int, Tag>::type - m_tag; + std::conditional_t<std::is_void<Tag>::value, int, Tag> m_tag; }; // ------------------------------------------------------------------ // diff --git a/packages/kokkos/core/src/impl/KokkosExp_IterateTileGPU.hpp b/packages/kokkos/core/src/impl/KokkosExp_IterateTileGPU.hpp index 688afcc107e4e4ff93a2b415c8209d29bf4c0ba2..957c3b638c5e23cc08b6efdd8a16afe415e366a3 100644 --- a/packages/kokkos/core/src/impl/KokkosExp_IterateTileGPU.hpp +++ b/packages/kokkos/core/src/impl/KokkosExp_IterateTileGPU.hpp @@ -883,9 +883,6 @@ struct DeviceIterateTile<6, PolicyType, Functor, Tag> { namespace Reduce { -template <typename T> -using is_void = std::is_same<T, void>; - template <typename T> struct is_array_type : std::false_type { using value_type = T; diff --git a/packages/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp b/packages/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp index 20fc6268c7dcd30c5c8deac332c61be673c7cc3d..2ffcd626d673420f62989d59f6be397961a8be16 100644 --- a/packages/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp @@ -205,14 +205,29 @@ struct ExecPolicyTraitsWithDefaults : AnalysisResults { }; //------------------------------------------------------------------------------ + +constexpr bool warn_if_deprecated(std::false_type) { return true; } +KOKKOS_DEPRECATED_WITH_COMMENT( + "Invalid WorkTag template argument in execution policy!!") +constexpr bool warn_if_deprecated(std::true_type) { return true; } +#define KOKKOS_IMPL_STATIC_WARNING(...) \ + static_assert( \ + warn_if_deprecated(std::integral_constant<bool, __VA_ARGS__>()), "") + template <typename... 
Traits> struct PolicyTraits : ExecPolicyTraitsWithDefaults<AnalyzeExecPolicy<void, Traits...>> { using base_t = ExecPolicyTraitsWithDefaults<AnalyzeExecPolicy<void, Traits...>>; using base_t::base_t; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 + KOKKOS_IMPL_STATIC_WARNING(!std::is_empty<typename base_t::work_tag>::value && + !std::is_void<typename base_t::work_tag>::value); +#endif }; +#undef KOKKOS_IMPL_STATIC_WARNING + } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index d481a8dc0f21efa675e0b181a8c6981e1f9afce6..e203c0a2bd1e85797fd41928860b3e1f40ba2024 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -88,7 +88,7 @@ __inline__ __device__ unsigned long long int atomic_compare_exchange( template <typename T> __inline__ __device__ T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) { + std::enable_if_t<sizeof(T) == sizeof(int), const T&> val) { const int tmp = atomicCAS((int*)dest, *((int*)&compare), *((int*)&val)); return *((T*)&tmp); } @@ -96,9 +96,10 @@ __inline__ __device__ T atomic_compare_exchange( template <typename T> __inline__ __device__ T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T&>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T&> + val) { using type = unsigned long long int; const type tmp = atomicCAS((type*)dest, *((type*)&compare), *((type*)&val)); return *((T*)&tmp); @@ -107,8 +108,7 @@ __inline__ __device__ T atomic_compare_exchange( template <typename T> __inline__ __device__ T 
atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8), const T>& val) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; @@ -184,7 +184,7 @@ inline unsigned long long atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) { + std::enable_if_t<sizeof(T) == sizeof(int), const T&> val) { union U { int i; T t; @@ -203,9 +203,9 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(long), - const T&>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) == sizeof(long), + const T&> + val) { union U { long i; T t; @@ -225,10 +225,10 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(long) && - sizeof(T) == sizeof(Impl::cas128_t), - const T&>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long) && + sizeof(T) == sizeof(Impl::cas128_t), + const T&> + val) { union U { Impl::cas128_t i; T t; @@ -248,12 +248,12 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T compare, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>& val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const 
char*)dest, _MM_HINT_ET0); #endif @@ -375,16 +375,14 @@ KOKKOS_INLINE_FUNCTION bool _atomic_compare_exchange_strong_fallback( template <class T, class MemoryOrderSuccess, class MemoryOrderFailure> KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH bool _atomic_compare_exchange_strong( T* dest, T compare, T val, MemoryOrderSuccess, MemoryOrderFailure, - typename std::enable_if< + std::enable_if_t< (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8 || sizeof(T) == 16) && - std::is_same< - typename MemoryOrderSuccess::memory_order, - typename std::remove_cv<MemoryOrderSuccess>::type>::value && - std::is_same< - typename MemoryOrderFailure::memory_order, - typename std::remove_cv<MemoryOrderFailure>::type>::value, - void const**>::type = nullptr) { + std::is_same<typename MemoryOrderSuccess::memory_order, + std::remove_cv_t<MemoryOrderSuccess>>::value && + std::is_same<typename MemoryOrderFailure::memory_order, + std::remove_cv_t<MemoryOrderFailure>>::value, + void const**> = nullptr) { return __atomic_compare_exchange_n(dest, &compare, val, /* weak = */ false, MemoryOrderSuccess::gnu_constant, MemoryOrderFailure::gnu_constant); @@ -394,16 +392,14 @@ template <class T, class MemoryOrderSuccess, class MemoryOrderFailure> KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH bool _atomic_compare_exchange_strong( T* dest, T compare, T val, MemoryOrderSuccess order_success, MemoryOrderFailure order_failure, - typename std::enable_if< + std::enable_if_t< !(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8 || sizeof(T) == 16) && - std::is_same< - typename MemoryOrderSuccess::memory_order, - typename std::remove_cv<MemoryOrderSuccess>::type>::value && - std::is_same< - typename MemoryOrderFailure::memory_order, - typename std::remove_cv<MemoryOrderFailure>::type>::value, - void const**>::type = nullptr) { + std::is_same<typename MemoryOrderSuccess::memory_order, + std::remove_cv_t<MemoryOrderSuccess>>::value && + std::is_same<typename 
MemoryOrderFailure::memory_order, + std::remove_cv_t<MemoryOrderFailure>>::value, + void const**> = nullptr) { return _atomic_compare_exchange_fallback(dest, compare, val, order_success, order_failure); } diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp index 4bb8b4fd52af0c8beaf8c4dfadddfa7be58c5c54..ad5b010558d94d753bc880ce09195366c794b193 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp @@ -95,7 +95,7 @@ namespace Kokkos { #endif // 32-bit version -template <class T, typename std::enable_if<sizeof(T) == 4, int>::type = 0> +template <class T, std::enable_if_t<sizeof(T) == 4, int> = 0> __inline__ __device__ bool atomic_compare_exchange_weak( T volatile* const dest, T* const expected, T const desired, std::memory_order success_order = std::memory_order_seq_cst, @@ -168,7 +168,7 @@ __inline__ __device__ bool atomic_compare_exchange_weak( } // 64-bit version -template <class T, typename std::enable_if<sizeof(T) == 8, int>::type = 0> +template <class T, std::enable_if_t<sizeof(T) == 8, int> = 0> bool atomic_compare_exchange_weak( T volatile* const dest, T* const expected, T const desired, std::memory_order success_order = std::memory_order_seq_cst, @@ -268,7 +268,7 @@ inline unsigned long long atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) { + std::enable_if_t<sizeof(T) == sizeof(int), const T&> val) { union U { int i; T t; @@ -287,9 +287,9 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(long), - const T&>::type val) { + std::enable_if_t<sizeof(T) != 
sizeof(int) && sizeof(T) == sizeof(long), + const T&> + val) { union U { long i; T t; @@ -309,10 +309,10 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(long) && - sizeof(T) == sizeof(Impl::cas128_t), - const T&>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long) && + sizeof(T) == sizeof(Impl::cas128_t), + const T&> + val) { union U { Impl::cas128_t i; T t; @@ -332,12 +332,12 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T compare, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>& val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp index cd840983d8a3bd24cace6e411cabc940d44ddfe1..a8f77d8353fad6f6cff5d290c893c35705c86f27 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -78,9 +78,9 @@ __inline__ __device__ unsigned long long int atomic_exchange( /** \brief Atomic exchange for any type with compatible size */ template <typename T> -__inline__ __device__ T atomic_exchange( - volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) { +__inline__ __device__ T +atomic_exchange(volatile T* const dest, + std::enable_if_t<sizeof(T) == sizeof(int), const T&> val) { // int tmp = __ullAtomicExch( (int*) dest , *((int*)&val) ); #if defined(KOKKOS_ENABLE_RFO_PREFETCH) 
_mm_prefetch((const char*)dest, _MM_HINT_ET0); @@ -93,9 +93,10 @@ __inline__ __device__ T atomic_exchange( template <typename T> __inline__ __device__ T atomic_exchange( volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T&>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T&> + val) { using type = unsigned long long int; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -108,10 +109,9 @@ __inline__ __device__ T atomic_exchange( } template <typename T> -__inline__ __device__ T -atomic_exchange(volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { +__inline__ __device__ T atomic_exchange( + volatile T* const dest, + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8), const T>& val) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -141,7 +141,7 @@ atomic_exchange(volatile T* const dest, template <typename T> __inline__ __device__ void atomic_assign( volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) { + std::enable_if_t<sizeof(T) == sizeof(int), const T&> val) { // (void) __ullAtomicExch( (int*) dest , *((int*)&val) ); (void)atomicExch(((int*)dest), *((int*)&val)); } @@ -149,9 +149,10 @@ __inline__ __device__ void atomic_assign( template <typename T> __inline__ __device__ void atomic_assign( volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T&>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T&> + val) { using type = unsigned long long int; // (void) __ullAtomicExch( (type*) dest , *((type*)&val) ); (void)atomicExch(((type*)dest), *((type*)&val)); @@ -160,9 +161,10 @@ __inline__ __device__ void atomic_assign( 
template <typename T> __inline__ __device__ void atomic_assign( volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(unsigned long long int), - const T&>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) != sizeof(unsigned long long int), + const T&> + val) { (void)atomic_exchange(dest, val); } @@ -175,10 +177,11 @@ __inline__ __device__ void atomic_assign( #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) template <typename T> -inline T atomic_exchange(volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(int) || - sizeof(T) == sizeof(long), - const T&>::type val) { +inline T atomic_exchange( + volatile T* const dest, + std::enable_if_t<sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long), + const T&> + val) { using type = std::conditional_t<sizeof(T) == sizeof(int), int, long>; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); @@ -209,8 +212,7 @@ inline T atomic_exchange(volatile T* const dest, template <typename T> inline T atomic_exchange( volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(Impl::cas128_t), const T&>::type - val) { + std::enable_if_t<sizeof(T) == sizeof(Impl::cas128_t), const T&> val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif @@ -236,14 +238,13 @@ inline T atomic_exchange( //---------------------------------------------------------------------------- template <typename T> -inline T atomic_exchange( - volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) +inline T atomic_exchange(volatile T* const dest, + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>& val) { while (!Impl::lock_address_host_space((void*)dest)) ; 
Kokkos::memory_fence(); @@ -268,10 +269,11 @@ inline T atomic_exchange( } template <typename T> -inline void atomic_assign(volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(int) || - sizeof(T) == sizeof(long), - const T&>::type val) { +inline void atomic_assign( + volatile T* const dest, + std::enable_if_t<sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long), + const T&> + val) { using type = std::conditional_t<sizeof(T) == sizeof(int), int, long>; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -301,8 +303,7 @@ inline void atomic_assign(volatile T* const dest, template <typename T> inline void atomic_assign( volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(Impl::cas128_t), const T&>::type - val) { + std::enable_if_t<sizeof(T) == sizeof(Impl::cas128_t), const T&> val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif @@ -323,14 +324,13 @@ inline void atomic_assign( #endif template <typename T> -inline void atomic_assign( - volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) +inline void atomic_assign(volatile T* const dest, + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>& val) { while (!Impl::lock_address_host_space((void*)dest)) ; Kokkos::memory_fence(); diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index 9a2b13debc70f24bf6adb34ddee13815458245b3..c188f45427be2d189392264e37daac83c6620eac 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -88,9 +88,9 @@ __inline__ __device__ double atomic_fetch_add(volatile double* const dest, #endif template <typename T> -__inline__ __device__ T atomic_fetch_add( - volatile T* 
const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T>::type val) { +__inline__ __device__ T +atomic_fetch_add(volatile T* const dest, + std::enable_if_t<sizeof(T) == sizeof(int), const T> val) { // to work around a bug in the clang cuda compiler, the name here needs to be // different from the one internal to the other overloads union U1 { @@ -113,9 +113,10 @@ __inline__ __device__ T atomic_fetch_add( template <typename T> __inline__ __device__ T atomic_fetch_add( volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T> + val) { // to work around a bug in the clang cuda compiler, the name here needs to be // different from the one internal to the other overloads union U2 { @@ -138,10 +139,9 @@ __inline__ __device__ T atomic_fetch_add( //---------------------------------------------------------------------------- template <typename T> -__inline__ __device__ T -atomic_fetch_add(volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { +__inline__ __device__ T atomic_fetch_add( + volatile T* const dest, + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8), const T>& val) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; @@ -236,7 +236,7 @@ inline unsigned long long int atomic_fetch_add( template <typename T> inline T atomic_fetch_add( volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T>::type val) { + std::enable_if_t<sizeof(T) == sizeof(int), const T> val) { union U { int i; T t; @@ -259,10 +259,11 @@ inline T atomic_fetch_add( } template <typename T> -inline T atomic_fetch_add(volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(long), - const T>::type val) { +inline T 
atomic_fetch_add( + volatile T* const dest, + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) == sizeof(long), + const T> + val) { union U { long i; T t; @@ -288,10 +289,10 @@ inline T atomic_fetch_add(volatile T* const dest, template <typename T> inline T atomic_fetch_add( volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) != sizeof(long) && - sizeof(T) == sizeof(Impl::cas128_t), - const T>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(long) && + sizeof(T) == sizeof(Impl::cas128_t), + const T> + val) { union U { Impl::cas128_t i; T t; @@ -317,14 +318,13 @@ inline T atomic_fetch_add( //---------------------------------------------------------------------------- template <typename T> -inline T atomic_fetch_add( - volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) +inline T atomic_fetch_add(volatile T* const dest, + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>& val) { while (!Impl::lock_address_host_space((void*)dest)) ; Kokkos::memory_fence(); @@ -365,8 +365,7 @@ T atomic_fetch_add(volatile T* const dest, const T val) { #elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS) template <typename T> -T atomic_fetch_add(volatile T* const dest_v, - typename std::add_const<T>::type val) { +T atomic_fetch_add(volatile T* const dest_v, std::add_const_t<T> val) { T* dest = const_cast<T*>(dest_v); T retval = *dest; *dest += val; diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index 148ed974420ff88d2831a2ac98ac70a8ea5f4bf2..6aaf36970c3d914e2ab87a7c3bd8b640492dc529 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -87,9 +87,9 @@ 
__inline__ __device__ unsigned int atomic_fetch_sub(volatile double* const dest, #endif template <typename T> -__inline__ __device__ T atomic_fetch_sub( - volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T>::type val) { +__inline__ __device__ T +atomic_fetch_sub(volatile T* const dest, + std::enable_if_t<sizeof(T) == sizeof(int), const T> val) { union U { int i; T t; @@ -110,9 +110,10 @@ __inline__ __device__ T atomic_fetch_sub( template <typename T> __inline__ __device__ T atomic_fetch_sub( volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T> + val) { union U { unsigned long long int i; T t; @@ -133,10 +134,9 @@ __inline__ __device__ T atomic_fetch_sub( //---------------------------------------------------------------------------- template <typename T> -__inline__ __device__ T -atomic_fetch_sub(volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { +__inline__ __device__ T atomic_fetch_sub( + volatile T* const dest, + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8), const T>& val) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; @@ -211,7 +211,7 @@ inline unsigned long long int atomic_fetch_sub( template <typename T> inline T atomic_fetch_sub( volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T>::type val) { + std::enable_if_t<sizeof(T) == sizeof(int), const T> val) { union U { int i; T t; @@ -234,10 +234,11 @@ inline T atomic_fetch_sub( } template <typename T> -inline T atomic_fetch_sub(volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(long), - const T>::type val) { +inline T atomic_fetch_sub( + volatile T* const dest, + std::enable_if_t<sizeof(T) != 
sizeof(int) && sizeof(T) == sizeof(long), + const T> + val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif @@ -264,8 +265,7 @@ inline T atomic_fetch_sub(volatile T* const dest, template <typename T> inline T atomic_fetch_sub( volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8), const T>& val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp index f6bdbca729a335e4218ec3ac9108f0c3046eac05..aac0d12c8173061970c192a3eff41e348059202a 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -188,9 +188,10 @@ struct RShiftOper { template <class Oper, typename T> KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T> + val) { union U { unsigned long long int i; T t; @@ -213,9 +214,10 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( template <class Oper, typename T> KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T* const dest, - typename std::enable_if<sizeof(T) != sizeof(int) && - sizeof(T) == sizeof(unsigned long long int), - const T>::type val) { + std::enable_if_t<sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(unsigned long long int), + const T> + val) { union U { unsigned long long int i; T t; @@ -236,9 +238,9 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( } template <class Oper, typename T> -KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( - const Oper& op, volatile T* const dest, - 
typename std::enable_if<sizeof(T) == sizeof(int), const T>::type val) { +KOKKOS_INLINE_FUNCTION T +atomic_fetch_oper(const Oper& op, volatile T* const dest, + std::enable_if_t<sizeof(T) == sizeof(int), const T> val) { union U { int i; T t; @@ -258,9 +260,9 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( } template <class Oper, typename T> -KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( - const Oper& op, volatile T* const dest, - typename std::enable_if<sizeof(T) == sizeof(int), const T>::type val) { +KOKKOS_INLINE_FUNCTION T +atomic_oper_fetch(const Oper& op, volatile T* const dest, + std::enable_if_t<sizeof(T) == sizeof(int), const T> val) { union U { int i; T t; @@ -282,8 +284,7 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( template <class Oper, typename T> KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), const T>::type - val) { + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8), const T> val) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST while (!Impl::lock_address_host_space((void*)dest)) ; @@ -344,13 +345,13 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( template <class Oper, typename T> KOKKOS_INLINE_FUNCTION T atomic_oper_fetch(const Oper& op, volatile T* const dest, - typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) + std::enable_if_t<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && \ defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>& val) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST while (!Impl::lock_address_host_space((void*)dest)) diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp index f3b77a297629867a11ef25225b1302f1d68aa937..f4437326dddceab23a47525fc3ae8c89af199f66 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp +++ 
b/packages/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp @@ -72,25 +72,23 @@ namespace Impl { template <class T, class MemoryOrder> KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH T _atomic_load( T* ptr, MemoryOrder, - typename std::enable_if< - (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || - sizeof(T) == 8) && - std::is_same<typename MemoryOrder::memory_order, - typename std::remove_cv<MemoryOrder>::type>::value, - void const**>::type = nullptr) { + std::enable_if_t<(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || + sizeof(T) == 8) && + std::is_same<typename MemoryOrder::memory_order, + std::remove_cv_t<MemoryOrder>>::value, + void const**> = nullptr) { return __atomic_load_n(ptr, MemoryOrder::gnu_constant); } template <class T, class MemoryOrder> KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH T _atomic_load( T* ptr, MemoryOrder, - typename std::enable_if< - !(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || - sizeof(T) == 8) && - std::is_default_constructible<T>::value && - std::is_same<typename MemoryOrder::memory_order, - typename std::remove_cv<MemoryOrder>::type>::value, - void const**>::type = nullptr) { + std::enable_if_t<!(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || + sizeof(T) == 8) && + std::is_default_constructible<T>::value && + std::is_same<typename MemoryOrder::memory_order, + std::remove_cv_t<MemoryOrder>>::value, + void const**> = nullptr) { T rv{}; __atomic_load(ptr, &rv, MemoryOrder::gnu_constant); return rv; @@ -104,9 +102,9 @@ KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH T _atomic_load( template <class T> __device__ __inline__ T _relaxed_atomic_load_impl( - T* ptr, typename std::enable_if<(sizeof(T) == 1 || sizeof(T) == 2 || - sizeof(T) == 4 || sizeof(T) == 8), - void const**>::type = nullptr) { + T* ptr, std::enable_if_t<(sizeof(T) == 1 || sizeof(T) == 2 || + sizeof(T) == 4 || sizeof(T) == 8), + void const**> = nullptr) { return *ptr; } @@ -120,9 +118,9 @@ struct NoOpOper { template <class T> __device__ __inline__ T 
_relaxed_atomic_load_impl( - T* ptr, typename std::enable_if<!(sizeof(T) == 1 || sizeof(T) == 2 || - sizeof(T) == 4 || sizeof(T) == 8), - void const**>::type = nullptr) { + T* ptr, std::enable_if_t<!(sizeof(T) == 1 || sizeof(T) == 2 || + sizeof(T) == 4 || sizeof(T) == 8), + void const**> = nullptr) { T rv{}; // TODO remove a copy operation here? return Kokkos::Impl::atomic_oper_fetch(NoOpOper<T>{}, ptr, rv); diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp index 264d6beaf5d8a9f3741deafca4c67820c0649b90..ffe018b4d6120212d8c03c1b6d05106e3e6011a3 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp @@ -72,25 +72,23 @@ namespace Impl { template <class T, class MemoryOrder> KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH void _atomic_store( T* ptr, T val, MemoryOrder, - typename std::enable_if< - (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || - sizeof(T) == 8) && - std::is_same<typename MemoryOrder::memory_order, - typename std::remove_cv<MemoryOrder>::type>::value, - void const**>::type = nullptr) { + std::enable_if_t<(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || + sizeof(T) == 8) && + std::is_same<typename MemoryOrder::memory_order, + std::remove_cv_t<MemoryOrder>>::value, + void const**> = nullptr) { __atomic_store_n(ptr, val, MemoryOrder::gnu_constant); } template <class T, class MemoryOrder> KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH void _atomic_store( T* ptr, T val, MemoryOrder, - typename std::enable_if< - !(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || - sizeof(T) == 8) && - std::is_default_constructible<T>::value && - std::is_same<typename MemoryOrder::memory_order, - typename std::remove_cv<MemoryOrder>::type>::value, - void const**>::type = nullptr) { + std::enable_if_t<!(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || + sizeof(T) == 8) && + std::is_default_constructible<T>::value && + 
std::is_same<typename MemoryOrder::memory_order, + std::remove_cv_t<MemoryOrder>>::value, + void const**> = nullptr) { __atomic_store(ptr, &val, MemoryOrder::gnu_constant); } @@ -103,9 +101,9 @@ KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH void _atomic_store( template <class T> __device__ __inline__ void _relaxed_atomic_store_impl( T* ptr, T val, - typename std::enable_if<(sizeof(T) == 1 || sizeof(T) == 2 || - sizeof(T) == 4 || sizeof(T) == 8), - void const**>::type = nullptr) { + std::enable_if_t<(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || + sizeof(T) == 8), + void const**> = nullptr) { *ptr = val; } @@ -120,9 +118,9 @@ struct StoreOper { template <class T> __device__ __inline__ void _relaxed_atomic_store_impl( T* ptr, T val, - typename std::enable_if<!(sizeof(T) == 1 || sizeof(T) == 2 || - sizeof(T) == 4 || sizeof(T) == 8), - void const**>::type = nullptr) { + std::enable_if_t<!(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || + sizeof(T) == 8), + void const**> = nullptr) { Kokkos::Impl::atomic_oper_fetch(StoreOper<T>{}, ptr, (T &&) val); } diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp index 2f824566b8043a080e8d1f9010256b755547d42a..c5207b51e9fadc631566551c1eaf451a0a1ad081 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp @@ -52,8 +52,6 @@ #include <winsock2.h> #include <windows.h> -#undef VOID - namespace Kokkos { namespace Impl { #ifdef _MSC_VER @@ -77,7 +75,7 @@ __attribute__((aligned(16))) template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) == sizeof(CHAR), const T&>::type val) { + std::enable_if_t<sizeof(T) == sizeof(CHAR), const T&> val) { union U { CHAR i; T t; @@ -92,7 +90,7 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& 
compare, - typename std::enable_if<sizeof(T) == sizeof(SHORT), const T&>::type val) { + std::enable_if_t<sizeof(T) == sizeof(SHORT), const T&> val) { union U { SHORT i; T t; @@ -107,7 +105,7 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) == sizeof(LONG), const T&>::type val) { + std::enable_if_t<sizeof(T) == sizeof(LONG), const T&> val) { union U { LONG i; T t; @@ -122,8 +120,7 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) == sizeof(LONGLONG), const T&>::type - val) { + std::enable_if_t<sizeof(T) == sizeof(LONGLONG), const T&> val) { union U { LONGLONG i; T t; @@ -138,8 +135,7 @@ inline T atomic_compare_exchange( template <typename T> inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename std::enable_if<sizeof(T) == sizeof(Impl::cas128_t), const T&>::type - val) { + std::enable_if_t<sizeof(T) == sizeof(Impl::cas128_t), const T&> val) { T compare_and_result(compare); union U { Impl::cas128_t i; diff --git a/packages/kokkos/core/src/impl/Kokkos_BitOps.hpp b/packages/kokkos/core/src/impl/Kokkos_BitOps.hpp index fc58b96a450992177804effe491192904587296b..a41d19aafa303823cf543a048c28c821c05ffdcd 100644 --- a/packages/kokkos/core/src/impl/Kokkos_BitOps.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_BitOps.hpp @@ -57,93 +57,208 @@ namespace Kokkos { namespace Impl { KOKKOS_FORCEINLINE_FUNCTION -int int_log2(unsigned i) { - enum : int { shift = sizeof(unsigned) * CHAR_BIT - 1 }; -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +int int_log2_fallback(unsigned i) { + constexpr int shift = sizeof(unsigned) * CHAR_BIT - 1; + + int offset = 0; + if (i) { + for (offset = shift; (i & (1 << offset)) == 0; --offset) + ; + } + return offset; +} + +KOKKOS_IMPL_DEVICE_FUNCTION +inline int 
int_log2_device(unsigned i) { +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) + constexpr int shift = sizeof(unsigned) * CHAR_BIT - 1; return shift - __clz(i); #elif defined(KOKKOS_COMPILER_INTEL) return _bit_scan_reverse(i); +#else + return int_log2_fallback(i); +#endif +} + +KOKKOS_IMPL_HOST_FUNCTION +inline int int_log2_host(unsigned i) { +// duplicating shift to avoid unused warning in else branch +#if defined(KOKKOS_COMPILER_INTEL) + constexpr int shift = sizeof(unsigned) * CHAR_BIT - 1; + (void)shift; + return _bit_scan_reverse(i); #elif defined(KOKKOS_COMPILER_CRAYC) + constexpr int shift = sizeof(unsigned) * CHAR_BIT - 1; return i ? shift - _leadz32(i) : 0; #elif defined(__GNUC__) || defined(__GNUG__) + constexpr int shift = sizeof(unsigned) * CHAR_BIT - 1; return shift - __builtin_clz(i); #else - int offset = 0; - if (i) { - for (offset = shift; (i & (1 << offset)) == 0; --offset) - ; - } - return offset; + return int_log2_fallback(i); #endif } +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +KOKKOS_FORCEINLINE_FUNCTION +int int_log2(unsigned i) { + KOKKOS_IF_ON_DEVICE((return int_log2_device(i);)) + KOKKOS_IF_ON_HOST((return int_log2_host(i);)) +} +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma pop +#endif + /**\brief Find first zero bit. * * If none then return -1 ; */ KOKKOS_FORCEINLINE_FUNCTION -int bit_first_zero(unsigned i) noexcept { - enum : unsigned { full = ~0u }; +int bit_first_zero_fallback(unsigned i) noexcept { + constexpr unsigned full = ~0u; + + int offset = -1; + if (full != i) { + for (offset = 0; i & (1 << offset); ++offset) + ; + } + return offset; +} -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +KOKKOS_IMPL_DEVICE_FUNCTION +inline int bit_first_zero_device(unsigned i) noexcept { + constexpr unsigned full = ~0u; +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) return full != i ? 
__ffs(~i) - 1 : -1; #elif defined(KOKKOS_COMPILER_INTEL) return full != i ? _bit_scan_forward(~i) : -1; +#else + (void)full; + return bit_first_zero_fallback(i); +#endif +} + +KOKKOS_IMPL_HOST_FUNCTION +inline int bit_first_zero_host(unsigned i) noexcept { + constexpr unsigned full = ~0u; +#if defined(KOKKOS_COMPILER_INTEL) + return full != i ? _bit_scan_forward(~i) : -1; #elif defined(KOKKOS_COMPILER_CRAYC) return full != i ? _popcnt(i ^ (i + 1)) - 1 : -1; #elif defined(KOKKOS_COMPILER_GNU) || defined(__GNUC__) || defined(__GNUG__) return full != i ? __builtin_ffs(~i) - 1 : -1; #else + (void)full; + return bit_first_zero_fallback(i); +#endif +} + +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +KOKKOS_FORCEINLINE_FUNCTION +int bit_first_zero(unsigned i) noexcept { + KOKKOS_IF_ON_DEVICE((return bit_first_zero_device(i);)) + KOKKOS_IF_ON_HOST((return bit_first_zero_host(i);)) +} +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma pop +#endif + +KOKKOS_FORCEINLINE_FUNCTION +int bit_scan_forward_fallback(unsigned i) { int offset = -1; - if (full != i) { - for (offset = 0; i & (1 << offset); ++offset) + if (i) { + for (offset = 0; (i & (1 << offset)) == 0; ++offset) ; } return offset; -#endif } -KOKKOS_FORCEINLINE_FUNCTION -int bit_scan_forward(unsigned i) { -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +KOKKOS_IMPL_DEVICE_FUNCTION inline int bit_scan_forward_device(unsigned i) { +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) return __ffs(i) - 1; #elif defined(KOKKOS_COMPILER_INTEL) return _bit_scan_forward(i); +#else + return bit_scan_forward_fallback(i); +#endif +} + +KOKKOS_IMPL_HOST_FUNCTION inline int bit_scan_forward_host(unsigned i) { +#if defined(KOKKOS_COMPILER_INTEL) + return _bit_scan_forward(i); #elif defined(KOKKOS_COMPILER_CRAYC) return i ? 
_popcnt(~i & (i - 1)) : -1; #elif defined(KOKKOS_COMPILER_GNU) || defined(__GNUC__) || defined(__GNUG__) return __builtin_ffs(i) - 1; #else - int offset = -1; - if (i) { - for (offset = 0; (i & (1 << offset)) == 0; ++offset) - ; - } - return offset; + return bit_scan_forward_fallback(i); #endif } +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +KOKKOS_FORCEINLINE_FUNCTION +int bit_scan_forward(unsigned i) { + KOKKOS_IF_ON_DEVICE((return bit_scan_forward_device(i);)) + KOKKOS_IF_ON_HOST((return bit_scan_forward_host(i);)) +} +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma pop +#endif + /// Count the number of bits set. KOKKOS_FORCEINLINE_FUNCTION -int bit_count(unsigned i) { -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +int bit_count_fallback(unsigned i) { + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive + i = i - ((i >> 1) & ~0u / 3u); // temp + i = (i & ~0u / 15u * 3u) + ((i >> 2) & ~0u / 15u * 3u); // temp + i = (i + (i >> 4)) & ~0u / 255u * 15u; // temp + + // count + return (int)((i * (~0u / 255u)) >> (sizeof(unsigned) - 1) * CHAR_BIT); +} + +KOKKOS_IMPL_DEVICE_FUNCTION inline int bit_count_device(unsigned i) { +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) return __popc(i); -#elif defined(__INTEL_COMPILER) +#elif defined(KOKKOS_COMPILER_INTEL) + return _popcnt32(i); +#else + return bit_count_fallback(i); +#endif +} + +KOKKOS_IMPL_HOST_FUNCTION inline int bit_count_host(unsigned i) { +#if defined(KOKKOS_COMPILER_INTEL) return _popcnt32(i); #elif defined(KOKKOS_COMPILER_CRAYC) return _popcnt(i); #elif defined(__GNUC__) || defined(__GNUG__) return __builtin_popcount(i); #else - // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive - i = i - ((i >> 1) & ~0u / 3u); // temp - i = (i & ~0u / 15u * 3u) + ((i >> 2) & ~0u / 15u * 3u); // temp - i = (i + (i >> 4)) & ~0u / 255u * 15u; 
// temp + return bit_count_fallback(i); +#endif +} - // count - return (int)((i * (~0u / 255u)) >> (sizeof(unsigned) - 1) * CHAR_BIT); +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma push +#pragma diag_suppress implicit_return_from_non_void_function #endif +KOKKOS_FORCEINLINE_FUNCTION +int bit_count(unsigned i) { + KOKKOS_IF_ON_DEVICE((return bit_count_device(i);)) + KOKKOS_IF_ON_HOST((return bit_count_host(i);)) } +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma pop +#endif KOKKOS_INLINE_FUNCTION unsigned integral_power_of_two_that_contains(const unsigned N) { @@ -152,15 +267,6 @@ unsigned integral_power_of_two_that_contains(const unsigned N) { } } // namespace Impl - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 - -KOKKOS_DEPRECATED KOKKOS_INLINE_FUNCTION int log2(unsigned i) { - return Impl::int_log2(i); -} - -#endif - } // namespace Kokkos #endif // KOKKOS_BITOPS_HPP diff --git a/packages/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp b/packages/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp index 3251cb0f5c8c5daa3e6693d8bda536a9a3de8d0f..a8fc928d10d4e44e6ba4e29695777d7a3f5b7bdb 100644 --- a/packages/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #include <windows.h> diff --git a/packages/kokkos/core/src/impl/Kokkos_ChaseLev.hpp b/packages/kokkos/core/src/impl/Kokkos_ChaseLev.hpp index e2283f11fd20d08e5fb554581547622d7140a039..1a372d8c954a604a27e30a04930824cfec5a7afc 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ChaseLev.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ChaseLev.hpp @@ -165,8 +165,8 @@ struct ChaseLevDeque { public: template <class _ignore = void, - class = typename std::enable_if< - std::is_default_constructible<CircularBufferT>::value>::type> + class = std::enable_if_t< + 
std::is_default_constructible<CircularBufferT>::value>> ChaseLevDeque() : m_array() {} explicit ChaseLevDeque(CircularBufferT buffer) : m_array(std::move(buffer)) {} diff --git a/packages/kokkos/core/src/impl/Kokkos_ClockTic.hpp b/packages/kokkos/core/src/impl/Kokkos_ClockTic.hpp index 87f18604da52a62c6c8de22e8c670169ceec643a..c1cb6a7d91b54b951f445131e43347eaaa33a027 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ClockTic.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ClockTic.hpp @@ -77,20 +77,30 @@ namespace Impl { * having different index-seed values. */ -KOKKOS_FORCEINLINE_FUNCTION -uint64_t clock_tic() noexcept { -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) +KOKKOS_IMPL_DEVICE_FUNCTION inline uint64_t clock_tic_device() noexcept { +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) // Return value of 64-bit hi-res clock register. - return clock64(); #elif defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_ARCH_INTEL_GPU) && \ defined(__SYCL_DEVICE_ONLY__) + return intel_get_cycle_counter(); + #elif defined(KOKKOS_ENABLE_OPENMPTARGET) - return uint64_t(omp_get_wtime() * 1.e9); -#elif defined(__i386__) || defined(__x86_64) + + return omp_get_wtime() * 1.e9; + +#else + + return 0; + +#endif +} + +KOKKOS_IMPL_HOST_FUNCTION inline uint64_t clock_tic_host() noexcept { +#if defined(__i386__) || defined(__x86_64) // Return value of 64-bit hi-res clock register. 
@@ -111,13 +121,17 @@ uint64_t clock_tic() noexcept { #else - return (uint64_t)std::chrono::high_resolution_clock::now() - .time_since_epoch() - .count(); + return std::chrono::high_resolution_clock::now().time_since_epoch().count(); #endif } +KOKKOS_FORCEINLINE_FUNCTION +uint64_t clock_tic() noexcept { + KOKKOS_IF_ON_DEVICE((return clock_tic_device();)) + KOKKOS_IF_ON_HOST((return clock_tic_host();)) +} + } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/impl/Kokkos_Combined_Reducer.hpp b/packages/kokkos/core/src/impl/Kokkos_Combined_Reducer.hpp index 4ec8513191f21e07896bac21274e3af088dfe518..21a202994fbcdad4fe4d20d89776795ee7cd81e2 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Combined_Reducer.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Combined_Reducer.hpp @@ -89,10 +89,6 @@ struct CombinedReducerValueItemImpl { constexpr value_type& ref() & noexcept { return m_value; } KOKKOS_FORCEINLINE_FUNCTION constexpr value_type const& ref() const& noexcept { return m_value; } - KOKKOS_FORCEINLINE_FUNCTION - value_type volatile& ref() volatile& noexcept { return m_value; } - KOKKOS_FORCEINLINE_FUNCTION - value_type const volatile& ref() const volatile& noexcept { return m_value; } }; //============================================================================== @@ -133,15 +129,6 @@ struct CombinedReducerValueImpl<std::integer_sequence<size_t, Idxs...>, KOKKOS_INLINE_FUNCTION ValueType const& get() const& noexcept { return this->CombinedReducerValueItemImpl<Idx, ValueType>::ref(); } - template <size_t Idx, class ValueType> - KOKKOS_INLINE_FUNCTION ValueType volatile& get() volatile& noexcept { - return this->CombinedReducerValueItemImpl<Idx, ValueType>::ref(); - } - template <size_t Idx, class ValueType> - KOKKOS_INLINE_FUNCTION ValueType const volatile& get() const - volatile& noexcept { - return this->CombinedReducerValueItemImpl<Idx, ValueType>::ref(); - } }; 
//============================================================================== @@ -175,12 +162,6 @@ struct CombinedReducerStorageImpl { m_reducer.join(dest, src); return _fold_comma_emulation_return{}; } - - KOKKOS_INLINE_FUNCTION constexpr _fold_comma_emulation_return _join( - value_type volatile& dest, value_type const volatile& src) const { - m_reducer.join(dest, src); - return _fold_comma_emulation_return{}; - } }; // </editor-fold> end CombinedReducerStorage }}}1 @@ -193,28 +174,20 @@ struct _construct_combined_reducer_from_args_tag {}; template <class T> KOKKOS_INLINE_FUNCTION auto _get_value_from_combined_reducer_ctor_arg( - T&& arg) noexcept -> - typename std::enable_if< - !is_view<typename std::decay<T>::type>::value && - !is_reducer<typename std::decay<T>::type>::value, - typename std::decay<T>::type>::type { + T&& arg) noexcept + -> std::enable_if_t<!is_view<std::decay_t<T>>::value && + !is_reducer<std::decay_t<T>>::value, + std::decay_t<T>> { return arg; } template <class T> KOKKOS_INLINE_FUNCTION auto _get_value_from_combined_reducer_ctor_arg( - T&& arg) noexcept -> - typename std::enable_if<is_view<typename std::decay<T>::type>::value, - typename std::decay<T>::type>::type::value_type { - return arg(); -} - -template <class T> -KOKKOS_INLINE_FUNCTION auto _get_value_from_combined_reducer_ctor_arg( - T&& arg) noexcept -> - typename std::enable_if<is_reducer<typename std::decay<T>::type>::value, - typename std::decay<T>::type>::type::value_type { - return arg.reference(); + T&&) noexcept -> + typename std::enable_if_t<is_view<std::decay_t<T>>::value || + is_reducer<std::decay_t<T>>::value, + std::decay_t<T>>::value_type { + return typename std::decay_t<T>::value_type{}; } template <class IdxSeq, class Space, class...> @@ -264,14 +237,6 @@ struct CombinedReducerImpl<std::integer_sequence<size_t, Idxs...>, Space, src.template get<Idxs, typename Reducers::value_type>())...); } - KOKKOS_FUNCTION void join(value_type volatile& dest, - value_type const 
volatile& src) const noexcept { - emulate_fold_comma_operator( - this->CombinedReducerStorageImpl<Idxs, Reducers>::_join( - dest.template get<Idxs, typename Reducers::value_type>(), - src.template get<Idxs, typename Reducers::value_type>())...); - } - KOKKOS_FUNCTION constexpr void init(value_type& dest) const noexcept { emulate_fold_comma_operator( this->CombinedReducerStorageImpl<Idxs, Reducers>::_init( @@ -294,13 +259,26 @@ struct CombinedReducerImpl<std::integer_sequence<size_t, Idxs...>, Space, return m_value_view; } - KOKKOS_FUNCTION - constexpr static void write_value_back_to_original_references( - value_type const& value, + template <class ExecutionSpace, int Idx, class View> + static void write_one_value_back( + const ExecutionSpace& exec_space, View const& view, + typename View::const_value_type& value) noexcept { + if (Kokkos::SpaceAccessibility<typename View::memory_space, + Space>::assignable) + view() = value; + else + Kokkos::deep_copy(exec_space, view, value); + } + + template <class ExecutionSpace> + static void write_value_back_to_original_references( + const ExecutionSpace& exec_space, value_type const& value, Reducers const&... 
reducers_that_reference_original_values) noexcept { emulate_fold_comma_operator( - (reducers_that_reference_original_values.view()() = - value.template get<Idxs, typename Reducers::value_type>())...); + (write_one_value_back<ExecutionSpace, Idxs>( + exec_space, reducers_that_reference_original_values.view(), + value.template get<Idxs, typename Reducers::value_type>()), + 0)...); } }; @@ -441,9 +419,8 @@ struct CombinedReductionFunctorWrapper // <editor-fold desc="_make_reducer_from_arg"> {{{2 template <class Space, class Reducer> -KOKKOS_INLINE_FUNCTION constexpr typename std::enable_if< - Kokkos::is_reducer<typename std::decay<Reducer>::type>::value, - typename std::decay<Reducer>::type>::type +KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + Kokkos::is_reducer<std::decay_t<Reducer>>::value, std::decay_t<Reducer>> _make_reducer_from_arg(Reducer&& arg_reducer) noexcept { return arg_reducer; } @@ -456,21 +433,20 @@ struct _wrap_with_kokkos_sum { }; template <class Space, class T> -struct _wrap_with_kokkos_sum< - Space, T, typename std::enable_if<Kokkos::is_view<T>::value>::type> { - using type = Kokkos::Sum<typename T::value_type, Space>; +struct _wrap_with_kokkos_sum<Space, T, + std::enable_if_t<Kokkos::is_view<T>::value>> { + using type = Kokkos::Sum<typename T::value_type, typename T::memory_space>; }; // TODO better error message for the case when a const& to a scalar is passed in // (this is needed in general, though) template <class Space, class T> -KOKKOS_INLINE_FUNCTION constexpr typename std::enable_if< - !Kokkos::is_reducer<typename std::decay<T>::type>::value, - _wrap_with_kokkos_sum<Space, typename std::decay<T>::type>>::type::type +KOKKOS_INLINE_FUNCTION constexpr typename std::enable_if_t< + !Kokkos::is_reducer<std::decay_t<T>>::value, + _wrap_with_kokkos_sum<Space, std::decay_t<T>>>::type _make_reducer_from_arg(T&& arg_scalar) noexcept { return - typename _wrap_with_kokkos_sum<Space, typename std::decay<T>::type>::type{ - arg_scalar}; + typename 
_wrap_with_kokkos_sum<Space, std::decay_t<T>>::type{arg_scalar}; } // This can't be an alias template because GCC doesn't know how to mangle @@ -533,6 +509,8 @@ KOKKOS_INLINE_FUNCTION constexpr auto make_wrapped_combined_functor( //---------------------------------------- } +template <typename FunctorType> +using functor_has_value_t = typename FunctorType::value_type; } // end namespace Impl //============================================================================== @@ -546,9 +524,8 @@ template <class PolicyType, class Functor, class ReturnType1, class ReturnType2, auto parallel_reduce(std::string const& label, PolicyType const& policy, Functor const& functor, ReturnType1&& returnType1, ReturnType2&& returnType2, - ReturnTypes&&... returnTypes) noexcept -> - typename std::enable_if< - Kokkos::is_execution_policy<PolicyType>::value>::type { + ReturnTypes&&... returnTypes) noexcept + -> std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value> { //---------------------------------------- // Since we don't support asynchronous combined reducers yet for various // reasons, we actually just want to work with the pointers and references @@ -570,7 +547,7 @@ auto parallel_reduce(std::string const& label, PolicyType const& policy, using combined_functor_type = decltype(combined_functor); static_assert( - Impl::FunctorDeclaresValueType<combined_functor_type, void>::value, + is_detected<Impl::functor_has_value_t, combined_functor_type>::value, "value_type not properly detected"); using reduce_adaptor_t = Impl::ParallelReduceAdaptor<PolicyType, combined_functor_type, @@ -584,9 +561,12 @@ auto parallel_reduce(std::string const& label, PolicyType const& policy, "Kokkos::parallel_reduce: fence due to result being value, not view", combined_reducer); combined_reducer.write_value_back_to_original_references( - value, Impl::_make_reducer_from_arg<space_type>(returnType1), + policy.space(), value, + Impl::_make_reducer_from_arg<space_type>(returnType1), 
Impl::_make_reducer_from_arg<space_type>(returnType2), Impl::_make_reducer_from_arg<space_type>(returnTypes)...); + policy.space().fence( + "Kokkos::parallel_reduce: fence after copying values back"); //---------------------------------------- } @@ -594,9 +574,8 @@ template <class PolicyType, class Functor, class ReturnType1, class ReturnType2, class... ReturnTypes> auto parallel_reduce(PolicyType const& policy, Functor const& functor, ReturnType1&& returnType1, ReturnType2&& returnType2, - ReturnTypes&&... returnTypes) noexcept -> - typename std::enable_if< - Kokkos::is_execution_policy<PolicyType>::value>::type { + ReturnTypes&&... returnTypes) noexcept + -> std::enable_if_t<Kokkos::is_execution_policy<PolicyType>::value> { //---------------------------------------- Kokkos::parallel_reduce("", policy, functor, std::forward<ReturnType1>(returnType1), diff --git a/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.cpp b/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.cpp index edaae0fd47750bda2b0218fe91b2eb0a3ba51330..ca56352f4e8fd45228185115a23f3cad7cbcaeee 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.cpp @@ -41,24 +41,28 @@ // ************************************************************************ //@HEADER */ + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + +#include <impl/Kokkos_Command_Line_Parsing.hpp> +#include <impl/Kokkos_Error.hpp> + +#include <cstring> #include <iostream> +#include <regex> #include <string> #include <sstream> -#include <cstring> -#include <impl/Kokkos_Command_Line_Parsing.hpp> -/** Duplicates of Kokkos_Error.cpp/hpp, reproduced here - * for use in non-Kokkos applications - */ + namespace { -void traceback_callstack(std::ostream& msg) { - msg << std::endl << "Traceback functionality not available" << std::endl; -} -void throw_runtime_exception(const std::string& msg) { - std::ostringstream o; - 
o << msg; - traceback_callstack(o); - throw std::runtime_error(o.str()); -} + +auto const regex_true = std::regex( + "(yes|true|1)", std::regex_constants::icase | std::regex_constants::egrep); + +auto const regex_false = std::regex( + "(no|false|0)", std::regex_constants::icase | std::regex_constants::egrep); + } // namespace bool Kokkos::Impl::is_unsigned_int(const char* str) { @@ -85,49 +89,204 @@ bool Kokkos::Impl::check_arg(char const* arg, char const* expected) { return true; } -bool Kokkos::Impl::check_int_arg(char const* arg, char const* expected, - int* value) { - if (!check_arg(arg, expected)) return false; - std::size_t arg_len = std::strlen(arg); - std::size_t exp_len = std::strlen(expected); - bool okay = true; - if (arg_len == exp_len || arg[exp_len] != '=') okay = false; - char const* number = arg + exp_len + 1; - if (!Kokkos::Impl::is_unsigned_int(number) || strlen(number) == 0) - okay = false; - *value = std::stoi(number); - if (!okay) { - std::ostringstream ss; - ss << "Error: expecting an '=INT' after command line argument '" << expected - << "'"; - ss << ". Raised by Kokkos::initialize(int narg, char* argc[])."; - throw_runtime_exception(ss.str()); +bool Kokkos::Impl::check_env_bool(char const* name, bool& val) { + char const* var = std::getenv(name); + + if (!var) { + return false; + } + + if (std::regex_match(var, regex_true)) { + val = true; + return true; + } + + if (!std::regex_match(var, regex_false)) { + std::stringstream ss; + ss << "Error: cannot convert environment variable '" << name << "=" << var + << "' to a boolean." 
+ << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); } + + val = false; return true; } -bool Kokkos::Impl::check_str_arg(char const* arg, char const* expected, - std::string& value) { - if (!check_arg(arg, expected)) return false; - std::size_t arg_len = std::strlen(arg); - std::size_t exp_len = std::strlen(expected); - bool okay = true; - if (arg_len == exp_len || arg[exp_len] != '=') okay = false; - char const* remain = arg + exp_len + 1; - value = remain; - if (!okay) { - std::ostringstream ss; - ss << "Error: expecting an '=STRING' after command line argument '" - << expected << "'"; - ss << ". Raised by Kokkos::initialize(int narg, char* argc[])."; - throw_runtime_exception(ss.str()); + +bool Kokkos::Impl::check_env_int(char const* name, int& val) { + char const* var = std::getenv(name); + + if (!var) { + return false; + } + + errno = 0; + char* var_end; + val = std::strtol(var, &var_end, 10); + + if (var == var_end) { + std::stringstream ss; + ss << "Error: cannot convert environment variable '" << name << '=' << var + << "' to an integer." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + + if (errno == ERANGE) { + std::stringstream ss; + ss << "Error: converted value for environment variable '" << name << '=' + << var << "' falls out of range." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + + return true; +} + +bool Kokkos::Impl::check_arg_bool(char const* arg, char const* name, + bool& val) { + auto const len = std::strlen(name); + if (std::strncmp(arg, name, len) != 0) { + return false; + } + auto const arg_len = strlen(arg); + if (arg_len == len) { + val = true; // --kokkos-foo without =BOOL interpreted as fool=true + return true; + } + if (arg_len <= len + 1 || arg[len] != '=') { + std::stringstream ss; + ss << "Error: command line argument '" << arg + << "' is not recognized as a valid boolean." 
+ << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); } + + std::advance(arg, len + 1); + if (std::regex_match(arg, regex_true)) { + val = true; + return true; + } + if (!std::regex_match(arg, regex_false)) { + std::stringstream ss; + ss << "Error: cannot convert command line argument '" << name << "=" << arg + << "' to a boolean." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + val = false; + return true; +} + +bool Kokkos::Impl::check_arg_int(char const* arg, char const* name, int& val) { + auto const len = std::strlen(name); + if (std::strncmp(arg, name, len) != 0) { + return false; + } + auto const arg_len = strlen(arg); + if (arg_len <= len + 1 || arg[len] != '=') { + std::stringstream ss; + ss << "Error: command line argument '" << arg + << "' is not recognized as a valid integer." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + + std::advance(arg, len + 1); + + errno = 0; + char* arg_end; + val = std::strtol(arg, &arg_end, 10); + + if (arg == arg_end) { + std::stringstream ss; + ss << "Error: cannot convert command line argument '" << name << '=' << arg + << "' to an integer." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + + if (errno == ERANGE) { + std::stringstream ss; + ss << "Error: converted value for command line argument '" << name << '=' + << arg << "' falls out of range." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + + return true; +} + +bool Kokkos::Impl::check_arg_str(char const* arg, char const* name, + std::string& val) { + auto const len = std::strlen(name); + if (std::strncmp(arg, name, len) != 0) { + return false; + } + auto const arg_len = strlen(arg); + if (arg_len <= len + 1 || arg[len] != '=') { + std::stringstream ss; + ss << "Error: command line argument '" << arg + << "' is not recognized as a valid string." 
+ << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + + std::advance(arg, len + 1); + + val = arg; return true; } -void Kokkos::Impl::warn_deprecated_command_line_argument(std::string deprecated, - std::string valid) { - std::cerr - << "Warning: command line argument '" << deprecated - << "' is deprecated. Use '" << valid - << "' instead. Raised by Kokkos::initialize(int narg, char* argc[])." - << std::endl; + +void Kokkos::Impl::warn_deprecated_environment_variable( + std::string deprecated) { + std::cerr << "Warning: environment variable '" << deprecated + << "' is deprecated." + << " Raised by Kokkos::initialize()." << std::endl; +} + +void Kokkos::Impl::warn_deprecated_environment_variable( + std::string deprecated, std::string use_instead) { + std::cerr << "Warning: environment variable '" << deprecated + << "' is deprecated." + << " Use '" << use_instead << "' instead." + << " Raised by Kokkos::initialize()." << std::endl; +} + +void Kokkos::Impl::warn_deprecated_command_line_argument( + std::string deprecated) { + std::cerr << "Warning: command line argument '" << deprecated + << "' is deprecated." + << " Raised by Kokkos::initialize()." << std::endl; +} + +void Kokkos::Impl::warn_deprecated_command_line_argument( + std::string deprecated, std::string use_instead) { + std::cerr << "Warning: command line argument '" << deprecated + << "' is deprecated." + << " Use '" << use_instead << "' instead." + << " Raised by Kokkos::initialize()." 
<< std::endl; +} + +namespace { +std::vector<std::regex> do_not_warn_regular_expressions{ + std::regex{"--kokkos-tool.*", std::regex::egrep}, +}; +} + +void Kokkos::Impl::do_not_warn_not_recognized_command_line_argument( + std::regex ignore) { + do_not_warn_regular_expressions.push_back(std::move(ignore)); +} + +void Kokkos::Impl::warn_not_recognized_command_line_argument( + std::string not_recognized) { + for (auto const& ignore : do_not_warn_regular_expressions) { + if (std::regex_match(not_recognized, ignore)) { + return; + } + } + std::cerr << "Warning: command line argument '" << not_recognized + << "' is not recognized." + << " Raised by Kokkos::initialize()." << std::endl; } diff --git a/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.hpp b/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.hpp index 7e1d3049e3668b1ce812d1738bd43968930402fd..b22bc3e342812abf546dd68960f5333791779dbb 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.hpp @@ -46,17 +46,25 @@ #define KOKKOS_COMMAND_LINE_PARSING_HPP #include <string> -#include <iosfwd> +#include <regex> namespace Kokkos { namespace Impl { bool is_unsigned_int(const char* str); bool check_arg(char const* arg, char const* expected); -// void throw_runtime_exception(const std::string& msg); -bool check_int_arg(char const* arg, char const* expected, int* value); -bool check_str_arg(char const* arg, char const* expected, std::string& value); +bool check_arg_bool(char const* arg, char const* name, bool& val); +bool check_arg_int(char const* arg, char const* name, int& val); +bool check_arg_str(char const* arg, char const* name, std::string& val); +bool check_env_bool(char const* name, bool& val); +bool check_env_int(char const* name, int& val); +void warn_deprecated_environment_variable(std::string deprecated); +void warn_deprecated_environment_variable(std::string deprecated, + std::string use_instead); +void 
warn_deprecated_command_line_argument(std::string deprecated); void warn_deprecated_command_line_argument(std::string deprecated, - std::string valid); + std::string use_instead); +void warn_not_recognized_command_line_argument(std::string not_recognized); +void do_not_warn_not_recognized_command_line_argument(std::regex ignore); } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp index 0a3b649fe98aac76b3240162f07e5b58b01c7fbb..f624e7a14cb21b4a395898125536ec9b55bfeaae 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp @@ -42,10 +42,18 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Core.hpp> #include <impl/Kokkos_Error.hpp> -#include <impl/Kokkos_ExecSpaceInitializer.hpp> #include <impl/Kokkos_Command_Line_Parsing.hpp> +#include <impl/Kokkos_ParseCommandLineArgumentsAndEnvironmentVariables.hpp> +#include <impl/Kokkos_DeviceManagement.hpp> +#include <impl/Kokkos_ExecSpaceManager.hpp> + +#include <algorithm> #include <cctype> #include <cstring> #include <iostream> @@ -55,6 +63,7 @@ #include <functional> #include <list> #include <cerrno> +#include <random> #include <regex> #ifndef _WIN32 #include <unistd.h> @@ -65,6 +74,7 @@ //---------------------------------------------------------------------------- namespace { bool g_is_initialized = false; +bool g_is_finalized = false; bool g_show_warnings = true; bool g_tune_internals = false; // When compiling with clang/LLVM and using the GNU (GCC) C++ Standard Library @@ -78,10 +88,7 @@ bool g_tune_internals = false; // segmented array. 
using hook_function_type = std::function<void()>; std::stack<hook_function_type, std::list<hook_function_type>> finalize_hooks; -} // namespace -namespace Kokkos { -namespace Impl { /** * The category is only used in printing, tools * get all metadata free of category @@ -100,50 +107,129 @@ void declare_configuration_metadata(const std::string& category, metadata_map[category][key] = value; } -ExecSpaceManager& ExecSpaceManager::get_instance() { +void combine(Kokkos::InitializationSettings& out, + Kokkos::InitializationSettings const& in) { +#define KOKKOS_IMPL_COMBINE_SETTING(NAME) \ + if (in.has_##NAME()) { \ + out.set_##NAME(in.get_##NAME()); \ + } \ + static_assert(true, "no-op to require trailing semicolon") + KOKKOS_IMPL_COMBINE_SETTING(num_threads); + KOKKOS_IMPL_COMBINE_SETTING(map_device_id_by); + KOKKOS_IMPL_COMBINE_SETTING(device_id); + KOKKOS_IMPL_COMBINE_SETTING(num_devices); + KOKKOS_IMPL_COMBINE_SETTING(skip_device); + KOKKOS_IMPL_COMBINE_SETTING(disable_warnings); + KOKKOS_IMPL_COMBINE_SETTING(tune_internals); + KOKKOS_IMPL_COMBINE_SETTING(tools_help); + KOKKOS_IMPL_COMBINE_SETTING(tools_libs); + KOKKOS_IMPL_COMBINE_SETTING(tools_args); +#undef KOKKOS_IMPL_COMBINE_SETTING +} + +void combine(Kokkos::InitializationSettings& out, + Kokkos::Tools::InitArguments const& in) { + using Kokkos::Tools::InitArguments; + if (in.help != InitArguments::PossiblyUnsetOption::unset) { + out.set_tools_help(in.help == InitArguments::PossiblyUnsetOption::on); + } + if (in.lib != InitArguments::unset_string_option) { + out.set_tools_libs(in.lib); + } + if (in.args != InitArguments::unset_string_option) { + out.set_tools_args(in.args); + } +} + +void combine(Kokkos::Tools::InitArguments& out, + Kokkos::InitializationSettings const& in) { + using Kokkos::Tools::InitArguments; + if (in.has_tools_help()) { + out.help = in.get_tools_help() ? 
InitArguments::PossiblyUnsetOption::on + : InitArguments::PossiblyUnsetOption::off; + } + if (in.has_tools_libs()) { + out.lib = in.get_tools_libs(); + } + if (in.has_tools_args()) { + out.args = in.get_tools_args(); + } +} + +int get_device_count() { +#if defined(KOKKOS_ENABLE_CUDA) + return Kokkos::Cuda::detect_device_count(); +#elif defined(KOKKOS_ENABLE_HIP) + return Kokkos::Experimental::HIP::detect_device_count(); +#elif defined(KOKKOS_ENABLE_SYCL) + return sycl::device::get_devices(sycl::info::device_type::gpu).size(); +#elif defined(KOKKOS_ENABLE_OPENACC) + return acc_get_num_devices( + Kokkos::Experimental::Impl::OpenACC_Traits::dev_type); +#else + Kokkos::abort("implementation bug"); + return -1; +#endif +} + +unsigned get_process_id() { +#ifdef _WIN32 + return unsigned(GetCurrentProcessId()); +#else + return unsigned(getpid()); +#endif +} + +bool is_valid_num_threads(int x) { return x > 0; } + +bool is_valid_device_id(int x) { return x >= 0; } + +bool is_valid_map_device_id_by(std::string const& x) { + return x == "mpi_rank" || x == "random"; +} + +} // namespace + +Kokkos::Impl::ExecSpaceManager& Kokkos::Impl::ExecSpaceManager::get_instance() { static ExecSpaceManager space_initializer = {}; return space_initializer; } -void ExecSpaceManager::register_space_factory( - const std::string name, std::unique_ptr<ExecSpaceInitializerBase> space) { +void Kokkos::Impl::ExecSpaceManager::register_space_factory( + const std::string name, std::unique_ptr<ExecSpaceBase> space) { exec_space_factory_list[name] = std::move(space); } -void ExecSpaceManager::initialize_spaces(const Kokkos::InitArguments& args) { +void Kokkos::Impl::ExecSpaceManager::initialize_spaces( + const InitializationSettings& settings) { // Note: the names of the execution spaces, used as keys in the map, encode - // the ordering of the initialization code from the old initializtion stuff. + // the ordering of the initialization code from the old initialization stuff. 
// Eventually, we may want to do something less brittle than this, but for now // we're just preserving compatibility with the old implementation. for (auto& to_init : exec_space_factory_list) { - to_init.second->initialize(args); + to_init.second->initialize(settings); } } -void ExecSpaceManager::finalize_spaces(const bool all_spaces) { +void Kokkos::Impl::ExecSpaceManager::finalize_spaces() { for (auto& to_finalize : exec_space_factory_list) { - to_finalize.second->finalize(all_spaces); + to_finalize.second->finalize(); } } -void ExecSpaceManager::static_fence() { - for (auto& to_fence : exec_space_factory_list) { - to_fence.second->fence(); - } -} -void ExecSpaceManager::static_fence(const std::string& name) { +void Kokkos::Impl::ExecSpaceManager::static_fence(const std::string& name) { for (auto& to_fence : exec_space_factory_list) { - to_fence.second->fence(name); + to_fence.second->static_fence(name); } } -void ExecSpaceManager::print_configuration(std::ostream& msg, - const bool detail) { - for (auto& to_print : exec_space_factory_list) { - to_print.second->print_configuration(msg, detail); +void Kokkos::Impl::ExecSpaceManager::print_configuration(std::ostream& os, + bool verbose) { + for (auto const& to_print : exec_space_factory_list) { + to_print.second->print_configuration(os, verbose); } } -int get_ctest_gpu(const char* local_rank_str) { +int Kokkos::Impl::get_ctest_gpu(const char* local_rank_str) { auto const* ctest_kokkos_device_type = std::getenv("CTEST_KOKKOS_DEVICE_TYPE"); if (!ctest_kokkos_device_type) { @@ -232,61 +318,142 @@ int get_ctest_gpu(const char* local_rank_str) { return std::stoi(id.c_str()); } -// function to extract gpu # from args -int get_gpu(const InitArguments& args) { - int use_gpu = args.device_id; - const int ndevices = [](int num_devices) -> int { - if (num_devices > 0) return num_devices; -#if defined(KOKKOS_ENABLE_CUDA) - return Cuda::detect_device_count(); -#elif defined(KOKKOS_ENABLE_HIP) - return 
Experimental::HIP::detect_device_count(); -#elif defined(KOKKOS_ENABLE_SYCL) - return sycl::device::get_devices(sycl::info::device_type::gpu).size(); -#else - return num_devices; -#endif - }(args.ndevices); - const int skip_device = args.skip_device; - - // if the exact device is not set, but ndevices was given, assign round-robin - // using on-node MPI rank - if (use_gpu < 0) { - auto const* local_rank_str = - std::getenv("OMPI_COMM_WORLD_LOCAL_RANK"); // OpenMPI - if (!local_rank_str) - local_rank_str = std::getenv("MV2_COMM_WORLD_LOCAL_RANK"); // MVAPICH2 - if (!local_rank_str) - local_rank_str = std::getenv("SLURM_LOCALID"); // SLURM - - auto const* ctest_kokkos_device_type = - std::getenv("CTEST_KOKKOS_DEVICE_TYPE"); // CTest - auto const* ctest_resource_group_count_str = - std::getenv("CTEST_RESOURCE_GROUP_COUNT"); // CTest - if (ctest_kokkos_device_type && ctest_resource_group_count_str && - local_rank_str) { - // Use the device assigned by CTest - use_gpu = get_ctest_gpu(local_rank_str); - } else if (ndevices > 0) { - // Use the device assigned by the rank - if (local_rank_str) { - auto local_rank = std::stoi(local_rank_str); - use_gpu = local_rank % ndevices; - } else { - // user only gave use ndevices, but the MPI environment variable wasn't - // set. start with GPU 0 at this point - use_gpu = 0; +std::vector<int> Kokkos::Impl::get_visible_devices( + Kokkos::InitializationSettings const& settings, int device_count) { + std::vector<int> visible_devices; + char* env_visible_devices = std::getenv("KOKKOS_VISIBLE_DEVICES"); + if (env_visible_devices) { + std::stringstream ss(env_visible_devices); + for (int i; ss >> i;) { + visible_devices.push_back(i); + if (ss.peek() == ',') ss.ignore(); + } + for (auto id : visible_devices) { + if (id < 0) { + ss << "Error: Invalid device id '" << id + << "' in environment variable 'KOKKOS_VISIBLE_DEVICES=" + << env_visible_devices << "'." + << " Device id cannot be negative!" 
+ << " Raised by Kokkos::initialize().\n"; + } + if (id >= device_count) { + ss << "Error: Invalid device id '" << id + << "' in environment variable 'KOKKOS_VISIBLE_DEVICES=" + << env_visible_devices << "'." + << " Device id must be smaller than the number of GPUs available" + << " for execution '" << device_count << "'!" + << " Raised by Kokkos::initialize().\n"; + } + } + } else { + int num_devices = + settings.has_num_devices() ? settings.get_num_devices() : device_count; + if (num_devices > device_count) { + std::stringstream ss; + ss << "Error: Specified number of devices '" << num_devices + << "' exceeds the actual number of GPUs available for execution '" + << device_count << "'." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + for (int i = 0; i < num_devices; ++i) { + visible_devices.push_back(i); + } + if (settings.has_skip_device()) { + if (visible_devices.size() == 1 && settings.get_skip_device() == 0) { + Kokkos::abort( + "Error: skipping the only GPU available for execution.\n" + " Raised by Kokkos::initialize().\n"); } + visible_devices.erase( + std::remove(visible_devices.begin(), visible_devices.end(), + settings.get_skip_device()), + visible_devices.end()); + } + } + if (visible_devices.empty()) { + Kokkos::abort( + "Error: no GPU available for execution.\n" + " Raised by Kokkos::initialize().\n"); + } + return visible_devices; +} + +int Kokkos::Impl::get_gpu(const InitializationSettings& settings) { + std::vector<int> visible_devices = + get_visible_devices(settings, get_device_count()); + int const num_devices = visible_devices.size(); + // device_id is provided + if (settings.has_device_id()) { + int const id = settings.get_device_id(); + if (id < 0) { + std::stringstream ss; + ss << "Error: Requested GPU with invalid id '" << id << "'." + << " Device id cannot be negative!" 
+ << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + if (id >= num_devices) { + std::stringstream ss; + ss << "Error: Requested GPU with id '" << id << "' but only " + << num_devices << "GPU(s) available!" + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); } - // shift assignments over by one so no one is assigned to "skip_device" - if (use_gpu >= skip_device) ++use_gpu; + return visible_devices[settings.get_device_id()]; } - return use_gpu; + + // either random or round-robin assignment based on local MPI rank + if (settings.has_map_device_id_by() && + !is_valid_map_device_id_by(settings.get_map_device_id_by())) { + std::stringstream ss; + ss << "Error: map_device_id_by setting '" << settings.get_map_device_id_by() + << "' is not recognized." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + + if (settings.has_map_device_id_by() && + settings.get_map_device_id_by() == "random") { + std::default_random_engine gen(get_process_id()); + std::uniform_int_distribution<int> distribution(0, num_devices - 1); + return visible_devices[distribution(gen)]; + } + + // either map_device_id_by is not specified or it is mpi_rank + if (settings.has_map_device_id_by() && + settings.get_map_device_id_by() != "mpi_rank") { + Kokkos::abort("implementation bug"); + } + + auto const* local_rank_str = + std::getenv("OMPI_COMM_WORLD_LOCAL_RANK"); // OpenMPI + if (!local_rank_str) + local_rank_str = std::getenv("MV2_COMM_WORLD_LOCAL_RANK"); // MVAPICH2 + if (!local_rank_str) local_rank_str = std::getenv("SLURM_LOCALID"); // SLURM + + // use first GPU available for execution if unable to detect local MPI rank + if (!local_rank_str) { + if (settings.has_map_device_id_by()) { + std::cerr << "Warning: unable to detect local MPI rank." + << " Falling back to the first GPU available for execution." + << " Raised by Kokkos::initialize()." 
<< std::endl; + } + return visible_devices[0]; + } + + // use device assigned by CTest when resource allocation is activated + if (std::getenv("CTEST_KOKKOS_DEVICE_TYPE") && + std::getenv("CTEST_RESOURCE_GROUP_COUNT")) { + return get_ctest_gpu(local_rank_str); + } + + return visible_devices[std::stoi(local_rank_str) % visible_devices.size()]; } namespace { -void initialize_backends(const InitArguments& args) { +void initialize_backends(const Kokkos::InitializationSettings& settings) { // This is an experimental setting // For KNL in Flat mode this variable should be set, so that // memkind allocates high bandwidth memory correctly. @@ -294,10 +461,10 @@ void initialize_backends(const InitArguments& args) { setenv("MEMKIND_HBW_NODES", "1", 0); #endif - Impl::ExecSpaceManager::get_instance().initialize_spaces(args); + Kokkos::Impl::ExecSpaceManager::get_instance().initialize_spaces(settings); } -void initialize_profiling(const Tools::InitArguments& args) { +void initialize_profiling(const Kokkos::Tools::InitArguments& args) { auto initialization_status = Kokkos::Tools::Impl::initialize_tools_subsystem(args); if (initialization_status.result == @@ -310,7 +477,7 @@ void initialize_profiling(const Tools::InitArguments& args) { Kokkos::Tools::Impl::InitializationStatus::InitializationResult:: success) { Kokkos::Tools::parseArgs(args.args); - for (const auto& category_value : Kokkos::Impl::metadata_map) { + for (const auto& category_value : metadata_map) { for (const auto& key_value : category_value.second) { Kokkos::Tools::declareMetadata(key_value.first, key_value.second); } @@ -329,9 +496,12 @@ std::string version_string_from_int(int version_number) { << "." 
<< version_number % 100; return str_builder.str(); } -void pre_initialize_internal(const InitArguments& args) { - if (args.disable_warnings) g_show_warnings = false; - if (args.tune_internals) g_tune_internals = true; + +void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { + if (settings.has_disable_warnings() && settings.get_disable_warnings()) + g_show_warnings = false; + if (settings.has_tune_internals() && settings.get_tune_internals()) + g_tune_internals = true; declare_configuration_metadata("version_info", "Kokkos Version", version_string_from_int(KOKKOS_VERSION)); #ifdef KOKKOS_COMPILER_APPLECC @@ -379,26 +549,6 @@ void pre_initialize_internal(const InitArguments& args) { std::to_string(KOKKOS_COMPILER_MSVC)); declare_configuration_metadata("tools_only", "compiler_family", "msvc"); #endif -#ifdef KOKKOS_ENABLE_ISA_KNC - declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_KNC", - "yes"); -#else - declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_KNC", "no"); -#endif -#ifdef KOKKOS_ENABLE_ISA_POWERPCLE - declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_POWERPCLE", - "yes"); -#else - declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_POWERPCLE", - "no"); -#endif -#ifdef KOKKOS_ENABLE_ISA_X86_64 - declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_X86_64", - "yes"); -#else - declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_X86_64", - "no"); -#endif #ifdef KOKKOS_ENABLE_GNU_ATOMICS declare_configuration_metadata("atomics", "KOKKOS_ENABLE_GNU_ATOMICS", "yes"); @@ -468,13 +618,6 @@ void pre_initialize_internal(const InitArguments& args) { declare_configuration_metadata("memory", "KOKKOS_ENABLE_INTEL_MM_ALLOC", "no"); #endif -#ifdef KOKKOS_ENABLE_POSIX_MEMALIGN - declare_configuration_metadata("memory", "KOKKOS_ENABLE_POSIX_MEMALIGN", - "yes"); -#else - declare_configuration_metadata("memory", "KOKKOS_ENABLE_POSIX_MEMALIGN", - "no"); -#endif 
#ifdef KOKKOS_ENABLE_ASM declare_configuration_metadata("options", "KOKKOS_ENABLE_ASM", "yes"); @@ -512,23 +655,34 @@ void pre_initialize_internal(const InitArguments& args) { declare_configuration_metadata("options", "KOKKOS_ENABLE_LIBRT", "yes"); #else declare_configuration_metadata("options", "KOKKOS_ENABLE_LIBRT", "no"); +#endif +#ifdef KOKKOS_ENABLE_LIBDL + declare_configuration_metadata("options", "KOKKOS_ENABLE_LIBDL", "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_LIBDL", "no"); #endif declare_configuration_metadata("architecture", "Default Device", typeid(Kokkos::DefaultExecutionSpace).name()); } -void post_initialize_internal(const InitArguments& args) { - initialize_profiling(args.impl_get_tools_init_arguments()); +void post_initialize_internal(const Kokkos::InitializationSettings& settings) { + Kokkos::Tools::InitArguments tools_init_arguments; + combine(tools_init_arguments, settings); + initialize_profiling(tools_init_arguments); g_is_initialized = true; + if (settings.has_print_configuration() && + settings.get_print_configuration()) { + ::Kokkos::print_configuration(std::cout); + } } -void initialize_internal(const InitArguments& args) { - pre_initialize_internal(args); - initialize_backends(args); - post_initialize_internal(args); +void initialize_internal(const Kokkos::InitializationSettings& settings) { + pre_initialize_internal(settings); + initialize_backends(settings); + post_initialize_internal(settings); } -void finalize_internal(const bool all_spaces = false) { +void finalize_internal() { typename decltype(finalize_hooks)::size_type numSuccessfulCalls = 0; while (!finalize_hooks.empty()) { auto f = finalize_hooks.top(); @@ -558,124 +712,168 @@ void finalize_internal(const bool all_spaces = false) { Kokkos::Profiling::finalize(); - Impl::ExecSpaceManager::get_instance().finalize_spaces(all_spaces); + Kokkos::Impl::ExecSpaceManager::get_instance().finalize_spaces(); g_is_initialized = false; + g_is_finalized = true; 
g_show_warnings = true; g_tune_internals = false; } void fence_internal(const std::string& name) { - Impl::ExecSpaceManager::get_instance().static_fence(name); + Kokkos::Impl::ExecSpaceManager::get_instance().static_fence(name); } -unsigned get_process_id() { -#ifdef _WIN32 - return unsigned(GetCurrentProcessId()); -#else - return unsigned(getpid()); -#endif +void print_help_message() { + auto const help_message = R"( +-------------------------------------------------------------------------------- +-------------Kokkos command line arguments-------------------------------------- +-------------------------------------------------------------------------------- +This program is using Kokkos. You can use the following command line flags to +control its behavior: + +Kokkos Core Options: + --kokkos-help : print this message + --kokkos-disable-warnings : disable kokkos warning messages + --kokkos-print-configuration : print configuration + --kokkos-tune-internals : allow Kokkos to autotune policies and declare + tuning features through the tuning system. If + left off, Kokkos uses heuristics + --kokkos-num-threads=INT : specify total number of threads to use for + parallel regions on the host. + --kokkos-device-id=INT : specify device id to be used by Kokkos. + --kokkos-map-device-id-by=(random|mpi_rank) + : strategy to select device-id automatically from + available devices. + - random: choose a random device from available. + - mpi_rank: choose device-id based on a round robin + assignment of local MPI ranks. + Works with OpenMPI, MVAPICH, SLURM, and + derived implementations. + +Kokkos Tools Options: + --kokkos-tools-libs=STR : Specify which of the tools to use. Must either + be full path to library or name of library if the + path is present in the runtime library search path + (e.g. 
LD_LIBRARY_PATH) + --kokkos-tools-help : Query the (loaded) kokkos-tool for its command-line + option support (which should then be passed via + --kokkos-tools-args="...") + --kokkos-tools-args=STR : A single (quoted) string of options which will be + whitespace delimited and passed to the loaded + kokkos-tool as command-line arguments. E.g. + `<EXE> --kokkos-tools-args="-c input.txt"` will + pass `<EXE> -c input.txt` as argc/argv to tool + +Except for --kokkos[-tools]-help, you can alternatively set the corresponding +environment variable of a flag (all letters in upper-case and underscores +instead of hyphens). For example, to disable warning messages, you can either +specify --kokkos-disable-warnings or set the KOKKOS_DISABLE_WARNINGS +environment variable to yes. + +Join us on Slack, visit https://kokkosteam.slack.com +Report bugs to https://github.com/kokkos/kokkos/issues +-------------------------------------------------------------------------------- +)"; + std::cout << help_message << std::endl; } -void parse_command_line_arguments(int& narg, char* arg[], - InitArguments& arguments) { - auto& num_threads = arguments.num_threads; - auto& numa = arguments.num_numa; - auto& device = arguments.device_id; - auto& ndevices = arguments.ndevices; - auto& skip_device = arguments.skip_device; - auto& disable_warnings = arguments.disable_warnings; - auto& tune_internals = arguments.tune_internals; - auto& tool_help = arguments.tool_help; - auto& tool_args = arguments.tool_args; - auto& tool_lib = arguments.tool_lib; - - bool kokkos_threads_found = false; - bool kokkos_numa_found = false; - bool kokkos_device_found = false; - bool kokkos_ndevices_found = false; - auto tools_init_arguments = arguments.impl_get_tools_init_arguments(); - Tools::Impl::parse_command_line_arguments(narg, arg, tools_init_arguments); - if (tools_init_arguments.tune_internals != - Kokkos::Tools::InitArguments::PossiblyUnsetOption::unset) { - tune_internals = (tools_init_arguments.tune_internals 
== - Kokkos::Tools::InitArguments::PossiblyUnsetOption::on); - } - if (tools_init_arguments.help != - Kokkos::Tools::InitArguments::PossiblyUnsetOption::unset) { - tool_help = (tools_init_arguments.help == - Kokkos::Tools::InitArguments::PossiblyUnsetOption::on); - } - if (tools_init_arguments.lib != - Kokkos::Tools::InitArguments::unset_string_option) { - tool_lib = tools_init_arguments.lib; - } - if (tools_init_arguments.args != - Kokkos::Tools::InitArguments::unset_string_option) { - tool_args = tools_init_arguments.args; - } +} // namespace - int iarg = 0; +void Kokkos::Impl::parse_command_line_arguments( + int& argc, char* argv[], InitializationSettings& settings) { + Tools::InitArguments tools_init_arguments; + combine(tools_init_arguments, settings); + Tools::Impl::parse_command_line_arguments(argc, argv, tools_init_arguments); + combine(settings, tools_init_arguments); + + int num_threads; + int device_id; + int num_devices; // deprecated + int skip_device; // deprecated + std::string map_device_id_by; + bool disable_warnings; + bool print_configuration; + bool tune_internals; - while (iarg < narg) { - if (check_int_arg(arg[iarg], "--kokkos-threads", &num_threads)) { - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; + auto get_flag = [](std::string s) -> std::string { + return s.erase(s.find('=')); + }; + + bool help_flag = false; + + int iarg = 0; + while (iarg < argc) { + bool remove_flag = false; + + if (check_arg(argv[iarg], "--kokkos-numa") || + check_arg(argv[iarg], "--numa")) { + warn_deprecated_command_line_argument(get_flag(argv[iarg])); + // remove flag if prefixed with '--kokkos-' + remove_flag = std::string(argv[iarg]).find("--kokkos-") == 0; + } else if (check_arg_int(argv[iarg], "--kokkos-num-threads", num_threads) || + check_arg_int(argv[iarg], "--num-threads", num_threads) || + check_arg_int(argv[iarg], "--kokkos-threads", num_threads) || + check_arg_int(argv[iarg], "--threads", num_threads)) { + if (get_flag(argv[iarg]) != 
"--kokkos-num-threads") { + warn_deprecated_command_line_argument(get_flag(argv[iarg]), + "--kokkos-num-threads"); } - kokkos_threads_found = true; - narg--; - } else if (!kokkos_threads_found && - check_int_arg(arg[iarg], "--threads", &num_threads)) { - iarg++; - } else if (check_int_arg(arg[iarg], "--kokkos-numa", &numa)) { - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; + if (!is_valid_num_threads(num_threads)) { + std::stringstream ss; + ss << "Error: command line argument '" << argv[iarg] << "' is invalid." + << " The number of threads must be greater than or equal to one." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); } - kokkos_numa_found = true; - narg--; - } else if (!kokkos_numa_found && - check_int_arg(arg[iarg], "--numa", &numa)) { - iarg++; - } else if (check_int_arg(arg[iarg], "--kokkos-device-id", &device) || - check_int_arg(arg[iarg], "--kokkos-device", &device)) { - if (check_arg(arg[iarg], "--kokkos-device")) { - warn_deprecated_command_line_argument("--kokkos-device", + settings.set_num_threads(num_threads); + remove_flag = std::string(argv[iarg]).find("--kokkos-") == 0; + } else if (check_arg_int(argv[iarg], "--kokkos-device-id", device_id) || + check_arg_int(argv[iarg], "--device-id", device_id) || + check_arg_int(argv[iarg], "--kokkos-device", device_id) || + check_arg_int(argv[iarg], "--device", device_id)) { + if (get_flag(argv[iarg]) != "--kokkos-device-id") { + warn_deprecated_command_line_argument(get_flag(argv[iarg]), "--kokkos-device-id"); } - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; + if (!is_valid_device_id(device_id)) { + std::stringstream ss; + ss << "Error: command line argument '" << argv[iarg] << "' is invalid." + << " The device id must be greater than or equal to zero." 
+ << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); } - kokkos_device_found = true; - narg--; - } else if (!kokkos_device_found && - (check_int_arg(arg[iarg], "--device-id", &device) || - check_int_arg(arg[iarg], "--device", &device))) { - if (check_arg(arg[iarg], "--device")) { - warn_deprecated_command_line_argument("--device", "--device-id"); + settings.set_device_id(device_id); + remove_flag = std::string(argv[iarg]).find("--kokkos-") == 0; + } else if (check_arg(argv[iarg], "--kokkos-num-devices") || + check_arg(argv[iarg], "--num-devices") || + check_arg(argv[iarg], "--kokkos-ndevices") || + check_arg(argv[iarg], "--ndevices")) { + if (check_arg(argv[iarg], "--num-devices")) { + warn_deprecated_command_line_argument("--num-devices", + "--kokkos-num-devices"); } - iarg++; - } else if (check_arg(arg[iarg], "--kokkos-num-devices") || - check_arg(arg[iarg], "--num-devices") || - check_arg(arg[iarg], "--kokkos-ndevices") || - check_arg(arg[iarg], "--ndevices")) { - if (check_arg(arg[iarg], "--ndevices")) { - warn_deprecated_command_line_argument("--ndevices", "--num-devices"); + if (check_arg(argv[iarg], "--ndevices")) { + warn_deprecated_command_line_argument("--ndevices", + "--kokkos-num-devices"); } - if (check_arg(arg[iarg], "--kokkos-ndevices")) { + if (check_arg(argv[iarg], "--kokkos-ndevices")) { warn_deprecated_command_line_argument("--kokkos-ndevices", "--kokkos-num-devices"); } + warn_deprecated_command_line_argument( + "--kokkos-num-devices", "--kokkos-map-device-id-by=mpi_rank"); // Find the number of device (expecting --device=XX) - if (!((strncmp(arg[iarg], "--kokkos-num-devices=", 21) == 0) || - (strncmp(arg[iarg], "--num-ndevices=", 14) == 0) || - (strncmp(arg[iarg], "--kokkos-ndevices=", 18) == 0) || - (strncmp(arg[iarg], "--ndevices=", 11) == 0))) + if (!((strncmp(argv[iarg], "--kokkos-num-devices=", 21) == 0) || + (strncmp(argv[iarg], "--num-devices=", 14) == 0) || + (strncmp(argv[iarg], "--kokkos-ndevices=", 18) == 0) 
|| + (strncmp(argv[iarg], "--ndevices=", 11) == 0))) throw_runtime_exception( "Error: expecting an '=INT[,INT]' after command line argument " - "'--num-devices/--kokkos-num-devices'. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); + "'--kokkos-num-devices'." + " Raised by Kokkos::initialize()."); - char* num1 = strchr(arg[iarg], '=') + 1; + char* num1 = strchr(argv[iarg], '=') + 1; char* num2 = strpbrk(num1, ","); int num1_len = num2 == nullptr ? strlen(num1) : num2 - num1; char* num1_only = new char[num1_len + 1]; @@ -685,400 +883,269 @@ void parse_command_line_arguments(int& narg, char* arg[], if (!is_unsigned_int(num1_only) || (strlen(num1_only) == 0)) { throw_runtime_exception( "Error: expecting an integer number after command line argument " - "'--kokkos-numdevices'. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); + "'--kokkos-num-devices'." + " Raised by Kokkos::initialize()."); + } + if (check_arg(argv[iarg], "--kokkos-num-devices") || + check_arg(argv[iarg], "--kokkos-ndevices")) { + num_devices = std::stoi(num1_only); + settings.set_num_devices(num_devices); + settings.set_map_device_id_by("mpi_rank"); } - if (check_arg(arg[iarg], "--kokkos-num-devices") || - check_arg(arg[iarg], "--kokkos-ndevices") || !kokkos_ndevices_found) - ndevices = std::stoi(num1_only); delete[] num1_only; if (num2 != nullptr) { if ((!is_unsigned_int(num2 + 1)) || (strlen(num2) == 1)) throw_runtime_exception( "Error: expecting an integer number after command line argument " - "'--kokkos-num-devices=XX,'. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); + "'--kokkos-num-devices=XX,'." 
+ " Raised by Kokkos::initialize()."); - if (check_arg(arg[iarg], "--kokkos-num-devices") || - check_arg(arg[iarg], "--kokkos-ndevices") || !kokkos_ndevices_found) + if (check_arg(argv[iarg], "--kokkos-num-devices") || + check_arg(argv[iarg], "--kokkos-ndevices")) { skip_device = std::stoi(num2 + 1); - } - - // Remove the --kokkos-num-devices argument from the list but leave - // --num-devices - if (check_arg(arg[iarg], "--kokkos-num-devices") || - check_arg(arg[iarg], "--kokkos-ndevices")) { - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; + settings.set_skip_device(skip_device); } - kokkos_ndevices_found = true; - narg--; - } else { - iarg++; - } - } else if (check_arg(arg[iarg], "--kokkos-disable-warnings")) { - disable_warnings = true; - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; } - narg--; - } else if (check_arg(arg[iarg], "--kokkos-tune-internals")) { - tune_internals = true; - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; + remove_flag = std::string(argv[iarg]).find("--kokkos-") == 0; + } else if (check_arg_bool(argv[iarg], "--kokkos-disable-warnings", + disable_warnings)) { + settings.set_disable_warnings(disable_warnings); + remove_flag = true; + } else if (check_arg_bool(argv[iarg], "--kokkos-print-configuration", + print_configuration)) { + settings.set_print_configuration(print_configuration); + remove_flag = true; + } else if (check_arg_bool(argv[iarg], "--kokkos-tune-internals", + tune_internals)) { + settings.set_tune_internals(tune_internals); + remove_flag = true; + } else if (check_arg(argv[iarg], "--kokkos-help") || + check_arg(argv[iarg], "--help")) { + help_flag = true; + remove_flag = std::string(argv[iarg]).find("--kokkos-") == 0; + } else if (check_arg_str(argv[iarg], "--kokkos-map-device-id-by", + map_device_id_by)) { + if (!is_valid_map_device_id_by(map_device_id_by)) { + std::stringstream ss; + ss << "Warning: command line argument '--kokkos-map-device-id-by=" + << map_device_id_by << 
"' is not recognized." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); } - narg--; - } else if (check_arg(arg[iarg], "--kokkos-help") || - check_arg(arg[iarg], "--help")) { - auto const help_message = R"( - -------------------------------------------------------------------------------- - -------------Kokkos command line arguments-------------------------------------- - -------------------------------------------------------------------------------- - The following arguments exist also without prefix 'kokkos' (e.g. --help). - The prefixed arguments will be removed from the list by Kokkos::initialize(), - the non-prefixed ones are not removed. Prefixed versions take precedence over - non prefixed ones, and the last occurrence of an argument overwrites prior - settings. - - --kokkos-help : print this message - --kokkos-disable-warnings : disable kokkos warning messages - --kokkos-tune-internals : allow Kokkos to autotune policies and declare - tuning features through the tuning system. If - left off, Kokkos uses heuristics - --kokkos-threads=INT : specify total number of threads or - number of threads per NUMA region if - used in conjunction with '--numa' option. - --kokkos-numa=INT : specify number of NUMA regions used by process. - --kokkos-device-id=INT : specify device id to be used by Kokkos. - --kokkos-num-devices=INT[,INT] : used when running MPI jobs. Specify number of - devices per node to be used. Process to device - mapping happens by obtaining the local MPI rank - and assigning devices round-robin. The optional - second argument allows for an existing device - to be ignored. This is most useful on workstations - with multiple GPUs of which one is used to drive - screen output. - --kokkos-tools-library : Equivalent to KOKKOS_PROFILE_LIBRARY environment - variable. Must either be full path to library or - name of library if the path is present in the - runtime library search path (e.g. 
LD_LIBRARY_PATH) - --kokkos-tools-help : Query the (loaded) kokkos-tool for its command-line - option support (which should then be passed via - --kokkos-tools-args="...") - --kokkos-tools-args=STR : A single (quoted) string of options which will be - whitespace delimited and passed to the loaded - kokkos-tool as command-line arguments. E.g. - `<EXE> --kokkos-tools-args="-c input.txt"` will - pass `<EXE> -c input.txt` as argc/argv to tool - -------------------------------------------------------------------------------- -)"; - std::cout << help_message << std::endl; + settings.set_map_device_id_by(map_device_id_by); + remove_flag = true; + } else if (std::regex_match(argv[iarg], + std::regex("-?-kokkos.*", std::regex::egrep))) { + warn_not_recognized_command_line_argument(argv[iarg]); + } - // Remove the --kokkos-help argument from the list but leave --help - if (check_arg(arg[iarg], "--kokkos-help")) { - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; - } - narg--; - } else { - iarg++; + if (remove_flag) { + // Shift the remainder of the argv list by one. Note that argv has + // (argc + 1) arguments, the last one always being nullptr. 
The following + // loop moves the trailing nullptr element as well + for (int k = iarg; k < argc; ++k) { + argv[k] = argv[k + 1]; } - } else + argc--; + } else { iarg++; + } + } + + if (help_flag) { + print_help_message(); } + if ((tools_init_arguments.args == Kokkos::Tools::InitArguments::unset_string_option) && - narg > 0) - tool_args = arg[0]; + argc > 0) { + settings.set_tools_args(argv[0]); + } } -void parse_environment_variables(InitArguments& arguments) { - auto& num_threads = arguments.num_threads; - auto& numa = arguments.num_numa; - auto& device = arguments.device_id; - auto& ndevices = arguments.ndevices; - auto& skip_device = arguments.skip_device; - auto& disable_warnings = arguments.disable_warnings; - auto& tune_internals = arguments.tune_internals; - auto& tool_lib = arguments.tool_lib; - auto& tool_args = arguments.tool_args; - auto& tool_help = arguments.tool_help; - char* endptr; - - auto tools_init_arguments = arguments.impl_get_tools_init_arguments(); +void Kokkos::Impl::parse_environment_variables( + InitializationSettings& settings) { + Tools::InitArguments tools_init_arguments; + combine(tools_init_arguments, settings); auto init_result = Tools::Impl::parse_environment_variables(tools_init_arguments); - if (init_result.result == Kokkos::Tools::Impl::InitializationStatus:: - environment_argument_mismatch) { + if (init_result.result == + Tools::Impl::InitializationStatus::environment_argument_mismatch) { Impl::throw_runtime_exception(init_result.error_message); } + combine(settings, tools_init_arguments); - tool_lib = tools_init_arguments.lib; - - if (tools_init_arguments.tune_internals != - Kokkos::Tools::InitArguments::PossiblyUnsetOption::unset) { - tune_internals = (tools_init_arguments.tune_internals == - Kokkos::Tools::InitArguments::PossiblyUnsetOption::on) - ? 
true - : false; - } - if (tools_init_arguments.help != - Kokkos::Tools::InitArguments::PossiblyUnsetOption::unset) { - tool_help = (tools_init_arguments.help == - Kokkos::Tools::InitArguments::PossiblyUnsetOption::on) - ? true - : false; + if (std::getenv("KOKKOS_NUMA")) { + warn_deprecated_environment_variable("KOKKOS_NUMA"); } - if (tools_init_arguments.lib != - Kokkos::Tools::InitArguments::unset_string_option) { - tool_lib = tools_init_arguments.lib; + int num_threads; + if (check_env_int("KOKKOS_NUM_THREADS", num_threads)) { + if (!is_valid_num_threads(num_threads)) { + std::stringstream ss; + ss << "Error: environment variable 'KOKKOS_NUM_THREADS=" << num_threads + << "' is invalid." + << " The number of threads must be greater than or equal to one." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + settings.set_num_threads(num_threads); } - if (tools_init_arguments.args != - Kokkos::Tools::InitArguments::unset_string_option) { - tool_args = tools_init_arguments.args; + int device_id; + if (check_env_int("KOKKOS_DEVICE_ID", device_id)) { + if (!is_valid_device_id(device_id)) { + std::stringstream ss; + ss << "Error: environment variable 'KOKKOS_DEVICE_ID" << device_id + << "' is invalid." + << " The device id must be greater than or equal to zero." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); + } + settings.set_device_id(device_id); } - - auto env_num_threads_str = std::getenv("KOKKOS_NUM_THREADS"); - if (env_num_threads_str != nullptr) { - errno = 0; - auto env_num_threads = std::strtol(env_num_threads_str, &endptr, 10); - if (endptr == env_num_threads_str) - Impl::throw_runtime_exception( - "Error: cannot convert KOKKOS_NUM_THREADS to an integer. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); - if (errno == ERANGE) - Impl::throw_runtime_exception( - "Error: KOKKOS_NUM_THREADS out of range of representable values by " - "an integer. 
Raised by Kokkos::initialize(int narg, char* argc[])."); - if ((num_threads != -1) && (env_num_threads != num_threads)) - Impl::throw_runtime_exception( - "Error: expecting a match between --kokkos-threads and " - "KOKKOS_NUM_THREADS if both are set. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); - else - num_threads = env_num_threads; + int num_devices; + int rand_devices; + bool has_num_devices = check_env_int("KOKKOS_NUM_DEVICES", num_devices); + bool has_rand_devices = check_env_int("KOKKOS_RAND_DEVICES", rand_devices); + if (has_rand_devices && has_num_devices) { + Impl::throw_runtime_exception( + "Error: cannot specify both KOKKOS_NUM_DEVICES and " + "KOKKOS_RAND_DEVICES." + " Raised by Kokkos::initialize()."); } - auto env_numa_str = std::getenv("KOKKOS_NUMA"); - if (env_numa_str != nullptr) { - errno = 0; - auto env_numa = std::strtol(env_numa_str, &endptr, 10); - if (endptr == env_numa_str) - Impl::throw_runtime_exception( - "Error: cannot convert KOKKOS_NUMA to an integer. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); - if (errno == ERANGE) - Impl::throw_runtime_exception( - "Error: KOKKOS_NUMA out of range of representable values by an " - "integer. Raised by Kokkos::initialize(int narg, char* argc[])."); - if ((numa != -1) && (env_numa != numa)) - Impl::throw_runtime_exception( - "Error: expecting a match between --kokkos-numa and KOKKOS_NUMA if " - "both are set. 
Raised by Kokkos::initialize(int narg, char* " - "argc[])."); - else - numa = env_numa; + if (has_num_devices) { + warn_deprecated_environment_variable("KOKKOS_NUM_DEVICES", + "KOKKOS_MAP_DEVICE_ID_BY=mpi_rank"); + settings.set_map_device_id_by("mpi_rank"); + settings.set_num_devices(num_devices); } - auto env_device_str = std::getenv("KOKKOS_DEVICE_ID"); - if (env_device_str != nullptr) { - errno = 0; - auto env_device = std::strtol(env_device_str, &endptr, 10); - if (endptr == env_device_str) - Impl::throw_runtime_exception( - "Error: cannot convert KOKKOS_DEVICE_ID to an integer. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); - if (errno == ERANGE) - Impl::throw_runtime_exception( - "Error: KOKKOS_DEVICE_ID out of range of representable values by an " - "integer. Raised by Kokkos::initialize(int narg, char* argc[])."); - if ((device != -1) && (env_device != device)) - Impl::throw_runtime_exception( - "Error: expecting a match between --kokkos-device and " - "KOKKOS_DEVICE_ID if both are set. Raised by Kokkos::initialize(int " - "narg, char* argc[])."); - else - device = env_device; + if (has_rand_devices) { + warn_deprecated_environment_variable("KOKKOS_RAND_DEVICES", + "KOKKOS_MAP_DEVICE_ID_BY=random"); + settings.set_map_device_id_by("random"); + settings.set_num_devices(rand_devices); } - auto env_rdevices_str = std::getenv("KOKKOS_RAND_DEVICES"); - auto env_ndevices_str = std::getenv("KOKKOS_NUM_DEVICES"); - if (env_ndevices_str != nullptr || env_rdevices_str != nullptr) { - errno = 0; - if (env_ndevices_str != nullptr && env_rdevices_str != nullptr) { - Impl::throw_runtime_exception( - "Error: cannot specify both KOKKOS_NUM_DEVICES and " - "KOKKOS_RAND_DEVICES. 
" - "Raised by Kokkos::initialize(int narg, char* argc[])."); - } - int rdevices = -1; - if (env_ndevices_str != nullptr) { - auto env_ndevices = std::strtol(env_ndevices_str, &endptr, 10); - if (endptr == env_ndevices_str) - Impl::throw_runtime_exception( - "Error: cannot convert KOKKOS_NUM_DEVICES to an integer. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); - if (errno == ERANGE) - Impl::throw_runtime_exception( - "Error: KOKKOS_NUM_DEVICES out of range of representable values by " - "an integer. Raised by Kokkos::initialize(int narg, char* " - "argc[])."); - if ((ndevices != -1) && (env_ndevices != ndevices)) - Impl::throw_runtime_exception( - "Error: expecting a match between --kokkos-ndevices and " - "KOKKOS_NUM_DEVICES if both are set. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); - else - ndevices = env_ndevices; - } else { // you set KOKKOS_RAND_DEVICES - auto env_rdevices = std::strtol(env_rdevices_str, &endptr, 10); - if (endptr == env_ndevices_str) - Impl::throw_runtime_exception( - "Error: cannot convert KOKKOS_RAND_DEVICES to an integer. Raised " - "by Kokkos::initialize(int narg, char* argc[])."); - if (errno == ERANGE) - Impl::throw_runtime_exception( - "Error: KOKKOS_RAND_DEVICES out of range of representable values " - "by an integer. Raised by Kokkos::initialize(int narg, char* " - "argc[])."); - else - rdevices = env_rdevices; - } - // Skip device - auto env_skip_device_str = std::getenv("KOKKOS_SKIP_DEVICE"); - if (env_skip_device_str != nullptr) { - errno = 0; - auto env_skip_device = std::strtol(env_skip_device_str, &endptr, 10); - if (endptr == env_skip_device_str) - Impl::throw_runtime_exception( - "Error: cannot convert KOKKOS_SKIP_DEVICE to an integer. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); - if (errno == ERANGE) - Impl::throw_runtime_exception( - "Error: KOKKOS_SKIP_DEVICE out of range of representable values by " - "an integer. 
Raised by Kokkos::initialize(int narg, char* " - "argc[])."); - if ((skip_device != 9999) && (env_skip_device != skip_device)) - Impl::throw_runtime_exception( - "Error: expecting a match between --kokkos-ndevices and " - "KOKKOS_SKIP_DEVICE if both are set. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); - else - skip_device = env_skip_device; - } - if (rdevices > 0) { - if (skip_device > 0 && rdevices == 1) - Impl::throw_runtime_exception( - "Error: cannot KOKKOS_SKIP_DEVICE the only KOKKOS_RAND_DEVICE. " - "Raised by Kokkos::initialize(int narg, char* argc[])."); - - std::srand(get_process_id()); - while (device < 0) { - int test_device = std::rand() % rdevices; - if (test_device != skip_device) device = test_device; - } + if (has_num_devices || has_rand_devices) { + int skip_device; + if (check_env_int("KOKKOS_SKIP_DEVICE", skip_device)) { + settings.set_skip_device(skip_device); } } - char* env_disablewarnings_str = std::getenv("KOKKOS_DISABLE_WARNINGS"); - if (env_disablewarnings_str != nullptr) { - std::string env_str(env_disablewarnings_str); // deep-copies string - for (char& c : env_str) { - c = toupper(c); + bool disable_warnings; + if (check_env_bool("KOKKOS_DISABLE_WARNINGS", disable_warnings)) { + settings.set_disable_warnings(disable_warnings); + } + bool print_configuration; + if (check_env_bool("KOKKOS_PRINT_CONFIGURATION", print_configuration)) { + settings.set_print_configuration(print_configuration); + } + bool tune_internals; + if (check_env_bool("KOKKOS_TUNE_INTERNALS", tune_internals)) { + settings.set_tune_internals(tune_internals); + } + char const* map_device_id_by = std::getenv("KOKKOS_MAP_DEVICE_ID_BY"); + if (map_device_id_by != nullptr) { + if (std::getenv("KOKKOS_DEVICE_ID")) { + std::cerr << "Warning: environment variable KOKKOS_MAP_DEVICE_ID_BY" + << "ignored since KOKKOS_DEVICE_ID is specified." + << " Raised by Kokkos::initialize()." 
<< std::endl; + } + if (!is_valid_map_device_id_by(map_device_id_by)) { + std::stringstream ss; + ss << "Warning: environment variable 'KOKKOS_MAP_DEVICE_ID_BY=" + << map_device_id_by << "' is not recognized." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); } - const auto _rc = std::regex_constants::icase | std::regex_constants::egrep; - const auto _re = std::regex("^(true|on|yes|[1-9])$", _rc); - if (std::regex_match(env_str, _re)) - disable_warnings = true; - else if (disable_warnings) - Impl::throw_runtime_exception( - "Error: expecting a match between --kokkos-disable-warnings and " - "KOKKOS_DISABLE_WARNINGS if both are set. Raised by " - "Kokkos::initialize(int narg, char* argc[])."); + settings.set_map_device_id_by(map_device_id_by); } } -} // namespace - -} // namespace Impl -} // namespace Kokkos - //---------------------------------------------------------------------------- -namespace Kokkos { - -void initialize(int& narg, char* arg[]) { - InitArguments arguments; - Impl::parse_command_line_arguments(narg, arg, arguments); - Impl::parse_environment_variables(arguments); - Impl::initialize_internal(arguments); +void Kokkos::initialize(int& argc, char* argv[]) { + InitializationSettings settings; + Impl::parse_environment_variables(settings); + Impl::parse_command_line_arguments(argc, argv, settings); + initialize_internal(settings); } -void initialize(InitArguments arguments) { - Impl::parse_environment_variables(arguments); - Impl::initialize_internal(arguments); +void Kokkos::initialize(InitializationSettings const& settings) { + InitializationSettings tmp; + Impl::parse_environment_variables(tmp); + combine(tmp, settings); + initialize_internal(tmp); } -namespace Impl { - -void pre_initialize(const InitArguments& args) { - pre_initialize_internal(args); +void Kokkos::Impl::pre_initialize(const InitializationSettings& settings) { + pre_initialize_internal(settings); } -void post_initialize(const InitArguments& args) { - 
post_initialize_internal(args); +void Kokkos::Impl::post_initialize(const InitializationSettings& settings) { + post_initialize_internal(settings); } -} // namespace Impl - -void push_finalize_hook(std::function<void()> f) { finalize_hooks.push(f); } +void Kokkos::push_finalize_hook(std::function<void()> f) { + finalize_hooks.push(f); +} -void finalize() { Impl::finalize_internal(); } +void Kokkos::finalize() { finalize_internal(); } -void finalize_all() { - enum : bool { all_spaces = true }; - Impl::finalize_internal(all_spaces); -} +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +KOKKOS_DEPRECATED void Kokkos::finalize_all() { finalize_internal(); } +#endif -void fence() { Impl::fence_internal("Kokkos::fence: Unnamed Global Fence"); } -void fence(const std::string& name) { Impl::fence_internal(name); } +#ifdef KOKKOS_COMPILER_INTEL +void Kokkos::fence() { fence("Kokkos::fence: Unnamed Global Fence"); } +#endif +void Kokkos::fence(const std::string& name) { fence_internal(name); } -void print_helper(std::ostringstream& out, +namespace { +void print_helper(std::ostream& os, const std::map<std::string, std::string>& print_me) { for (const auto& kv : print_me) { - out << kv.first << ": " << kv.second << '\n'; + os << kv.first << ": " << kv.second << '\n'; } } +} // namespace -void print_configuration(std::ostream& out, const bool detail) { - std::ostringstream msg; +void Kokkos::print_configuration(std::ostream& os, bool verbose) { + print_helper(os, metadata_map["version_info"]); - print_helper(msg, Kokkos::Impl::metadata_map["version_info"]); + os << "Compiler:\n"; + print_helper(os, metadata_map["compiler_version"]); - msg << "Compiler:" << std::endl; - print_helper(msg, Kokkos::Impl::metadata_map["compiler_version"]); + os << "Architecture:\n"; + print_helper(os, metadata_map["architecture"]); - msg << "Architecture:" << std::endl; - print_helper(msg, Kokkos::Impl::metadata_map["architecture"]); + os << "Atomics:\n"; + print_helper(os, metadata_map["atomics"]); - msg << 
"Atomics:" << std::endl; - print_helper(msg, Kokkos::Impl::metadata_map["atomics"]); + os << "Vectorization:\n"; + print_helper(os, metadata_map["vectorization"]); - msg << "Vectorization:" << std::endl; - print_helper(msg, Kokkos::Impl::metadata_map["vectorization"]); + os << "Memory:\n"; + print_helper(os, metadata_map["memory"]); - msg << "Memory:" << std::endl; - print_helper(msg, Kokkos::Impl::metadata_map["memory"]); + os << "Options:\n"; + print_helper(os, metadata_map["options"]); - msg << "Options:" << std::endl; - print_helper(msg, Kokkos::Impl::metadata_map["options"]); + Impl::ExecSpaceManager::get_instance().print_configuration(os, verbose); +} - Impl::ExecSpaceManager::get_instance().print_configuration(msg, detail); +KOKKOS_ATTRIBUTE_NODISCARD bool Kokkos::is_initialized() noexcept { + return g_is_initialized; +} - out << msg.str() << std::endl; +KOKKOS_ATTRIBUTE_NODISCARD bool Kokkos::is_finalized() noexcept { + return g_is_finalized; } -bool is_initialized() noexcept { return g_is_initialized; } +bool Kokkos::show_warnings() noexcept { return g_show_warnings; } + +bool Kokkos::tune_internals() noexcept { return g_tune_internals; } -bool show_warnings() noexcept { return g_show_warnings; } -bool tune_internals() noexcept { return g_tune_internals; } +namespace Kokkos { #ifdef KOKKOS_COMPILER_PGI namespace Impl { @@ -1088,5 +1155,8 @@ namespace Impl { void _kokkos_pgi_compiler_bug_workaround() {} } // end namespace Impl #endif - } // namespace Kokkos + +Kokkos::Impl::InitializationSettingsHelper<std::string>::storage_type const + Kokkos::Impl::InitializationSettingsHelper<std::string>::unspecified = + "some string we don't expect user would ever provide"; diff --git a/packages/kokkos/core/src/impl/Kokkos_ExecSpaceInitializer.hpp b/packages/kokkos/core/src/impl/Kokkos_DeviceManagement.hpp similarity index 75% rename from packages/kokkos/core/src/impl/Kokkos_ExecSpaceInitializer.hpp rename to packages/kokkos/core/src/impl/Kokkos_DeviceManagement.hpp 
index 1a0b10e40fe5e280746c3c0443202a4413585a0c..34421f0fe58447c47af872cb259ddab3d0f79413 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ExecSpaceInitializer.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_DeviceManagement.hpp @@ -42,26 +42,21 @@ //@HEADER */ -#ifndef KOKKOS_EXEC_SPACE_INITIALIZER_HPP -#define KOKKOS_EXEC_SPACE_INITIALIZER_HPP +#ifndef KOKKOS_DEVICE_MANAGEMENT_HPP +#define KOKKOS_DEVICE_MANAGEMENT_HPP -#include <iosfwd> +#include <vector> namespace Kokkos { +class InitializationSettings; namespace Impl { - -class ExecSpaceInitializerBase { - public: - virtual void initialize(const InitArguments &args) = 0; - virtual void finalize(const bool all_spaces) = 0; - virtual void fence() = 0; - virtual void fence(const std::string &) = 0; - virtual void print_configuration(std::ostream &msg, const bool detail) = 0; - ExecSpaceInitializerBase() = default; - virtual ~ExecSpaceInitializerBase() = default; -}; - +int get_gpu(const Kokkos::InitializationSettings& settings); +// This declaration is provided for testing purposes only +int get_ctest_gpu(const char* local_rank_str); +// ditto +std::vector<int> get_visible_devices( + Kokkos::InitializationSettings const& settings, int device_count); } // namespace Impl } // namespace Kokkos -#endif // KOKKOS_EXEC_SPACE_INITIALIZER_HPP +#endif diff --git a/packages/kokkos/core/src/impl/Kokkos_EBO.hpp b/packages/kokkos/core/src/impl/Kokkos_EBO.hpp index dc8e5e4d830623b4fa794966da4b724467e181dc..87d6c044a57756a9bb49d1b930f364c116c9dfb9 100644 --- a/packages/kokkos/core/src/impl/Kokkos_EBO.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_EBO.hpp @@ -79,21 +79,19 @@ struct EBOBaseImpl; template <class T, template <class...> class CtorNotOnDevice> struct EBOBaseImpl<T, true, CtorNotOnDevice> { - template < - class... Args, class _ignored = void, - typename std::enable_if<std::is_void<_ignored>::value && - std::is_constructible<T, Args...>::value && - !CtorNotOnDevice<Args...>::value, - int>::type = 0> + template <class... 
Args, class _ignored = void, + std::enable_if_t<std::is_void<_ignored>::value && + std::is_constructible<T, Args...>::value && + !CtorNotOnDevice<Args...>::value, + int> = 0> KOKKOS_FORCEINLINE_FUNCTION constexpr explicit EBOBaseImpl( Args&&...) noexcept {} - template < - class... Args, class _ignored = void, - typename std::enable_if<std::is_void<_ignored>::value && - std::is_constructible<T, Args...>::value && - CtorNotOnDevice<Args...>::value, - long>::type = 0> + template <class... Args, class _ignored = void, + std::enable_if_t<std::is_void<_ignored>::value && + std::is_constructible<T, Args...>::value && + CtorNotOnDevice<Args...>::value, + long> = 0> inline constexpr explicit EBOBaseImpl(Args&&...) noexcept {} KOKKOS_DEFAULTED_FUNCTION @@ -139,22 +137,20 @@ template <class T, template <class...> class CTorsNotOnDevice> struct EBOBaseImpl<T, false, CTorsNotOnDevice> { T m_ebo_object; - template < - class... Args, class _ignored = void, - typename std::enable_if<std::is_void<_ignored>::value && - !CTorsNotOnDevice<Args...>::value && - std::is_constructible<T, Args...>::value, - int>::type = 0> + template <class... Args, class _ignored = void, + std::enable_if_t<std::is_void<_ignored>::value && + !CTorsNotOnDevice<Args...>::value && + std::is_constructible<T, Args...>::value, + int> = 0> KOKKOS_FORCEINLINE_FUNCTION constexpr explicit EBOBaseImpl( Args&&... args) noexcept(noexcept(T(std::forward<Args>(args)...))) : m_ebo_object(std::forward<Args>(args)...) {} - template < - class... Args, class _ignored = void, - typename std::enable_if<std::is_void<_ignored>::value && - CTorsNotOnDevice<Args...>::value && - std::is_constructible<T, Args...>::value, - long>::type = 0> + template <class... Args, class _ignored = void, + std::enable_if_t<std::is_void<_ignored>::value && + CTorsNotOnDevice<Args...>::value && + std::is_constructible<T, Args...>::value, + long> = 0> inline constexpr explicit EBOBaseImpl(Args&&... 
args) noexcept( noexcept(T(std::forward<Args>(args)...))) : m_ebo_object(std::forward<Args>(args)...) {} diff --git a/packages/kokkos/core/src/impl/Kokkos_Error.cpp b/packages/kokkos/core/src/impl/Kokkos_Error.cpp index a28d008587a82ff33b9cb9f23125c8a906f4be66..750228331578bab658f14106d80b9cb9e231409a 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Error.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Error.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <cstring> #include <cstdlib> @@ -138,6 +142,9 @@ void Experimental::RawMemoryAllocationFailure::print_error_message( case AllocationMechanism::CudaHostAlloc: o << "cudaHostAlloc()."; break; case AllocationMechanism::HIPMalloc: o << "hipMalloc()."; break; case AllocationMechanism::HIPHostMalloc: o << "hipHostMalloc()."; break; + case AllocationMechanism::HIPMallocManaged: + o << "hipMallocManaged()."; + break; case AllocationMechanism::SYCLMallocDevice: o << "sycl::malloc_device()."; break; diff --git a/packages/kokkos/core/src/impl/Kokkos_Error.hpp b/packages/kokkos/core/src/impl/Kokkos_Error.hpp index 5d7c60fba9a2b7bec65b6a3ba48c50d94af0d6cd..63b40f297e08bc7b61214c14e6024e53b33e9832 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Error.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Error.hpp @@ -67,12 +67,65 @@ namespace Impl { [[noreturn]] void host_abort(const char *const); -void throw_runtime_exception(const std::string &); +#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDA_ARCH__) + +#if defined(__APPLE__) || defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) +// cuda_abort does not abort when building for macOS. 
+// required to workaround failures in random number generator unit tests with +// pre-volta architectures +#define KOKKOS_IMPL_ABORT_NORETURN +#else +// cuda_abort aborts when building for other platforms than macOS +#define KOKKOS_IMPL_ABORT_NORETURN [[noreturn]] +#endif + +#elif defined(KOKKOS_COMPILER_NVHPC) + +#define KOKKOS_IMPL_ABORT_NORETURN + +#elif defined(KOKKOS_ENABLE_HIP) && defined(__HIP_DEVICE_COMPILE__) +// HIP aborts +#define KOKKOS_IMPL_ABORT_NORETURN [[noreturn]] +#elif defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__) +// FIXME_SYCL SYCL doesn't abort +#define KOKKOS_IMPL_ABORT_NORETURN +#elif !defined(KOKKOS_ENABLE_OPENMPTARGET) +// Host aborts +#define KOKKOS_IMPL_ABORT_NORETURN [[noreturn]] +#else +// Everything else does not abort +#define KOKKOS_IMPL_ABORT_NORETURN +#endif + +#ifdef KOKKOS_ENABLE_SYCL // FIXME_SYCL +#define KOKKOS_IMPL_ABORT_NORETURN_DEVICE +#else +#define KOKKOS_IMPL_ABORT_NORETURN_DEVICE KOKKOS_IMPL_ABORT_NORETURN +#endif + +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) +KOKKOS_IMPL_ABORT_NORETURN_DEVICE inline KOKKOS_IMPL_DEVICE_FUNCTION void +device_abort(const char *const msg) { +#if defined(KOKKOS_ENABLE_CUDA) + ::Kokkos::Impl::cuda_abort(msg); +#elif defined(KOKKOS_ENABLE_HIP) + ::Kokkos::Impl::hip_abort(msg); +#elif defined(KOKKOS_ENABLE_SYCL) + ::Kokkos::Impl::sycl_abort(msg); +#elif defined(KOKKOS_ENABLE_OPENMPTARGET) + printf("%s", msg); // FIXME_OPENMPTARGET +#else +#error faulty logic +#endif +} +#endif + +[[noreturn]] void throw_runtime_exception(const std::string &msg); void traceback_callstack(std::ostream &); std::string human_memory_size(size_t arg_bytes); -void throw_runtime_exception(const std::string &msg); } // namespace Impl @@ -97,6 +150,7 @@ class RawMemoryAllocationFailure : public std::bad_alloc { CudaHostAlloc, HIPMalloc, HIPHostMalloc, + HIPMallocManaged, SYCLMallocDevice, SYCLMallocShared, 
SYCLMallocHost @@ -171,48 +225,16 @@ class RawMemoryAllocationFailure : public std::bad_alloc { //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDA_ARCH__) - -#if defined(__APPLE__) || defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) -// cuda_abort does not abort when building for macOS. -// required to workaround failures in random number generator unit tests with -// pre-volta architectures -#define KOKKOS_IMPL_ABORT_NORETURN -#else -// cuda_abort aborts when building for other platforms than macOS -#define KOKKOS_IMPL_ABORT_NORETURN [[noreturn]] -#endif - -#elif defined(KOKKOS_ENABLE_HIP) && defined(__HIP_DEVICE_COMPILE__) -// HIP aborts -#define KOKKOS_IMPL_ABORT_NORETURN [[noreturn]] -#elif defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__) -// FIXME_SYCL SYCL doesn't abort -#define KOKKOS_IMPL_ABORT_NORETURN -#elif !defined(KOKKOS_ENABLE_OPENMPTARGET) -// Host aborts -#define KOKKOS_IMPL_ABORT_NORETURN [[noreturn]] -#else -// Everything else does not abort -#define KOKKOS_IMPL_ABORT_NORETURN -#endif - namespace Kokkos { + KOKKOS_IMPL_ABORT_NORETURN KOKKOS_INLINE_FUNCTION void abort( const char *const message) { -#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDA_ARCH__) - Kokkos::Impl::cuda_abort(message); -#elif defined(KOKKOS_ENABLE_HIP) && defined(__HIP_DEVICE_COMPILE__) - Kokkos::Impl::hip_abort(message); -#elif defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__) - Kokkos::Impl::sycl_abort(message); -#elif !defined(KOKKOS_ENABLE_OPENMPTARGET) - Kokkos::Impl::host_abort(message); -#else - (void)message; // FIXME_OPENMPTARGET -#endif + KOKKOS_IF_ON_HOST(::Kokkos::Impl::host_abort(message);) + KOKKOS_IF_ON_DEVICE(::Kokkos::Impl::device_abort(message);) } +#undef KOKKOS_IMPL_ABORT_NORETURN + } // namespace Kokkos //---------------------------------------------------------------------------- 
diff --git a/packages/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp b/packages/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp index 1c337b9575fadaa34f4d97028eac92ea886fcf2d..75b89c73a9cb03b36e955fff45e46719caa7b4a1 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp @@ -42,19 +42,25 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Core.hpp> #include <sstream> namespace Kokkos { namespace Impl { -PerTeamValue::PerTeamValue(int arg) : value(arg) {} +PerTeamValue::PerTeamValue(size_t arg) : value(arg) {} -PerThreadValue::PerThreadValue(int arg) : value(arg) {} +PerThreadValue::PerThreadValue(size_t arg) : value(arg) {} } // namespace Impl -Impl::PerTeamValue PerTeam(const int& arg) { return Impl::PerTeamValue(arg); } +Impl::PerTeamValue PerTeam(const size_t& arg) { + return Impl::PerTeamValue(arg); +} -Impl::PerThreadValue PerThread(const int& arg) { +Impl::PerThreadValue PerThread(const size_t& arg) { return Impl::PerThreadValue(arg); } diff --git a/packages/kokkos/core/src/impl/Kokkos_ExecSpaceManager.hpp b/packages/kokkos/core/src/impl/Kokkos_ExecSpaceManager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..354bdde9bf7696e621226260f2dfd279dea566fd --- /dev/null +++ b/packages/kokkos/core/src/impl/Kokkos_ExecSpaceManager.hpp @@ -0,0 +1,162 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXEC_SPACE_MANAGER_HPP +#define KOKKOS_EXEC_SPACE_MANAGER_HPP + +#include <impl/Kokkos_InitializationSettings.hpp> +#include <Kokkos_DetectionIdiom.hpp> +#include <Kokkos_Concepts.hpp> + +#include <iosfwd> +#include <map> +#include <string> + +namespace { + +template <class T> +using public_member_types_t = std::enable_if_t< + Kokkos::is_execution_space<typename T::execution_space>::value && + Kokkos::is_memory_space<typename T::memory_space>::value && + Kokkos::is_device<typename T::device_type>::value && + Kokkos::is_array_layout<typename T::array_layout>::value && + std::is_integral<typename T::size_type>::value && + Kokkos::is_memory_space<typename T::scratch_memory_space>::value>; + +template <class T> +using print_configuration_t = std::enable_if_t< + std::is_void<decltype(std::declval<T const&>().print_configuration( + std::declval<std::ostream&>()))>::value && + std::is_void<decltype(std::declval<T const&>().print_configuration( + std::declval<std::ostream&>(), false))>::value>; + +template <class T> +using initialize_finalize_t = std::enable_if_t< + std::is_void<decltype(T::impl_initialize( + std::declval<Kokkos::InitializationSettings const&>()))>::value && + std::is_void<decltype(T::impl_finalize())>::value>; + +template <class T> +using fence_t = std::enable_if_t< + std::is_void<decltype(std::declval<T const&>().fence())>::value && + std::is_void<decltype(std::declval<T const&>().fence("name"))>::value && + std::is_void<decltype(T::impl_static_fence("name"))>::value>; + +#define STATIC_ASSERT(...) 
static_assert(__VA_ARGS__, "") // FIXME C++17 + +template <class ExecutionSpace> +constexpr bool check_valid_execution_space() { + using Kokkos::is_detected; + STATIC_ASSERT(std::is_default_constructible<ExecutionSpace>::value); + STATIC_ASSERT(is_detected<public_member_types_t, ExecutionSpace>::value); + STATIC_ASSERT(is_detected<print_configuration_t, ExecutionSpace>::value); + STATIC_ASSERT(is_detected<initialize_finalize_t, ExecutionSpace>::value); + STATIC_ASSERT(is_detected<fence_t, ExecutionSpace>::value); +#ifndef KOKKOS_ENABLE_HPX // FIXME_HPX + STATIC_ASSERT(sizeof(ExecutionSpace) <= 2 * sizeof(void*)); +#endif + return true; +} + +#undef STATIC_ASSERT + +} // namespace + +namespace Kokkos { +namespace Impl { + +struct ExecSpaceBase { + virtual void initialize(InitializationSettings const&) = 0; + virtual void finalize() = 0; + virtual void static_fence(std::string const&) = 0; + virtual void print_configuration(std::ostream& os, bool verbose) = 0; + virtual ~ExecSpaceBase() = default; +}; + +template <class ExecutionSpace> +struct ExecSpaceDerived : ExecSpaceBase { + static_assert(check_valid_execution_space<ExecutionSpace>(), ""); + void initialize(InitializationSettings const& settings) final { + ExecutionSpace::impl_initialize(settings); + } + void finalize() final { ExecutionSpace::impl_finalize(); } + void static_fence(std::string const& label) final { + ExecutionSpace::impl_static_fence(label); + } + void print_configuration(std::ostream& os, bool verbose) final { + ExecutionSpace().print_configuration(os, verbose); + } +}; + +/* ExecSpaceManager - Responsible for initializing all the registered + * backends. 
Backends are registered using the register_space_initializer() + * function which should be called from a global context so that it is called + * prior to initialize_spaces() which is called from Kokkos::initialize() + */ +class ExecSpaceManager { + std::map<std::string, std::unique_ptr<ExecSpaceBase>> exec_space_factory_list; + ExecSpaceManager() = default; + + public: + void register_space_factory(std::string name, + std::unique_ptr<ExecSpaceBase> ptr); + void initialize_spaces(const Kokkos::InitializationSettings& settings); + void finalize_spaces(); + void static_fence(const std::string&); + void print_configuration(std::ostream& os, bool verbose); + static ExecSpaceManager& get_instance(); +}; + +template <class ExecutionSpace> +int initialize_space_factory(std::string name) { + auto space_ptr = std::make_unique<ExecSpaceDerived<ExecutionSpace>>(); + ExecSpaceManager::get_instance().register_space_factory(name, + std::move(space_ptr)); + return 1; +} + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp deleted file mode 100644 index 504fba0268815669acff6a2a925bff4df0b0faae..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp +++ /dev/null @@ -1,2055 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_FUNCTORADAPTER_HPP -#define KOKKOS_FUNCTORADAPTER_HPP - -#include <cstddef> -#include <Kokkos_Core_fwd.hpp> -#include <impl/Kokkos_Traits.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template <class FunctorType, class Enable = void> -struct ReduceFunctorHasInit { - enum : bool { value = false }; -}; - -// The else clause idiom failed with NVCC+MSVC, causing some symbols not being -// compiled for the device. 
The code in there is anyway sketchy, and likely not -// standard compliant (just happens to work on all compilers we ever used) -// We intend to replace all of this long term with proper detection idiom. -#if defined(KOKKOS_COMPILER_MSVC) || defined(KOKKOS_IMPL_WINDOWS_CUDA) -template <class> -using impl_void_t_workaround = void; - -template <class F> -using init_archetype = decltype(&F::init); - -template <class FunctorType> -struct ReduceFunctorHasInit< - FunctorType, impl_void_t_workaround<init_archetype<FunctorType>>> { - enum : bool { value = true }; -}; -#else -template <class FunctorType> -struct ReduceFunctorHasInit< - FunctorType, - typename std::enable_if<0 < sizeof(&FunctorType::init)>::type> { - enum : bool { value = true }; -}; -#endif - -template <class FunctorType, class Enable = void> -struct ReduceFunctorHasJoin { - enum : bool { value = false }; -}; - -#if defined(KOKKOS_COMPILER_MSVC) || defined(KOKKOS_IMPL_WINDOWS_CUDA) -template <class F> -using join_archetype = decltype(&F::join); - -template <class FunctorType> -struct ReduceFunctorHasJoin< - FunctorType, impl_void_t_workaround<join_archetype<FunctorType>>> { - enum : bool { value = true }; -}; -#else -template <class FunctorType> -struct ReduceFunctorHasJoin< - FunctorType, - typename std::enable_if<0 < sizeof(&FunctorType::join)>::type> { - enum : bool { value = true }; -}; -#endif - -template <class FunctorType, class Enable = void> -struct ReduceFunctorHasFinal { - enum : bool { value = false }; -}; - -#if defined(KOKKOS_COMPILER_MSVC) || defined(KOKKOS_IMPL_WINDOWS_CUDA) -template <class F> -using final_archetype = decltype(&F::final); - -template <class FunctorType> -struct ReduceFunctorHasFinal< - FunctorType, impl_void_t_workaround<final_archetype<FunctorType>>> { - enum : bool { value = true }; -}; -#else -template <class FunctorType> -struct ReduceFunctorHasFinal< - FunctorType, - typename std::enable_if<0 < sizeof(&FunctorType::final)>::type> { - enum : bool { value = true }; -}; 
-#endif - -template <class FunctorType, class Enable = void> -struct ReduceFunctorHasShmemSize { - enum : bool { value = false }; -}; - -#if defined(KOKKOS_COMPILER_MSVC) || defined(KOKKOS_IMPL_WINDOWS_CUDA) -template <class F> -using shmemsize_archetype = decltype(&F::team_shmem_size); - -template <class FunctorType> -struct ReduceFunctorHasShmemSize< - FunctorType, impl_void_t_workaround<shmemsize_archetype<FunctorType>>> { - enum : bool { value = true }; -}; -#else -template <class FunctorType> -struct ReduceFunctorHasShmemSize< - FunctorType, - typename std::enable_if<0 < sizeof(&FunctorType::team_shmem_size)>::type> { - enum : bool { value = true }; -}; -#endif - -template <class FunctorType, class ArgTag, class Enable = void> -struct FunctorDeclaresValueType : public std::false_type {}; - -template <class FunctorType, class ArgTag> -struct FunctorDeclaresValueType<FunctorType, ArgTag, - void_t<typename FunctorType::value_type>> - : public std::true_type {}; - -/** \brief Query Functor and execution policy argument tag for value type. - * - * If C++11 enabled and 'value_type' is not explicitly declared then attempt - * to deduce the type from FunctorType::operator(). - */ -template <class FunctorType, class ArgTag, - bool Dec = FunctorDeclaresValueType<FunctorType, ArgTag>::value> -struct FunctorValueTraits { - using value_type = void; - using pointer_type = void; - using reference_type = void; - using functor_type = void; - - enum { StaticValueSize = 0 }; - - KOKKOS_FORCEINLINE_FUNCTION static unsigned value_count(const FunctorType&) { - return 0; - } - - KOKKOS_FORCEINLINE_FUNCTION static unsigned value_size(const FunctorType&) { - return 0; - } -}; - -template <class ArgTag> -struct FunctorValueTraits<void, ArgTag, false> { - using value_type = void; - using pointer_type = void; - using reference_type = void; - using functor_type = void; -}; - -/** \brief FunctorType::value_type is explicitly declared so use it. 
- * - * Two options for declaration - * - * 1) A plain-old-data (POD) type - * using value_type = {pod_type}; - * - * 2) An array of POD of a runtime specified count. - * using value_type = {pod_type}[]; - * const unsigned value_count ; - */ -template <class FunctorType, class ArgTag> -struct FunctorValueTraits<FunctorType, ArgTag, - true /* == exists FunctorType::value_type */> { - using value_type = - typename std::remove_extent<typename FunctorType::value_type>::type; - using functor_type = FunctorType; - - static_assert((sizeof(value_type) < sizeof(int)) || - 0 == (sizeof(value_type) % sizeof(int)), - "Reduction functor's declared value_type requires: 0 == " - "sizeof(value_type) % sizeof(int)"); - - /* this cast to bool is needed for correctness by NVCC */ - enum : bool { - IsArray = static_cast<bool>( - std::is_array<typename FunctorType::value_type>::value) - }; - - // If not an array then what is the sizeof(value_type) - enum { StaticValueSize = IsArray ? 0 : sizeof(value_type) }; - - using pointer_type = value_type*; - - // The reference_type for an array is 'value_type *' - // The reference_type for a single value is 'value_type &' - - using reference_type = std::conditional_t<IsArray, value_type*, value_type&>; - - // Number of values if single value - template <class F> - KOKKOS_FORCEINLINE_FUNCTION static - typename std::enable_if<std::is_same<F, FunctorType>::value && !IsArray, - unsigned>::type - value_count(const F&) { - return 1; - } - - // Number of values if an array, protect via templating because - // 'f.value_count' will only exist when the functor declares the value_type to - // be an array. 
- template <class F> - KOKKOS_FORCEINLINE_FUNCTION static - typename std::enable_if<std::is_same<F, FunctorType>::value && IsArray, - unsigned>::type - value_count(const F& f) { - return f.value_count; - } - - // Total size of the value - KOKKOS_INLINE_FUNCTION static unsigned value_size(const FunctorType& f) { - return value_count(f) * sizeof(value_type); - } -}; - -template <class FunctorType, class ArgTag> -struct FunctorValueTraits<FunctorType, ArgTag, - false /* == exists FunctorType::value_type */ - > { - private: - struct VOIDTAG { - }; // Allow declaration of non-matching operator() with void argument tag. - struct REJECTTAG { - }; // Reject tagged operator() when using non-tagged execution policy. - - using tag_type = - std::conditional_t<std::is_same<ArgTag, void>::value, VOIDTAG, ArgTag>; - - //---------------------------------------- - // parallel_for operator without a tag: - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember) - const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION 
static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember) - const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const ArgMember&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const ArgMember&, const 
ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, - ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, const ArgMember&) 
const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, const ArgMember&, - const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember) const) {} - - template <class 
TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, const ArgMember&, - const ArgMember&) const) {} - - template <class TagType, class ArgMember> - 
KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class TagType, class ArgMember> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - //---------------------------------------- - // parallel_for operator with a tag: - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember) const) {} - - 
template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, - ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, - ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, - ArgMember, ArgMember) const) {} - - template <class ArgMember> 
- KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void 
(FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const 
ArgMember&, const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&) const) {} - - template <class ArgMember> - KOKKOS_INLINE_FUNCTION static VOIDTAG deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&) const) {} - - //---------------------------------------- - // parallel_reduce operator without a tag: - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(ArgMember, ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, 
ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember, - T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const ArgMember&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const 
ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, T&) const) { - } - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, - T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( 
- VOIDTAG, void (FunctorType::*)(TagType, const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, 
class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, ArgMember, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, 
T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - //---------------------------------------- - // 
parallel_reduce operator with a tag: - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, ArgMember, 
ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, ArgMember, ArgMember, ArgMember, - ArgMember, ArgMember, ArgMember, ArgMember, - ArgMember, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION 
static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const 
tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, T&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const tag_type&, const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, - const ArgMember&, const ArgMember&, T&) const) {} - - //---------------------------------------- - // parallel_scan operator without a tag: - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, T&, bool) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, T&, bool) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, ArgMember, T&, bool) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, 
const ArgMember&, T&, bool) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(const TagType&, ArgMember, T&, bool) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, const ArgMember&, T&, bool) - const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(ArgMember, T&, const bool&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const ArgMember&, T&, const bool&) const) { - } - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, - void (FunctorType::*)(TagType, ArgMember, T&, const bool&) const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(TagType, const ArgMember&, T&, const bool&) - const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, ArgMember, T&, const bool&) - const) {} - - template <class TagType, class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static REJECTTAG deduce_reduce_type( - VOIDTAG, void (FunctorType::*)(const TagType&, const ArgMember&, T&, - const bool&) const) {} - //---------------------------------------- - // parallel_scan operator with a tag: - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, ArgMember, T&, bool) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(const 
tag_type&, ArgMember, T&, bool) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, const ArgMember&, T&, bool) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, const ArgMember&, T&, - bool) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, - void (FunctorType::*)(tag_type, ArgMember, T&, const bool&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, ArgMember, T&, - const bool&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(tag_type, const ArgMember&, T&, - const bool&) const) {} - - template <class ArgMember, class T> - KOKKOS_INLINE_FUNCTION static T deduce_reduce_type( - tag_type, void (FunctorType::*)(const tag_type&, const ArgMember&, T&, - const bool&) const) {} - //---------------------------------------- - - using ValueType = - decltype(deduce_reduce_type(tag_type(), &FunctorType::operator())); - - enum { IS_VOID = std::is_same<VOIDTAG, ValueType>::value }; - enum { IS_REJECT = std::is_same<REJECTTAG, ValueType>::value }; - - public: - using value_type = std::conditional_t<IS_VOID || IS_REJECT, void, ValueType>; - using pointer_type = - std::conditional_t<IS_VOID || IS_REJECT, void, ValueType*>; - using reference_type = - std::conditional_t<IS_VOID || IS_REJECT, void, ValueType&>; - using functor_type = FunctorType; - - static_assert( - IS_VOID || IS_REJECT || - ((sizeof(ValueType) > sizeof(int)) - ? 
0 == sizeof(ValueType) % sizeof(int) - : true), - "Reduction functor's value_type deduced from functor::operator() " - "requires: 0 == sizeof(value_type) % sizeof(int)"); - - enum { StaticValueSize = IS_VOID || IS_REJECT ? 0 : sizeof(ValueType) }; - - KOKKOS_FORCEINLINE_FUNCTION static unsigned value_size(const FunctorType&) { - return StaticValueSize; - } - - KOKKOS_FORCEINLINE_FUNCTION static unsigned value_count(const FunctorType&) { - return IS_VOID || IS_REJECT ? 0 : 1; - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** Function signatures for FunctorType::init function with a tag. - * reference_type is 'value_type &' for scalar and 'value_type *' for array. - */ -template <class FunctorType, class ArgTag> -struct FunctorValueInitFunction { - using reference_type = - typename FunctorValueTraits<FunctorType, ArgTag>::reference_type; - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, reference_type) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, reference_type) const); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag, - reference_type)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag const&, - reference_type)); -}; - -/** Function signatures for FunctorType::init function without a tag. - * reference_type is 'value_type &' for scalar and 'value_type *' for array. 
- */ -template <class FunctorType> -struct FunctorValueInitFunction<FunctorType, void> { - using reference_type = - typename FunctorValueTraits<FunctorType, void>::reference_type; - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(reference_type) const); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(reference_type)); -}; - -// Adapter for value initialization function. -// If a proper FunctorType::init is declared then use it, -// otherwise use default constructor. -template <class FunctorType, class ArgTag, - class T = typename FunctorValueTraits<FunctorType, ArgTag>:: - reference_type // FIXME Fix FunctorValueTraits for multi-dim - // operator - , - class Enable = void> -struct FunctorValueInit; - -/* No 'init' function provided for single value */ -template <class FunctorType, class ArgTag, class T, class Enable> -struct FunctorValueInit<FunctorType, ArgTag, T&, Enable> { - KOKKOS_FORCEINLINE_FUNCTION static T& init(const FunctorType&, void* p) { - return *(new (p) T()); - }; -}; - -/* No 'init' function provided for array value */ -template <class FunctorType, class ArgTag, class T, class Enable> -struct FunctorValueInit<FunctorType, ArgTag, T*, Enable> { - KOKKOS_FORCEINLINE_FUNCTION static T* init(const FunctorType& f, void* p) { - const int n = FunctorValueTraits<FunctorType, ArgTag>::value_count(f); - for (int i = 0; i < n; ++i) { - new (((T*)p) + i) T(); - } - return (T*)p; - } -}; - -/* 'init' function provided for single value */ -template <class FunctorType, class T> -struct FunctorValueInit< - FunctorType, void, - T& - // First substitution failure when FunctorType::init does not exist. - // Second substitution failure when FunctorType::init is not compatible. 
- , - decltype(FunctorValueInitFunction<FunctorType, void>::enable_if( - &FunctorType::init))> { - KOKKOS_FORCEINLINE_FUNCTION static T& init(const FunctorType& f, void* p) { - f.init(*((T*)p)); - return *((T*)p); - } -}; - -/* 'init' function provided for array value */ -template <class FunctorType, class T> -struct FunctorValueInit< - FunctorType, void, - T* - // First substitution failure when FunctorType::init does not exist. - // Second substitution failure when FunctorType::init is not compatible - , - decltype(FunctorValueInitFunction<FunctorType, void>::enable_if( - &FunctorType::init))> { - KOKKOS_FORCEINLINE_FUNCTION static T* init(const FunctorType& f, void* p) { - f.init((T*)p); - return (T*)p; - } -}; - -/* 'init' function provided for single value */ -template <class FunctorType, class ArgTag, class T> -struct FunctorValueInit< - FunctorType, ArgTag, - T& - // First substitution failure when FunctorType::init does not exist. - // Second substitution failure when FunctorType::init is not compatible. - , - typename std::enable_if< - !std::is_same<ArgTag, void>::value, - decltype(FunctorValueInitFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::init))>::type> { - KOKKOS_FORCEINLINE_FUNCTION static T& init(const FunctorType& f, void* p) { - f.init(ArgTag(), *((T*)p)); - return *((T*)p); - } -}; - -/* 'init' function provided for array value */ -template <class FunctorType, class ArgTag, class T> -struct FunctorValueInit< - FunctorType, ArgTag, - T* - // First substitution failure when FunctorType::init does not exist. 
- // Second substitution failure when FunctorType::init is not compatible - , - typename std::enable_if< - !std::is_same<ArgTag, void>::value, - decltype(FunctorValueInitFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::init))>::type> { - KOKKOS_FORCEINLINE_FUNCTION static T* init(const FunctorType& f, void* p) { - f.init(ArgTag(), (T*)p); - return (T*)p; - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -// Signatures for compatible FunctorType::join with tag and not an array -template <class FunctorType, class ArgTag, - bool IsArray = - 0 == FunctorValueTraits<FunctorType, ArgTag>::StaticValueSize> -struct FunctorValueJoinFunction { - using value_type = - typename FunctorValueTraits<FunctorType, ArgTag>::value_type; - - using vref_type = volatile value_type&; - using cvref_type = const volatile value_type&; - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, vref_type, cvref_type) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, vref_type, cvref_type) const); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag, vref_type, - cvref_type)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag const&, - vref_type, cvref_type)); -}; - -// Signatures for compatible FunctorType::join with tag and is an array -template <class FunctorType, class ArgTag> -struct FunctorValueJoinFunction<FunctorType, ArgTag, true> { - using value_type = - typename FunctorValueTraits<FunctorType, ArgTag>::value_type; - - using vptr_type = volatile value_type*; - using cvptr_type = const volatile value_type*; - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, vptr_type, cvptr_type) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void 
(FunctorType::*)(ArgTag const&, vptr_type, cvptr_type) const); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag, vptr_type, - cvptr_type)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag const&, - vptr_type, cvptr_type)); -}; - -// Signatures for compatible FunctorType::join without tag and not an array -template <class FunctorType> -struct FunctorValueJoinFunction<FunctorType, void, false> { - using value_type = typename FunctorValueTraits<FunctorType, void>::value_type; - - using vref_type = volatile value_type&; - using cvref_type = const volatile value_type&; - - KOKKOS_INLINE_FUNCTION static void enable_if(void (FunctorType::*)(vref_type, - cvref_type) - const); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(vref_type, cvref_type)); -}; - -// Signatures for compatible FunctorType::join without tag and is an array -template <class FunctorType> -struct FunctorValueJoinFunction<FunctorType, void, true> { - using value_type = typename FunctorValueTraits<FunctorType, void>::value_type; - - using vptr_type = volatile value_type*; - using cvptr_type = const volatile value_type*; - - KOKKOS_INLINE_FUNCTION static void enable_if(void (FunctorType::*)(vptr_type, - cvptr_type) - const); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(vptr_type, cvptr_type)); -}; - -template <class FunctorType, class ArgTag, - class T = - typename FunctorValueTraits<FunctorType, ArgTag>::reference_type, - class Enable = void> -struct FunctorValueJoin; - -/* No 'join' function provided, single value */ -template <class FunctorType, class ArgTag, class T, class Enable> -struct FunctorValueJoin<FunctorType, ArgTag, T&, Enable> { - KOKKOS_FORCEINLINE_FUNCTION - FunctorValueJoin(const FunctorType&) {} - - KOKKOS_FORCEINLINE_FUNCTION static void join(const FunctorType& /*f*/, - volatile void* const lhs, - const volatile void* const rhs) { - *((volatile T*)lhs) += *((const volatile T*)rhs); - } - KOKKOS_FORCEINLINE_FUNCTION - void 
operator()(volatile T& lhs, const volatile T& rhs) const { lhs += rhs; } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(T& lhs, const T& rhs) const { lhs += rhs; } -}; - -/* No 'join' function provided, array of values */ -template <class FunctorType, class ArgTag, class T, class Enable> -struct FunctorValueJoin<FunctorType, ArgTag, T*, Enable> { - const FunctorType& f; - - KOKKOS_FORCEINLINE_FUNCTION - FunctorValueJoin(const FunctorType& f_) : f(f_) {} - - KOKKOS_FORCEINLINE_FUNCTION static void join(const FunctorType& f_, - volatile void* const lhs, - const volatile void* const rhs) { - const int n = FunctorValueTraits<FunctorType, ArgTag>::value_count(f_); - - for (int i = 0; i < n; ++i) { - ((volatile T*)lhs)[i] += ((const volatile T*)rhs)[i]; - } - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(volatile T* const lhs, const volatile T* const rhs) const { - const int n = FunctorValueTraits<FunctorType, ArgTag>::value_count(f); - - for (int i = 0; i < n; ++i) { - lhs[i] += rhs[i]; - } - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(T* lhs, const T* rhs) const { - const int n = FunctorValueTraits<FunctorType, ArgTag>::value_count(f); - - for (int i = 0; i < n; ++i) { - lhs[i] += rhs[i]; - } - } -}; - -/* 'join' function provided, single value */ -template <class FunctorType, class ArgTag, class T> -struct FunctorValueJoin< - FunctorType, ArgTag, - T& - // First substitution failure when FunctorType::join does not exist. 
- // Second substitution failure when enable_if( & Functor::join ) does not - // exist - , - decltype(FunctorValueJoinFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::join))> { - const FunctorType& f; - - KOKKOS_FORCEINLINE_FUNCTION - FunctorValueJoin(const FunctorType& f_) : f(f_) {} - - KOKKOS_FORCEINLINE_FUNCTION static void join(const FunctorType& f_, - volatile void* const lhs, - const volatile void* const rhs) { - f_.join(ArgTag(), *((volatile T*)lhs), *((const volatile T*)rhs)); - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(volatile T& lhs, const volatile T& rhs) const { - f.join(ArgTag(), lhs, rhs); - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(T& lhs, const T& rhs) const { f.join(ArgTag(), lhs, rhs); } -}; - -/* 'join' function provided, no tag, single value */ -template <class FunctorType, class T> -struct FunctorValueJoin< - FunctorType, void, - T& - // First substitution failure when FunctorType::join does not exist. - // Second substitution failure when enable_if( & Functor::join ) does not - // exist - , - decltype(FunctorValueJoinFunction<FunctorType, void>::enable_if( - &FunctorType::join))> { - const FunctorType& f; - - KOKKOS_FORCEINLINE_FUNCTION - FunctorValueJoin(const FunctorType& f_) : f(f_) {} - - KOKKOS_FORCEINLINE_FUNCTION static void join(const FunctorType& f_, - volatile void* const lhs, - const volatile void* const rhs) { - f_.join(*((volatile T*)lhs), *((const volatile T*)rhs)); - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(volatile T& lhs, const volatile T& rhs) const { - f.join(lhs, rhs); - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(T& lhs, const T& rhs) const { f.join(lhs, rhs); } -}; - -/* 'join' function provided for array value */ -template <class FunctorType, class ArgTag, class T> -struct FunctorValueJoin< - FunctorType, ArgTag, - T* - // First substitution failure when FunctorType::join does not exist. 
- // Second substitution failure when enable_if( & Functor::join ) does not - // exist - , - decltype(FunctorValueJoinFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::join))> { - const FunctorType& f; - - KOKKOS_FORCEINLINE_FUNCTION - FunctorValueJoin(const FunctorType& f_) : f(f_) {} - - KOKKOS_FORCEINLINE_FUNCTION static void join(const FunctorType& f_, - volatile void* const lhs, - const volatile void* const rhs) { - f_.join(ArgTag(), (volatile T*)lhs, (const volatile T*)rhs); - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(volatile T* const lhs, const volatile T* const rhs) const { - f.join(ArgTag(), lhs, rhs); - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(T* lhs, const T* rhs) const { f.join(ArgTag(), lhs, rhs); } -}; - -/* 'join' function provided, no tag, array value */ -template <class FunctorType, class T> -struct FunctorValueJoin< - FunctorType, void, - T* - // First substitution failure when FunctorType::join does not exist. - // Second substitution failure when enable_if( & Functor::join ) does not - // exist - , - decltype(FunctorValueJoinFunction<FunctorType, void>::enable_if( - &FunctorType::join))> { - const FunctorType& f; - - KOKKOS_FORCEINLINE_FUNCTION - FunctorValueJoin(const FunctorType& f_) : f(f_) {} - - KOKKOS_FORCEINLINE_FUNCTION static void join(const FunctorType& f_, - volatile void* const lhs, - const volatile void* const rhs) { - f_.join((volatile T*)lhs, (const volatile T*)rhs); - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(volatile T* const lhs, const volatile T* const rhs) const { - f.join(lhs, rhs); - } - KOKKOS_FORCEINLINE_FUNCTION - void operator()(T* lhs, const T* rhs) const { f.join(lhs, rhs); } -}; - -} // namespace Impl -} // namespace Kokkos - -namespace Kokkos { - -namespace Impl { - -template <typename ValueType, class JoinOp, class Enable = void> -struct JoinLambdaAdapter { - using value_type = ValueType; - const JoinOp& lambda; - KOKKOS_INLINE_FUNCTION - JoinLambdaAdapter(const JoinOp& lambda_) : 
lambda(lambda_) {} - - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, const volatile value_type& src) const { - lambda(dst, src); - } - - KOKKOS_INLINE_FUNCTION - void join(value_type& dst, const value_type& src) const { lambda(dst, src); } - - KOKKOS_INLINE_FUNCTION - void operator()(volatile value_type& dst, - const volatile value_type& src) const { - lambda(dst, src); - } - - KOKKOS_INLINE_FUNCTION - void operator()(value_type& dst, const value_type& src) const { - lambda(dst, src); - } -}; - -template <typename ValueType, class JoinOp> -struct JoinLambdaAdapter<ValueType, JoinOp, - decltype(FunctorValueJoinFunction< - JoinOp, void>::enable_if(&JoinOp::join))> { - using value_type = ValueType; - static_assert( - std::is_same<ValueType, typename JoinOp::value_type>::value, - "JoinLambdaAdapter static_assert Fail: ValueType != JoinOp::value_type"); - - const JoinOp& lambda; - KOKKOS_INLINE_FUNCTION - JoinLambdaAdapter(const JoinOp& lambda_) : lambda(lambda_) {} - - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, const volatile value_type& src) const { - lambda.join(dst, src); - } - - KOKKOS_INLINE_FUNCTION - void join(value_type& dst, const value_type& src) const { - lambda.join(dst, src); - } - - KOKKOS_INLINE_FUNCTION - void operator()(volatile value_type& dst, - const volatile value_type& src) const { - lambda.join(dst, src); - } - - KOKKOS_INLINE_FUNCTION - void operator()(value_type& dst, const value_type& src) const { - lambda.join(dst, src); - } -}; - -template <typename ValueType> -struct JoinAdd { - using value_type = ValueType; - - KOKKOS_DEFAULTED_FUNCTION - JoinAdd() = default; - - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, const volatile value_type& src) const { - dst += src; - } - KOKKOS_INLINE_FUNCTION - void operator()(value_type& dst, const value_type& src) const { dst += src; } - KOKKOS_INLINE_FUNCTION - void operator()(volatile value_type& dst, - const volatile value_type& src) const { - dst += src; - 
} -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template <class FunctorType, class ArgTag, - class T = - typename FunctorValueTraits<FunctorType, ArgTag>::reference_type> -struct FunctorValueOps; - -template <class FunctorType, class ArgTag, class T> -struct FunctorValueOps<FunctorType, ArgTag, T&> { - KOKKOS_FORCEINLINE_FUNCTION static T* pointer(T& r) { return &r; } - - KOKKOS_FORCEINLINE_FUNCTION static T& reference(void* p) { return *((T*)p); } - - KOKKOS_FORCEINLINE_FUNCTION static void copy(const FunctorType&, - void* const lhs, - const void* const rhs) { - *((T*)lhs) = *((const T*)rhs); - } -}; - -/* No 'join' function provided, array of values */ -template <class FunctorType, class ArgTag, class T> -struct FunctorValueOps<FunctorType, ArgTag, T*> { - KOKKOS_FORCEINLINE_FUNCTION static T* pointer(T* p) { return p; } - - KOKKOS_FORCEINLINE_FUNCTION static T* reference(void* p) { return ((T*)p); } - - KOKKOS_FORCEINLINE_FUNCTION static void copy(const FunctorType& f, - void* const lhs, - const void* const rhs) { - const int n = FunctorValueTraits<FunctorType, ArgTag>::value_count(f); - for (int i = 0; i < n; ++i) { - ((T*)lhs)[i] = ((const T*)rhs)[i]; - } - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -// Compatible functions for 'final' function and value_type not an array -template <class FunctorType, class ArgTag, - bool IsArray = - 0 == FunctorValueTraits<FunctorType, ArgTag>::StaticValueSize> -struct FunctorFinalFunction { - using value_type = - typename FunctorValueTraits<FunctorType, ArgTag>::value_type; - - KOKKOS_INLINE_FUNCTION static 
void enable_if( - void (FunctorType::*)(ArgTag, value_type&) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, value_type&) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, value_type&)); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, value_type&)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag, value_type&)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag const&, - value_type&)); - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, value_type const&) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, value_type const&) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, value_type const&)); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, value_type const&)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag, - value_type const&)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag const&, - value_type const&)); -}; - -// Compatible functions for 'final' function and value_type is an array -template <class FunctorType, class ArgTag> -struct FunctorFinalFunction<FunctorType, ArgTag, true> { - using value_type = - typename FunctorValueTraits<FunctorType, ArgTag>::value_type; - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, value_type*) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, value_type*) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, value_type*)); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, value_type*)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag, value_type*)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag const&, - value_type*)); - - 
KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, value_type const*) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, value_type const*) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag, value_type const*)); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(ArgTag const&, value_type const*)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag, - value_type const*)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ArgTag const&, - value_type const*)); -}; - -template <class FunctorType> -struct FunctorFinalFunction<FunctorType, void, false> { - using value_type = typename FunctorValueTraits<FunctorType, void>::value_type; - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(value_type&) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(value_type&)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(value_type&)); - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(const value_type&) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(const value_type&)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(const value_type&)); -}; - -template <class FunctorType> -struct FunctorFinalFunction<FunctorType, void, true> { - using value_type = typename FunctorValueTraits<FunctorType, void>::value_type; - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(value_type*) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(value_type*)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(value_type*)); - - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(const value_type*) const); - KOKKOS_INLINE_FUNCTION static void enable_if( - void (FunctorType::*)(const value_type*)); - KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(const 
value_type*)); -}; - -/* No 'final' function provided */ -template <class FunctorType, class ArgTag, - class ResultType = - typename FunctorValueTraits<FunctorType, ArgTag>::reference_type, - class Enable = void> -struct FunctorFinal { - KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType&, void*) {} -}; - -/* 'final' function provided for single value but no tag*/ -template <class FunctorType, class ArgTag, class T> -struct FunctorFinal< - FunctorType, ArgTag, - T& - // First substitution failure when FunctorType::final does not exist. - // Second substitution failure when FunctorType::final is not compatible. - , - typename std::enable_if< - std::is_same<ArgTag, void>::value, - decltype(FunctorFinalFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::final))>::type> { - KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType& f, void* p) { - f.final(*((T*)p)); - } -}; - -/* 'final' function provided for array value but no tag*/ -template <class FunctorType, class ArgTag, class T> -struct FunctorFinal< - FunctorType, ArgTag, - T* - // First substitution failure when FunctorType::final does not exist. - // Second substitution failure when FunctorType::final is not compatible. - , - typename std::enable_if< - std::is_same<ArgTag, void>::value, - decltype(FunctorFinalFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::final))>::type> { - KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType& f, void* p) { - f.final((T*)p); - } -}; - -/* 'final' function provided for single value and with tag */ -template <class FunctorType, class ArgTag, class T> -struct FunctorFinal< - FunctorType, ArgTag, - T& - // First substitution failure when FunctorType::final does not exist. - // Second substitution failure when FunctorType::final is not compatible. 
- , - typename std::enable_if< - !std::is_same<ArgTag, void>::value, - decltype(FunctorFinalFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::final))>::type> { - KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType& f, void* p) { - f.final(ArgTag(), *((T*)p)); - } -}; - -/* 'final' function provided for array value and with tag */ -template <class FunctorType, class ArgTag, class T> -struct FunctorFinal< - FunctorType, ArgTag, - T* - // First substitution failure when FunctorType::final does not exist. - // Second substitution failure when FunctorType::final is not compatible. - , - typename std::enable_if< - !std::is_same<ArgTag, void>::value, - decltype(FunctorFinalFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::final))>::type> { - KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType& f, void* p) { - f.final(ArgTag(), (T*)p); - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* KOKKOS_FUNCTORADAPTER_HPP */ diff --git a/packages/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp b/packages/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp index 7140154e0f6f276dc928dc5f3a73cda97f6e2cec..6569e49014fa2410a66e0025f0e0b6a2d4ffaad6 100644 --- a/packages/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp @@ -61,6 +61,35 @@ struct FunctorPatternInterface { struct SCAN {}; }; +template <typename T> +struct DeduceFunctorPatternInterface; + +template <class FunctorType, class ExecPolicy, class ExecutionSpace> +struct DeduceFunctorPatternInterface< + ParallelFor<FunctorType, ExecPolicy, ExecutionSpace>> { + using type = FunctorPatternInterface::FOR; +}; + +template <class FunctorType, class ExecPolicy, class ReducerType, + class ExecutionSpace> +struct DeduceFunctorPatternInterface< + 
ParallelReduce<FunctorType, ExecPolicy, ReducerType, ExecutionSpace>> { + using type = FunctorPatternInterface::REDUCE; +}; + +template <class FunctorType, class ExecPolicy, class ExecutionSpace> +struct DeduceFunctorPatternInterface< + ParallelScan<FunctorType, ExecPolicy, ExecutionSpace>> { + using type = FunctorPatternInterface::SCAN; +}; + +template <class FunctorType, class ExecPolicy, class ReturnType, + class ExecutionSpace> +struct DeduceFunctorPatternInterface<ParallelScanWithTotal< + FunctorType, ExecPolicy, ReturnType, ExecutionSpace>> { + using type = FunctorPatternInterface::SCAN; +}; + /** \brief Query Functor and execution policy argument tag for value type. * * If 'value_type' is not explicitly declared in the functor @@ -79,17 +108,16 @@ struct FunctorAnalysis { //---------------------------------------- - struct VOID {}; + struct void_tag {}; template <typename P = Policy, typename = std::false_type> struct has_work_tag { using type = void; - using wtag = VOID; + using wtag = void_tag; }; template <typename P> - struct has_work_tag<P, - typename std::is_same<typename P::work_tag, void>::type> { + struct has_work_tag<P, typename std::is_void<typename P::work_tag>::type> { using type = typename P::work_tag; using wtag = typename P::work_tag; }; @@ -108,7 +136,7 @@ struct FunctorAnalysis { template <typename T> struct has_execution_space< - T, typename std::is_same<typename T::execution_space, void>::type> { + T, typename std::is_void<typename T::execution_space>::type> { using type = typename T::execution_space; enum : bool { value = true }; }; @@ -130,8 +158,8 @@ struct FunctorAnalysis { }; template <typename F> - struct has_value_type< - F, typename std::is_same<typename F::value_type, void>::type> { + struct has_value_type<F, + typename std::is_void<typename F::value_type>::type> { using type = typename F::value_type; static_assert(!std::is_reference<type>::value && @@ -147,7 +175,7 @@ struct FunctorAnalysis { template <typename F, typename P = 
PatternInterface, typename V = typename has_value_type<F>::type, - bool T = std::is_same<Tag, void>::value> + bool T = std::is_void<Tag>::value> struct deduce_value_type { using type = V; }; @@ -288,50 +316,46 @@ struct FunctorAnalysis { using candidate_type = typename deduce_value_type<Functor>::type; enum { - candidate_is_void = std::is_same<candidate_type, void>::value, + candidate_is_void = std::is_void<candidate_type>::value, candidate_is_array = std::rank<candidate_type>::value == 1 }; //---------------------------------------- public: - using execution_space = typename std::conditional< - functor_has_space::value, typename functor_has_space::type, - typename std::conditional<policy_has_space::value, - typename policy_has_space::type, - Kokkos::DefaultExecutionSpace>::type>::type; + using execution_space = + std::conditional_t<functor_has_space::value, + typename functor_has_space::type, + std::conditional_t<policy_has_space::value, + typename policy_has_space::type, + Kokkos::DefaultExecutionSpace>>; - using value_type = typename std::remove_extent<candidate_type>::type; + using value_type = std::remove_extent_t<candidate_type>; static_assert(!std::is_const<value_type>::value, "Kokkos functor operator reduce argument cannot be const"); private: // Stub to avoid defining a type 'void &' - using ValueType = - typename std::conditional<candidate_is_void, VOID, value_type>::type; + using ValueType = std::conditional_t<candidate_is_void, void_tag, value_type>; public: - using pointer_type = - typename std::conditional<candidate_is_void, void, ValueType*>::type; + using pointer_type = std::conditional_t<candidate_is_void, void, ValueType*>; - using reference_type = typename std::conditional< + using reference_type = std::conditional_t< candidate_is_array, ValueType*, - typename std::conditional<!candidate_is_void, ValueType&, - void>::type>::type; + std::conditional_t<!candidate_is_void, ValueType&, void>>; private: template <bool IsArray, class FF> - 
KOKKOS_INLINE_FUNCTION static constexpr - typename std::enable_if<IsArray, unsigned>::type - get_length(FF const& f) { + KOKKOS_INLINE_FUNCTION static constexpr std::enable_if_t<IsArray, unsigned> + get_length(FF const& f) { return f.value_count; } template <bool IsArray, class FF> - KOKKOS_INLINE_FUNCTION static constexpr - typename std::enable_if<!IsArray, unsigned>::type - get_length(FF const&) { + KOKKOS_INLINE_FUNCTION static constexpr std::enable_if_t<!IsArray, unsigned> + get_length(FF const&) { return candidate_is_void ? 0 : 1; } @@ -367,29 +391,52 @@ struct FunctorAnalysis { } private: - enum INTERFACE : int { - DISABLE = 0, - NO_TAG_NOT_ARRAY = 1, - NO_TAG_IS_ARRAY = 2, - HAS_TAG_NOT_ARRAY = 3, - HAS_TAG_IS_ARRAY = 4, - DEDUCED = - !std::is_same<PatternInterface, REDUCE>::value - ? DISABLE - : (std::is_same<Tag, void>::value - ? (candidate_is_array ? NO_TAG_IS_ARRAY : NO_TAG_NOT_ARRAY) - : (candidate_is_array ? HAS_TAG_IS_ARRAY - : HAS_TAG_NOT_ARRAY)) - }; - //---------------------------------------- // parallel_reduce join operator - template <class F, INTERFACE> - struct has_join_function; + template <class F, bool is_array = candidate_is_array> + struct has_join_no_tag_function; + + template <class F> + struct has_join_no_tag_function<F, /*is_array*/ false> { + using ref_type = ValueType&; + using cref_type = const ValueType&; + + KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(ref_type, + cref_type) const); + + KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ref_type, cref_type)); + + KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + ValueType const* src) { + f->join(*dst, *src); + } + }; + + template <class F> + struct has_join_no_tag_function<F, /*is_array*/ true> { + using ref_type = ValueType*; + using cref_type = const ValueType*; + + KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(ref_type, + cref_type) const); + + KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ref_type, cref_type)); + + 
KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + ValueType const* src) { + f->join(dst, src); + } + }; + + template <class F, bool is_array = candidate_is_array> + struct has_volatile_join_no_tag_function; template <class F> - struct has_join_function<F, NO_TAG_NOT_ARRAY> { + struct KOKKOS_DEPRECATED_WITH_COMMENT( + "Reduce/scan join() taking `volatile`-qualified parameters is " + "deprecated. Remove the `volatile` qualifier.") + has_volatile_join_no_tag_function<F, /*is_array*/ false> { using vref_type = volatile ValueType&; using cvref_type = const volatile ValueType&; @@ -399,15 +446,17 @@ struct FunctorAnalysis { KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(vref_type, cvref_type)); - KOKKOS_INLINE_FUNCTION static void join(F const* const f, - ValueType volatile* dst, - ValueType volatile const* src) { + KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + ValueType const* src) { f->join(*dst, *src); } }; template <class F> - struct has_join_function<F, NO_TAG_IS_ARRAY> { + struct KOKKOS_DEPRECATED_WITH_COMMENT( + "Reduce/scan join() taking `volatile`-qualified parameters is " + "deprecated. 
Remove the `volatile` qualifier.") + has_volatile_join_no_tag_function<F, /*is_array*/ true> { using vref_type = volatile ValueType*; using cvref_type = const volatile ValueType*; @@ -417,15 +466,71 @@ struct FunctorAnalysis { KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(vref_type, cvref_type)); - KOKKOS_INLINE_FUNCTION static void join(F const* const f, - ValueType volatile* dst, - ValueType volatile const* src) { + KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + ValueType const* src) { f->join(dst, src); } }; + template <class F, bool is_array = candidate_is_array> + struct has_join_tag_function; + template <class F> - struct has_join_function<F, HAS_TAG_NOT_ARRAY> { + struct has_join_tag_function<F, /*is_array*/ false> { + using ref_type = ValueType&; + using cref_type = const ValueType&; + + KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(WTag, ref_type, + cref_type) const); + + KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(WTag, ref_type, + cref_type)); + + KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(WTag const&, + ref_type, + cref_type) const); + + KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(WTag const&, ref_type, + cref_type)); + + KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + ValueType const* src) { + f->join(WTag(), *dst, *src); + } + }; + + template <class F> + struct has_join_tag_function<F, /*is_array*/ true> { + using ref_type = ValueType*; + using cref_type = const ValueType*; + + KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(WTag, ref_type, + cref_type) const); + + KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(WTag, ref_type, + cref_type)); + + KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(WTag const&, + ref_type, + cref_type) const); + + KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(WTag const&, ref_type, + cref_type)); + + KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + 
ValueType const* src) { + f->join(WTag(), dst, src); + } + }; + + template <class F, bool is_array = candidate_is_array> + struct has_volatile_join_tag_function; + + template <class F> + struct KOKKOS_DEPRECATED_WITH_COMMENT( + "Reduce/scan join() taking `volatile`-qualified parameters is " + "deprecated. Remove the `volatile` qualifier.") + has_volatile_join_tag_function<F, /*is_array*/ false> { using vref_type = volatile ValueType&; using cvref_type = const volatile ValueType&; @@ -443,15 +548,17 @@ struct FunctorAnalysis { vref_type, cvref_type)); - KOKKOS_INLINE_FUNCTION static void join(F const* const f, - ValueType volatile* dst, - ValueType volatile const* src) { + KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + ValueType const* src) { f->join(WTag(), *dst, *src); } }; template <class F> - struct has_join_function<F, HAS_TAG_IS_ARRAY> { + struct KOKKOS_DEPRECATED_WITH_COMMENT( + "Reduce/scan join() taking `volatile`-qualified parameters is " + "deprecated. 
Remove the `volatile` qualifier.") + has_volatile_join_tag_function<F, /*is_array*/ true> { using vref_type = volatile ValueType*; using cvref_type = const volatile ValueType*; @@ -469,47 +576,112 @@ struct FunctorAnalysis { vref_type, cvref_type)); - KOKKOS_INLINE_FUNCTION static void join(F const* const f, - ValueType volatile* dst, - ValueType volatile const* src) { + KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + ValueType const* src) { f->join(WTag(), dst, src); } }; - template <class F = Functor, INTERFACE = DEDUCED, typename = void> - struct DeduceJoin { + template <class F, class = void> + struct detected_join_no_tag { enum : bool { value = false }; + }; - KOKKOS_INLINE_FUNCTION static void join(F const* const f, - ValueType volatile* dst, - ValueType volatile const* src) { + template <class F> + struct detected_join_no_tag< + F, decltype(has_join_no_tag_function<F>::enable_if(&F::join))> { + enum : bool { value = true }; + }; + + template <class F, class = void> + struct detected_volatile_join_no_tag { + enum : bool { value = false }; + }; + + template <class F> + struct detected_volatile_join_no_tag< + F, decltype(has_volatile_join_no_tag_function<F>::enable_if(&F::join))> { + enum : bool { value = true }; + }; + + template <class F, class = void> + struct detected_join_tag { + enum : bool { value = false }; + }; + + template <class F> + struct detected_join_tag<F, decltype(has_join_tag_function<F>::enable_if( + &F::join))> { + enum : bool { value = true }; + }; + + template <class F, class = void> + struct detected_volatile_join_tag { + enum : bool { value = false }; + }; + + template <class F> + struct detected_volatile_join_tag< + F, decltype(has_volatile_join_tag_function<F>::enable_if(&F::join))> { + enum : bool { value = true }; + }; + + template <class F = Functor, typename = void> + struct DeduceJoinNoTag { + enum : bool { value = false }; + + KOKKOS_INLINE_FUNCTION static void join(F const* const f, ValueType* dst, + 
ValueType const* src) { const int n = FunctorAnalysis::value_count(*f); for (int i = 0; i < n; ++i) dst[i] += src[i]; } }; template <class F> - struct DeduceJoin<F, DISABLE, void> { - enum : bool { value = false }; + struct DeduceJoinNoTag<F, std::enable_if_t<(is_reducer<F>::value || + (!is_reducer<F>::value && + std::is_void<Tag>::value)) && + detected_join_no_tag<F>::value>> + : public has_join_no_tag_function<F> { + enum : bool { value = true }; + }; - KOKKOS_INLINE_FUNCTION static void join(F const* const, ValueType volatile*, - ValueType volatile const*) {} + template <class F> + struct DeduceJoinNoTag< + F, + std::enable_if_t<(is_reducer<F>::value || + (!is_reducer<F>::value && std::is_void<Tag>::value)) && + (!detected_join_no_tag<F>::value && + detected_volatile_join_no_tag<F>::value)>> + : public has_volatile_join_no_tag_function<F> { + enum : bool { value = true }; }; - template <class F, INTERFACE I> - struct DeduceJoin<F, I, - decltype(has_join_function<F, I>::enable_if(&F::join))> - : public has_join_function<F, I> { + template <class F = Functor, typename = void> + struct DeduceJoin : public DeduceJoinNoTag<F> {}; + + template <class F> + struct DeduceJoin< + F, std::enable_if_t<!is_reducer<F>::value && detected_join_tag<F>::value>> + : public has_join_tag_function<F> { + enum : bool { value = true }; + }; + + template <class F> + struct DeduceJoin<F, std::enable_if_t<!is_reducer<F>::value && + (!detected_join_tag<F>::value && + detected_volatile_join_tag<F>::value)>> + : public has_volatile_join_tag_function<F> { enum : bool { value = true }; }; //---------------------------------------- - template <class, INTERFACE> - struct has_init_function; + template <class, bool is_array = candidate_is_array> + struct has_init_no_tag_function; template <class F> - struct has_init_function<F, NO_TAG_NOT_ARRAY> { + struct has_init_no_tag_function<F, /*is_array*/ false> { KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(ValueType&) const); 
KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ValueType&)); @@ -520,7 +692,7 @@ struct FunctorAnalysis { }; template <class F> - struct has_init_function<F, NO_TAG_IS_ARRAY> { + struct has_init_no_tag_function<F, /*is_array*/ true> { KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(ValueType*) const); KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ValueType*)); @@ -530,8 +702,11 @@ struct FunctorAnalysis { } }; + template <class, bool is_array = candidate_is_array> + struct has_init_tag_function; + template <class F> - struct has_init_function<F, HAS_TAG_NOT_ARRAY> { + struct has_init_tag_function<F, /*is_array*/ false> { KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(WTag, ValueType&) const); @@ -549,7 +724,7 @@ struct FunctorAnalysis { }; template <class F> - struct has_init_function<F, HAS_TAG_IS_ARRAY> { + struct has_init_tag_function<F, /*is_array*/ true> { KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(WTag, ValueType*) const); @@ -566,37 +741,46 @@ struct FunctorAnalysis { } }; - template <class F = Functor, INTERFACE = DEDUCED, typename = void> - struct DeduceInit { + template <class F = Functor, typename = void> + struct DeduceInitNoTag { enum : bool { value = false }; - KOKKOS_INLINE_FUNCTION static void init(F const* const, ValueType* dst) { - new (dst) ValueType(); + KOKKOS_INLINE_FUNCTION static void init(F const* const f, ValueType* dst) { + const int n = FunctorAnalysis::value_count(*f); + for (int i = 0; i < n; ++i) new (&dst[i]) ValueType(); } }; template <class F> - struct DeduceInit<F, DISABLE, void> { - enum : bool { value = false }; - - KOKKOS_INLINE_FUNCTION static void init(F const* const, ValueType*) {} + struct DeduceInitNoTag< + F, std::enable_if_t<is_reducer<F>::value || (!is_reducer<F>::value && + std::is_void<Tag>::value), + decltype(has_init_no_tag_function<F>::enable_if( + &F::init))>> + : public has_init_no_tag_function<F> { + enum : bool { value = true }; }; - template <class F, INTERFACE 
I> - struct DeduceInit<F, I, - decltype(has_init_function<F, I>::enable_if(&F::init))> - : public has_init_function<F, I> { + template <class F = Functor, typename = void> + struct DeduceInit : public DeduceInitNoTag<F> {}; + + template <class F> + struct DeduceInit< + F, + std::enable_if_t<!is_reducer<F>::value, + decltype(has_init_tag_function<F>::enable_if(&F::init))>> + : public has_init_tag_function<F> { enum : bool { value = true }; }; //---------------------------------------- - template <class, INTERFACE> - struct has_final_function; + template <class, bool is_array = candidate_is_array> + struct has_final_no_tag_function; // No tag, not array template <class F> - struct has_final_function<F, NO_TAG_NOT_ARRAY> { + struct has_final_no_tag_function<F, /*is_array*/ false> { KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(ValueType&) const); KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ValueType&)); @@ -608,7 +792,7 @@ struct FunctorAnalysis { // No tag, is array template <class F> - struct has_final_function<F, NO_TAG_IS_ARRAY> { + struct has_final_no_tag_function<F, /*is_array*/ true> { KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(ValueType*) const); KOKKOS_INLINE_FUNCTION static void enable_if(void (*)(ValueType*)); @@ -618,9 +802,12 @@ struct FunctorAnalysis { } }; + template <class, bool is_array = candidate_is_array> + struct has_final_tag_function; + // Has tag, not array template <class F> - struct has_final_function<F, HAS_TAG_NOT_ARRAY> { + struct has_final_tag_function<F, /*is_array*/ false> { KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(WTag, ValueType&) const); @@ -639,7 +826,7 @@ struct FunctorAnalysis { // Has tag, is array template <class F> - struct has_final_function<F, HAS_TAG_IS_ARRAY> { + struct has_final_tag_function<F, /*is_array*/ true> { KOKKOS_INLINE_FUNCTION static void enable_if(void (F::*)(WTag, ValueType*) const); @@ -656,18 +843,32 @@ struct FunctorAnalysis { } }; - template <class F = 
Functor, INTERFACE = DEDUCED, typename = void> - struct DeduceFinal { + template <class F = Functor, typename = void> + struct DeduceFinalNoTag { enum : bool { value = false }; KOKKOS_INLINE_FUNCTION static void final(F const* const, ValueType*) {} }; - template <class F, INTERFACE I> - struct DeduceFinal<F, I, - decltype(has_final_function<F, I>::enable_if(&F::final))> - : public has_final_function<F, I> { + template <class F> + struct DeduceFinalNoTag< + F, std::enable_if_t<is_reducer<F>::value || (!is_reducer<F>::value && + std::is_void<Tag>::value), + decltype(has_final_no_tag_function<F>::enable_if( + &F::final))>> + : public has_final_no_tag_function<F> { + enum : bool { value = true }; + }; + + template <class F = Functor, typename = void> + struct DeduceFinal : public DeduceFinalNoTag<F> {}; + + template <class F> + struct DeduceFinal<F, std::enable_if_t<!is_reducer<F>::value, + decltype(has_final_tag_function< + F>::enable_if(&F::final))>> + : public has_final_tag_function<F> { enum : bool { value = true }; }; @@ -681,8 +882,7 @@ struct FunctorAnalysis { }; template <class F> - struct DeduceTeamShmem< - F, typename std::enable_if<0 < sizeof(&F::team_shmem_size)>::type> { + struct DeduceTeamShmem<F, std::enable_if_t<0 < sizeof(&F::team_shmem_size)>> { enum : bool { value = true }; static size_t team_shmem_size(F const* const f, int team_size) { @@ -691,8 +891,9 @@ struct FunctorAnalysis { }; template <class F> - struct DeduceTeamShmem< - F, typename std::enable_if<0 < sizeof(&F::shmem_size)>::type> { + struct DeduceTeamShmem<F, + std::enable_if_t<(0 < sizeof(&F::shmem_size)) && + !(0 < sizeof(&F::team_shmem_size))>> { enum : bool { value = true }; static size_t team_shmem_size(F const* const f, int team_size) { @@ -713,54 +914,44 @@ struct FunctorAnalysis { enum { has_init_member_function = DeduceInit<>::value }; enum { has_final_member_function = DeduceFinal<>::value }; - template <class MemorySpace = typename execution_space::memory_space> + 
static_assert((Kokkos::is_reducer<Functor>::value && + has_join_member_function) || + !Kokkos::is_reducer<Functor>::value, + "Reducer must have a join member function!"); + struct Reducer { private: Functor const* const m_functor; - ValueType* const m_result; - - template <bool IsArray> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<IsArray, - typename FunctorAnalysis::ValueType*>::type - ref() const noexcept { - return m_result; - } template <bool IsArray> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<!IsArray, - typename FunctorAnalysis::ValueType&>::type - ref() const noexcept { - return *m_result; - } - - template <bool IsArray> - KOKKOS_INLINE_FUNCTION constexpr typename std::enable_if<IsArray, int>::type - len() const noexcept { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t<IsArray, int> len() const + noexcept { return m_functor->value_count; } template <bool IsArray> - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if<!IsArray, int>::type - len() const noexcept { + KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t<!IsArray, int> len() const + noexcept { return candidate_is_void ? 
0 : 1; } public: using reducer = Reducer; - using value_type = FunctorAnalysis::value_type; - using memory_space = MemorySpace; + using value_type = std::remove_const_t<FunctorAnalysis::value_type>; + using pointer_type = value_type*; using reference_type = FunctorAnalysis::reference_type; using functor_type = Functor; // Adapts a functor - KOKKOS_INLINE_FUNCTION constexpr value_type* data() const noexcept { - return m_result; + template <bool is_array = candidate_is_array> + KOKKOS_INLINE_FUNCTION static std::enable_if_t<is_array, reference_type> + reference(ValueType* dst) noexcept { + return dst; } - KOKKOS_INLINE_FUNCTION constexpr reference_type reference() const noexcept { - return Reducer::template ref<candidate_is_array>(); + template <bool is_array = candidate_is_array> + KOKKOS_INLINE_FUNCTION static std::enable_if_t<!is_array, reference_type> + reference(ValueType* dst) noexcept { + return *dst; } KOKKOS_INLINE_FUNCTION constexpr int length() const noexcept { @@ -774,14 +965,14 @@ struct FunctorAnalysis { } KOKKOS_INLINE_FUNCTION - void join(ValueType volatile* dst, ValueType volatile const* src) const - noexcept { + void join(ValueType* dst, ValueType const* src) const noexcept { DeduceJoin<>::join(m_functor, dst, src); } - KOKKOS_INLINE_FUNCTION - void init(ValueType* dst) const noexcept { + KOKKOS_INLINE_FUNCTION reference_type init(ValueType* const dst) const + noexcept { DeduceInit<>::init(m_functor, dst); + return reference(dst); } KOKKOS_INLINE_FUNCTION @@ -793,13 +984,11 @@ struct FunctorAnalysis { Reducer(Reducer&&) = default; Reducer& operator=(Reducer const&) = delete; Reducer& operator=(Reducer&&) = delete; - - template <class S> - using rebind = Reducer<S>; + ~Reducer() = default; KOKKOS_INLINE_FUNCTION explicit constexpr Reducer( - Functor const* arg_functor = 0, ValueType* arg_value = nullptr) noexcept - : m_functor(arg_functor), m_result(arg_value) {} + Functor const* arg_functor) noexcept + : m_functor(arg_functor) {} }; }; diff --git 
a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index 6fc649cfc4452caebe3408edb02db546d16d0f37..d533ec05cdcf5a5d9b808e9ae497d698c7d2c1b0 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #include <cstddef> diff --git a/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp b/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp index 4f93eebc0ed3f7c8061bf62b24db7bed5a0a5fde..9ad2dae55181622aa38458e1409a296711476ffd 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #include <impl/Kokkos_HostBarrier.hpp> diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp index 1728fe90c8fbe404724b6962a4be88692a9245ed..4bf90489193e5e0938a30d1fb96751fc7bc87dea 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #include <impl/Kokkos_Error.hpp> @@ -50,7 +54,7 @@ /*--------------------------------------------------------------------------*/ -#if defined(__INTEL_COMPILER) && !defined(KOKKOS_ENABLE_CUDA) +#if defined(KOKKOS_COMPILER_INTEL) && !defined(KOKKOS_ENABLE_CUDA) // Intel specialized allocator does not interoperate with CUDA memory allocation @@ -60,34 +64,6 @@ /*--------------------------------------------------------------------------*/ -#if defined(KOKKOS_ENABLE_POSIX_MEMALIGN) - -#include <unistd.h> -#include <sys/mman.h> - -/* 
mmap flags for private anonymous memory allocation */ - -#if defined(MAP_ANONYMOUS) && defined(MAP_PRIVATE) -#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) -#elif defined(MAP_ANON) && defined(MAP_PRIVATE) -#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON) -#endif - -// mmap flags for huge page tables -// the Cuda driver does not interoperate with MAP_HUGETLB -#if defined(KOKKOS_IMPL_POSIX_MMAP_FLAGS) -#if defined(MAP_HUGETLB) && !defined(KOKKOS_ENABLE_CUDA) -#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE \ - (KOKKOS_IMPL_POSIX_MMAP_FLAGS | MAP_HUGETLB) -#else -#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_IMPL_POSIX_MMAP_FLAGS -#endif -#endif - -#endif - -/*--------------------------------------------------------------------------*/ - #include <cstddef> #include <cstdlib> #include <cstdint> @@ -101,11 +77,6 @@ #include <impl/Kokkos_Error.hpp> #include <Kokkos_Atomic.hpp> -#if (defined(KOKKOS_ENABLE_ASM) || defined(KOKKOS_ENABLE_TM)) && \ - defined(KOKKOS_ENABLE_ISA_X86_64) && !defined(KOKKOS_COMPILER_PGI) -#include <immintrin.h> -#endif - //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -116,10 +87,6 @@ HostSpace::HostSpace() : m_alloc_mech( #if defined(KOKKOS_ENABLE_INTEL_MM_ALLOC) HostSpace::INTEL_MM_ALLOC -#elif defined(KOKKOS_IMPL_POSIX_MMAP_FLAGS) - HostSpace::POSIX_MMAP -#elif defined(KOKKOS_ENABLE_POSIX_MEMALIGN) - HostSpace::POSIX_MEMALIGN #else HostSpace::STD_MALLOC #endif @@ -136,23 +103,12 @@ HostSpace::HostSpace(const HostSpace::AllocationMechanism &arg_alloc_mech) else if (arg_alloc_mech == HostSpace::INTEL_MM_ALLOC) { m_alloc_mech = HostSpace::INTEL_MM_ALLOC; } -#elif defined(KOKKOS_ENABLE_POSIX_MEMALIGN) - else if (arg_alloc_mech == HostSpace::POSIX_MEMALIGN) { - m_alloc_mech = HostSpace::POSIX_MEMALIGN; - } -#elif defined(KOKKOS_IMPL_POSIX_MMAP_FLAGS) - else if (arg_alloc_mech == HostSpace::POSIX_MMAP) { - 
m_alloc_mech = HostSpace::POSIX_MMAP; - } #endif else { const char *const mech = (arg_alloc_mech == HostSpace::INTEL_MM_ALLOC) ? "INTEL_MM_ALLOC" - : ((arg_alloc_mech == HostSpace::POSIX_MEMALIGN) - ? "POSIX_MEMALIGN" - : ((arg_alloc_mech == HostSpace::POSIX_MMAP) ? "POSIX_MMAP" - : "")); + : ((arg_alloc_mech == HostSpace::POSIX_MMAP) ? "POSIX_MMAP" : ""); std::string msg; msg.append("Kokkos::HostSpace "); @@ -215,42 +171,6 @@ void *HostSpace::impl_allocate( ptr = _mm_malloc(arg_alloc_size, alignment); } #endif - -#if defined(KOKKOS_ENABLE_POSIX_MEMALIGN) - else if (m_alloc_mech == POSIX_MEMALIGN) { - posix_memalign(&ptr, alignment, arg_alloc_size); - } -#endif - -#if defined(KOKKOS_IMPL_POSIX_MMAP_FLAGS) - else if (m_alloc_mech == POSIX_MMAP) { - constexpr size_t use_huge_pages = (1u << 27); - constexpr int prot = PROT_READ | PROT_WRITE; - const int flags = arg_alloc_size < use_huge_pages - ? KOKKOS_IMPL_POSIX_MMAP_FLAGS - : KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE; - - // read write access to private memory - - ptr = - mmap(nullptr /* address hint, if nullptr OS kernel chooses address */ - , - arg_alloc_size /* size in bytes */ - , - prot /* memory protection */ - , - flags /* visibility of updates */ - , - -1 /* file descriptor */ - , - 0 /* offset */ - ); - - /* Associated reallocation: - ptr = mremap( old_ptr , old_size , new_size , MREMAP_MAYMOVE ); - */ - } -#endif } if ((ptr == nullptr) || (reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0)) || @@ -324,18 +244,6 @@ void HostSpace::impl_deallocate( _mm_free(arg_alloc_ptr); } #endif - -#if defined(KOKKOS_ENABLE_POSIX_MEMALIGN) - else if (m_alloc_mech == POSIX_MEMALIGN) { - free(arg_alloc_ptr); - } -#endif - -#if defined(KOKKOS_IMPL_POSIX_MMAP_FLAGS) - else if (m_alloc_mech == POSIX_MMAP) { - munmap(arg_alloc_ptr, arg_alloc_size); - } -#endif } } @@ -428,56 +336,18 @@ void init_lock_array_host_space() { } bool lock_address_host_space(void *ptr) { -#if defined(KOKKOS_ENABLE_ISA_X86_64) && defined(KOKKOS_ENABLE_TM) && \ 
- !defined(KOKKOS_COMPILER_PGI) - const unsigned status = _xbegin(); - - if (_XBEGIN_STARTED == status) { - const int val = - HOST_SPACE_ATOMIC_LOCKS[((size_t(ptr) >> 2) & HOST_SPACE_ATOMIC_MASK) ^ - HOST_SPACE_ATOMIC_XOR_MASK]; - - if (0 == val) { - HOST_SPACE_ATOMIC_LOCKS[((size_t(ptr) >> 2) & HOST_SPACE_ATOMIC_MASK) ^ - HOST_SPACE_ATOMIC_XOR_MASK] = 1; - } else { - _xabort(1); - } - - _xend(); - - return 1; - } else { -#endif - return 0 == atomic_compare_exchange( - &HOST_SPACE_ATOMIC_LOCKS[((size_t(ptr) >> 2) & - HOST_SPACE_ATOMIC_MASK) ^ - HOST_SPACE_ATOMIC_XOR_MASK], - 0, 1); -#if defined(KOKKOS_ENABLE_ISA_X86_64) && defined(KOKKOS_ENABLE_TM) && \ - !defined(KOKKOS_COMPILER_PGI) - } -#endif + return 0 == atomic_compare_exchange( + &HOST_SPACE_ATOMIC_LOCKS[((size_t(ptr) >> 2) & + HOST_SPACE_ATOMIC_MASK) ^ + HOST_SPACE_ATOMIC_XOR_MASK], + 0, 1); } void unlock_address_host_space(void *ptr) { -#if defined(KOKKOS_ENABLE_ISA_X86_64) && defined(KOKKOS_ENABLE_TM) && \ - !defined(KOKKOS_COMPILER_PGI) - const unsigned status = _xbegin(); - - if (_XBEGIN_STARTED == status) { - HOST_SPACE_ATOMIC_LOCKS[((size_t(ptr) >> 2) & HOST_SPACE_ATOMIC_MASK) ^ - HOST_SPACE_ATOMIC_XOR_MASK] = 0; - } else { -#endif - atomic_exchange( - &HOST_SPACE_ATOMIC_LOCKS[((size_t(ptr) >> 2) & HOST_SPACE_ATOMIC_MASK) ^ - HOST_SPACE_ATOMIC_XOR_MASK], - 0); -#if defined(KOKKOS_ENABLE_ISA_X86_64) && defined(KOKKOS_ENABLE_TM) && \ - !defined(KOKKOS_COMPILER_PGI) - } -#endif + atomic_exchange( + &HOST_SPACE_ATOMIC_LOCKS[((size_t(ptr) >> 2) & HOST_SPACE_ATOMIC_MASK) ^ + HOST_SPACE_ATOMIC_XOR_MASK], + 0); } } // namespace Impl diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp index 4ac0941a300822e9d86137660b18ef72f61ac33b..e6ef73295420abe6b83bd9b8209d5bd35714cd99 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp @@ -42,6 +42,10 @@ 
//@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include "Kokkos_Core.hpp" #include "Kokkos_HostSpace_deepcopy.hpp" @@ -49,6 +53,10 @@ namespace Kokkos { namespace Impl { +void hostspace_fence(const DefaultHostExecutionSpace& exec) { + exec.fence("HostSpace fence"); +} + void hostspace_parallel_deepcopy(void* dst, const void* src, ptrdiff_t n) { Kokkos::DefaultHostExecutionSpace exec; hostspace_parallel_deepcopy_async(exec, dst, src, n); @@ -67,13 +75,13 @@ void hostspace_parallel_deepcopy_async(const DefaultHostExecutionSpace& exec, void* dst, const void* src, ptrdiff_t n) { using policy_t = Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>; - constexpr int host_deep_copy_serial_limit = 10 * 8192; // If the asynchronous HPX backend is enabled, do *not* copy anything // synchronously. The deep copy must be correctly sequenced with respect to // other kernels submitted to the same instance, so we only use the fallback // parallel_for version in this case. 
#if !(defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH)) + constexpr int host_deep_copy_serial_limit = 10 * 8192; if ((n < host_deep_copy_serial_limit) || (DefaultHostExecutionSpace().concurrency() == 1)) { std::memcpy(dst, src, n); diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp index 6eec3566ab3711c9bc2b60042c56e0cbc99f5953..88d37672d26433b5259fdae354068aa538550dac 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp @@ -51,6 +51,8 @@ namespace Kokkos { namespace Impl { +void hostspace_fence(const DefaultHostExecutionSpace& exec); + void hostspace_parallel_deepcopy(void* dst, const void* src, ptrdiff_t n); // DeepCopy called with an execution space that can't access HostSpace void hostspace_parallel_deepcopy_async(void* dst, const void* src, ptrdiff_t n); diff --git a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp index a7f4a652befb148bafff866d8949edb9f2520eaa..1f1acca5df7a6dfb1c78c193c0eb4d41be93a7d2 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <limits> #include <Kokkos_Macros.hpp> #include <impl/Kokkos_HostThreadTeam.hpp> diff --git a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index 82aed19659b70d90029322892798464772fdd347..7f39f188600297168242432e69c83b779dec20ef 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -49,7 +49,6 @@ #include <Kokkos_Pair.hpp> #include <Kokkos_Atomic.hpp> #include <Kokkos_ExecPolicy.hpp> -#include 
<impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_FunctorAnalysis.hpp> #include <impl/Kokkos_HostBarrier.hpp> @@ -113,10 +112,10 @@ class HostThreadTeamData { int64_t* m_team_scratch; // == pool[ 0 + m_team_base ]->m_scratch int m_pool_rank; int m_pool_size; - int m_team_reduce; - int m_team_shared; - int m_thread_local; - int m_scratch_size; + size_t m_team_reduce; + size_t m_team_shared; + size_t m_thread_local; + size_t m_scratch_size; int m_team_base; int m_team_rank; int m_team_size; @@ -184,7 +183,11 @@ class HostThreadTeamData { //---------------------------------------- - constexpr HostThreadTeamData() noexcept +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC bug in NVHPC regarding constexpr + // constructors used in device code + constexpr +#endif + HostThreadTeamData() noexcept : m_work_range(-1, -1), m_work_end(0), m_scratch(nullptr), @@ -205,7 +208,8 @@ class HostThreadTeamData { m_work_chunk(0), m_steal_rank(0), m_pool_rendezvous_step(0), - m_team_rendezvous_step(0) {} + m_team_rendezvous_step(0) { + } //---------------------------------------- // Organize array of members into a pool. @@ -247,33 +251,31 @@ class HostThreadTeamData { //---------------------------------------- - private: - enum : int { mask_to_16 = 0x0f }; // align to 16 bytes - enum : int { shift_to_8 = 3 }; // size to 8 bytes - public: - static constexpr int align_to_int64(int n) { + static constexpr size_t align_to_int64(size_t n) { + constexpr size_t mask_to_16 = 0x0f; // align to 16 bytes + constexpr size_t shift_to_8 = 3; // size to 8 bytes return ((n + mask_to_16) & ~mask_to_16) >> shift_to_8; } - constexpr int pool_reduce_bytes() const { + constexpr size_t pool_reduce_bytes() const { return m_scratch_size ? 
sizeof(int64_t) * (m_team_reduce - m_pool_reduce) : 0; } - constexpr int team_reduce_bytes() const { + constexpr size_t team_reduce_bytes() const { return sizeof(int64_t) * (m_team_shared - m_team_reduce); } - constexpr int team_shared_bytes() const { + constexpr size_t team_shared_bytes() const { return sizeof(int64_t) * (m_thread_local - m_team_shared); } - constexpr int thread_local_bytes() const { + constexpr size_t thread_local_bytes() const { return sizeof(int64_t) * (m_scratch_size - m_thread_local); } - constexpr int scratch_bytes() const { + constexpr size_t scratch_bytes() const { return sizeof(int64_t) * m_scratch_size; } @@ -310,8 +312,9 @@ class HostThreadTeamData { // thread_local_size = number bytes for thread local memory // Return: // total number of bytes that must be allocated - static size_t scratch_size(int pool_reduce_size, int team_reduce_size, - int team_shared_size, int thread_local_size) { + static size_t scratch_size(size_t pool_reduce_size, size_t team_reduce_size, + size_t team_shared_size, + size_t thread_local_size) { pool_reduce_size = align_to_int64(pool_reduce_size); team_reduce_size = align_to_int64(team_reduce_size); team_shared_size = align_to_int64(team_shared_size); @@ -336,7 +339,7 @@ class HostThreadTeamData { // total number of bytes that must be allocated void scratch_assign(void* const alloc_ptr, size_t const alloc_size, int pool_reduce_size, int team_reduce_size, - int team_shared_size, int /* thread_local_size */) { + size_t team_shared_size, size_t /* thread_local_size */) { pool_reduce_size = align_to_int64(pool_reduce_size); team_reduce_size = align_to_int64(team_reduce_size); team_shared_size = align_to_int64(team_shared_size); @@ -556,18 +559,15 @@ class HostThreadTeamMember { // team_reduce( Max(result) ); template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer) const noexcept { + KOKKOS_INLINE_FUNCTION 
std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer) const noexcept { team_reduce(reducer, reducer.reference()); } template <typename ReducerType> - KOKKOS_INLINE_FUNCTION - typename std::enable_if<is_reducer<ReducerType>::value>::type - team_reduce(ReducerType const& reducer, - typename ReducerType::value_type contribution) const - noexcept { + KOKKOS_INLINE_FUNCTION std::enable_if_t<is_reducer<ReducerType>::value> + team_reduce(ReducerType const& reducer, + typename ReducerType::value_type contribution) const noexcept { KOKKOS_IF_ON_HOST(( if (1 < m_data.m_team_size) { using value_type = typename ReducerType::value_type; @@ -689,61 +689,59 @@ template <typename iType, typename Member> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct<iType, Member> TeamThreadRange( Member const& member, iType count, - typename std::enable_if< - Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { + std::enable_if_t<Impl::is_thread_team_member<Member>::value> const** = + nullptr) { return Impl::TeamThreadRangeBoundariesStruct<iType, Member>(member, 0, count); } template <typename iType1, typename iType2, typename Member> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Member> + std::common_type_t<iType1, iType2>, Member> TeamThreadRange( Member const& member, iType1 begin, iType2 end, - typename std::enable_if< - Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { + std::enable_if_t<Impl::is_thread_team_member<Member>::value> const** = + nullptr) { return Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Member>(member, begin, - end); + std::common_type_t<iType1, iType2>, Member>(member, begin, end); } template <typename iType, typename Member> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct<iType, Member> TeamVectorRange( Member const& member, iType count, - typename 
std::enable_if< - Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { + std::enable_if_t<Impl::is_thread_team_member<Member>::value> const** = + nullptr) { return Impl::TeamThreadRangeBoundariesStruct<iType, Member>(member, 0, count); } template <typename iType1, typename iType2, typename Member> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Member> + std::common_type_t<iType1, iType2>, Member> TeamVectorRange( Member const& member, iType1 begin, iType2 end, - typename std::enable_if< - Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { + std::enable_if_t<Impl::is_thread_team_member<Member>::value> const** = + nullptr) { return Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Member>(member, begin, - end); + std::common_type_t<iType1, iType2>, Member>(member, begin, end); } template <typename iType, typename Member> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct<iType, Member> ThreadVectorRange( Member const& member, iType count, - typename std::enable_if< - Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { + std::enable_if_t<Impl::is_thread_team_member<Member>::value> const** = + nullptr) { return Impl::ThreadVectorRangeBoundariesStruct<iType, Member>(member, count); } template <typename iType1, typename iType2, typename Member> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - typename std::common_type<iType1, iType2>::type, Member> + std::common_type_t<iType1, iType2>, Member> ThreadVectorRange( Member const& member, iType1 arg_begin, iType2 arg_end, - typename std::enable_if< - Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { - using iType = typename std::common_type<iType1, iType2>::type; + std::enable_if_t<Impl::is_thread_team_member<Member>::value> const** = + nullptr) { + using iType = std::common_type_t<iType1, iType2>; return 
Impl::ThreadVectorRangeBoundariesStruct<iType, Member>( member, iType(arg_begin), iType(arg_end)); } @@ -759,8 +757,8 @@ template <typename iType, class Closure, class Member> KOKKOS_INLINE_FUNCTION void parallel_for( Impl::TeamThreadRangeBoundariesStruct<iType, Member> const& loop_boundaries, Closure const& closure, - typename std::enable_if<Impl::is_host_thread_team_member<Member>::value>:: - type const** = nullptr) { + std::enable_if_t<Impl::is_host_thread_team_member<Member>::value> const** = + nullptr) { for (iType i = loop_boundaries.start; i < loop_boundaries.end; i += loop_boundaries.increment) { closure(i); @@ -772,8 +770,8 @@ KOKKOS_INLINE_FUNCTION void parallel_for( Impl::ThreadVectorRangeBoundariesStruct<iType, Member> const& loop_boundaries, Closure const& closure, - typename std::enable_if<Impl::is_host_thread_team_member<Member>::value>:: - type const** = nullptr) { + std::enable_if_t<Impl::is_host_thread_team_member<Member>::value> const** = + nullptr) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif @@ -786,12 +784,12 @@ KOKKOS_INLINE_FUNCTION void parallel_for( //---------------------------------------------------------------------------- template <typename iType, class Closure, class Reducer, class Member> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Kokkos::is_reducer<Reducer>::value && - Impl::is_host_thread_team_member<Member>::value>::type -parallel_reduce( - Impl::TeamThreadRangeBoundariesStruct<iType, Member> const& loop_boundaries, - Closure const& closure, Reducer const& reducer) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Kokkos::is_reducer<Reducer>::value && + Impl::is_host_thread_team_member<Member>::value> + parallel_reduce(Impl::TeamThreadRangeBoundariesStruct<iType, Member> const& + loop_boundaries, + Closure const& closure, Reducer const& reducer) { typename Reducer::value_type value; reducer.init(value); @@ -804,12 +802,12 @@ parallel_reduce( } template <typename iType, typename Closure, typename ValueType, typename 
Member> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - !Kokkos::is_reducer<ValueType>::value && - Impl::is_host_thread_team_member<Member>::value>::type -parallel_reduce( - Impl::TeamThreadRangeBoundariesStruct<iType, Member> const& loop_boundaries, - Closure const& closure, ValueType& result) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<!Kokkos::is_reducer<ValueType>::value && + Impl::is_host_thread_team_member<Member>::value> + parallel_reduce(Impl::TeamThreadRangeBoundariesStruct<iType, Member> const& + loop_boundaries, + Closure const& closure, ValueType& result) { ValueType val; Sum<ValueType> reducer(val); reducer.init(val); @@ -858,12 +856,12 @@ Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > * performed and put into result. */ template <typename iType, class Lambda, typename ValueType, typename Member> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - !Kokkos::is_reducer<ValueType>::value && - Impl::is_host_thread_team_member<Member>::value>::type -parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& - loop_boundaries, - const Lambda& lambda, ValueType& result) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<!Kokkos::is_reducer<ValueType>::value && + Impl::is_host_thread_team_member<Member>::value> + parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Member>& loop_boundaries, + const Lambda& lambda, ValueType& result) { result = ValueType(); for (iType i = loop_boundaries.start; i < loop_boundaries.end; i += loop_boundaries.increment) { @@ -872,12 +870,12 @@ parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& } template <typename iType, class Lambda, typename ReducerType, typename Member> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Kokkos::is_reducer<ReducerType>::value && - Impl::is_host_thread_team_member<Member>::value>::type -parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& - loop_boundaries, - const Lambda& 
lambda, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Kokkos::is_reducer<ReducerType>::value && + Impl::is_host_thread_team_member<Member>::value> + parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Member>& loop_boundaries, + const Lambda& lambda, const ReducerType& reducer) { reducer.init(reducer.reference()); for (iType i = loop_boundaries.start; i < loop_boundaries.end; i += loop_boundaries.increment) { @@ -888,11 +886,11 @@ parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& //---------------------------------------------------------------------------- template <typename iType, class Closure, class Member> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Impl::is_host_thread_team_member<Member>::value>::type -parallel_scan( - Impl::TeamThreadRangeBoundariesStruct<iType, Member> const& loop_boundaries, - Closure const& closure) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Impl::is_host_thread_team_member<Member>::value> + parallel_scan(Impl::TeamThreadRangeBoundariesStruct<iType, Member> const& + loop_boundaries, + Closure const& closure) { // Extract ValueType from the closure using value_type = typename Kokkos::Impl::FunctorAnalysis< @@ -916,11 +914,11 @@ parallel_scan( } template <typename iType, class ClosureType, class Member> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Impl::is_host_thread_team_member<Member>::value>::type -parallel_scan(Impl::ThreadVectorRangeBoundariesStruct<iType, Member> const& - loop_boundaries, - ClosureType const& closure) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Impl::is_host_thread_team_member<Member>::value> + parallel_scan(Impl::ThreadVectorRangeBoundariesStruct<iType, Member> const& + loop_boundaries, + ClosureType const& closure) { using value_type = typename Kokkos::Impl::FunctorAnalysis< Impl::FunctorPatternInterface::SCAN, void, ClosureType>::value_type; @@ -936,12 +934,12 @@ parallel_scan(Impl::ThreadVectorRangeBoundariesStruct<iType, 
Member> const& } template <typename iType, class Lambda, typename ReducerType, typename Member> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Kokkos::is_reducer<ReducerType>::value && - Impl::is_host_thread_team_member<Member>::value>::type -parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& - loop_boundaries, - const Lambda& lambda, const ReducerType& reducer) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Kokkos::is_reducer<ReducerType>::value && + Impl::is_host_thread_team_member<Member>::value> + parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& + loop_boundaries, + const Lambda& lambda, const ReducerType& reducer) { typename ReducerType::value_type scan_val; reducer.init(scan_val); @@ -959,48 +957,49 @@ parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& template <class Member> KOKKOS_INLINE_FUNCTION Impl::ThreadSingleStruct<Member> PerTeam( Member const& member, - typename std::enable_if< - Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { + std::enable_if_t<Impl::is_thread_team_member<Member>::value> const** = + nullptr) { return Impl::ThreadSingleStruct<Member>(member); } template <class Member> KOKKOS_INLINE_FUNCTION Impl::VectorSingleStruct<Member> PerThread( Member const& member, - typename std::enable_if< - Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { + std::enable_if_t<Impl::is_thread_team_member<Member>::value> const** = + nullptr) { return Impl::VectorSingleStruct<Member>(member); } template <class Member, class FunctorType> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Impl::is_host_thread_team_member<Member>::value>::type -single(const Impl::ThreadSingleStruct<Member>& single, - const FunctorType& functor) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Impl::is_host_thread_team_member<Member>::value> + single(const Impl::ThreadSingleStruct<Member>& single, + const FunctorType& functor) { // 'single' does not perform a 
barrier. if (single.team_member.team_rank() == 0) functor(); } template <class Member, class FunctorType, typename ValueType> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Impl::is_host_thread_team_member<Member>::value>::type -single(const Impl::ThreadSingleStruct<Member>& single, - const FunctorType& functor, ValueType& val) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Impl::is_host_thread_team_member<Member>::value> + single(const Impl::ThreadSingleStruct<Member>& single, + const FunctorType& functor, ValueType& val) { single.team_member.team_broadcast(functor, val, 0); } template <class Member, class FunctorType> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Impl::is_host_thread_team_member<Member>::value>::type -single(const Impl::VectorSingleStruct<Member>&, const FunctorType& functor) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Impl::is_host_thread_team_member<Member>::value> + single(const Impl::VectorSingleStruct<Member>&, + const FunctorType& functor) { functor(); } template <class Member, class FunctorType, typename ValueType> -KOKKOS_INLINE_FUNCTION typename std::enable_if< - Impl::is_host_thread_team_member<Member>::value>::type -single(const Impl::VectorSingleStruct<Member>&, const FunctorType& functor, - ValueType& val) { +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Impl::is_host_thread_team_member<Member>::value> + single(const Impl::VectorSingleStruct<Member>&, const FunctorType& functor, + ValueType& val) { functor(val); } diff --git a/packages/kokkos/core/src/impl/Kokkos_InitializationSettings.hpp b/packages/kokkos/core/src/impl/Kokkos_InitializationSettings.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ceb35f0247f03ef571404d43d71f9ab13d54930d --- /dev/null +++ b/packages/kokkos/core/src/impl/Kokkos_InitializationSettings.hpp @@ -0,0 +1,195 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_INITIALIZATION_SETTINGS_HPP +#define KOKKOS_INITIALIZATION_SETTINGS_HPP + +#include <Kokkos_Macros.hpp> + +#include <climits> +#include <string> + +namespace Kokkos { + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +struct InitArguments { + int num_threads; + int num_numa; + int device_id; + int ndevices; + int skip_device; + bool disable_warnings; + bool tune_internals; + bool tool_help = false; + std::string tool_lib = {}; + std::string tool_args = {}; + + KOKKOS_DEPRECATED_WITH_COMMENT("Use InitializationSettings instead!") + InitArguments(int nt = -1, int nn = -1, int dv = -1, bool dw = false, + bool ti = false) + : num_threads{nt}, + num_numa{nn}, + device_id{dv}, + ndevices{-1}, + skip_device{9999}, + disable_warnings{dw}, + tune_internals{ti} {} +}; +#endif + +namespace Impl { +// FIXME_CXX17 replace with std::optional +template <class> +struct InitializationSettingsHelper; +template <> +struct InitializationSettingsHelper<int> { + using value_type = int; + using storage_type = int; + + static constexpr storage_type unspecified = INT_MIN; +}; +template <> +struct InitializationSettingsHelper<bool> { + using value_type = bool; + using storage_type = char; + + static constexpr storage_type unspecified = CHAR_MAX; + static_assert(static_cast<storage_type>(true) != unspecified && + static_cast<storage_type>(false) != unspecified, + ""); +}; +template <> +struct InitializationSettingsHelper<std::string> { + using value_type = std::string; + using storage_type = std::string; + + static storage_type const unspecified; +}; +} // namespace Impl + +class InitializationSettings { +#define KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER(NAME) \ + impl_do_not_use_i_really_mean_it_##NAME##_ + +#define KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER_TYPE(NAME) impl_##NAME##_type + +#define KOKKOS_IMPL_DECLARE(TYPE, NAME) \ + private: \ + using 
KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER_TYPE(NAME) = TYPE; \ + Impl::InitializationSettingsHelper<TYPE>::storage_type \ + KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER(NAME) = \ + Impl::InitializationSettingsHelper<TYPE>::unspecified; \ + \ + public: \ + InitializationSettings& set_##NAME( \ + Impl::InitializationSettingsHelper<TYPE>::value_type NAME) { \ + KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER(NAME) = NAME; \ + return *this; \ + } \ + bool has_##NAME() const noexcept { \ + return KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER(NAME) != \ + Impl::InitializationSettingsHelper< \ + KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER_TYPE(NAME)>::unspecified; \ + } \ + KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER_TYPE(NAME) get_##NAME() const noexcept { \ + return KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER(NAME); \ + } \ + static_assert(true, "no-op to require trailing semicolon") + + public: + KOKKOS_IMPL_DECLARE(int, num_threads); + KOKKOS_IMPL_DECLARE(int, device_id); + KOKKOS_IMPL_DECLARE(std::string, map_device_id_by); + KOKKOS_IMPL_DECLARE(int, num_devices); // deprecated + KOKKOS_IMPL_DECLARE(int, skip_device); // deprecated + KOKKOS_IMPL_DECLARE(bool, disable_warnings); + KOKKOS_IMPL_DECLARE(bool, print_configuration); + KOKKOS_IMPL_DECLARE(bool, tune_internals); + KOKKOS_IMPL_DECLARE(bool, tools_help); + KOKKOS_IMPL_DECLARE(std::string, tools_libs); + KOKKOS_IMPL_DECLARE(std::string, tools_args); + +#undef KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER_TYPE +#undef KOKKOS_IMPL_INIT_ARGS_DATA_MEMBER +#undef KOKKOS_IMPL_DECLARE + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 + public: + InitializationSettings() = default; + + InitializationSettings(InitArguments const& old) { + if (old.num_threads != -1) { + set_num_threads(old.num_threads); + } + if (old.device_id != -1) { + set_device_id(old.device_id); + } + if (old.ndevices != -1) { + set_num_devices(old.ndevices); + } + if (old.skip_device != 9999) { + set_skip_device(old.skip_device); + } + if (old.disable_warnings) { + set_disable_warnings(true); + } + if (old.tune_internals) { + 
set_tune_internals(true); + } + if (old.tool_help) { + set_tools_help(true); + } + if (!old.tool_lib.empty()) { + set_tools_libs(old.tool_lib); + } + if (!old.tool_args.empty()) { + set_tools_args(old.tool_args); + } + } +#endif +}; + +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/impl/Kokkos_LIFO.hpp b/packages/kokkos/core/src/impl/Kokkos_LIFO.hpp index 683c5c9b18ba5b8c802eebf5cdcc62cac42bf616..286c56743af8f51cd5bca27b333db59ee8e6d911 100644 --- a/packages/kokkos/core/src/impl/Kokkos_LIFO.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_LIFO.hpp @@ -77,7 +77,7 @@ struct LockBasedLIFOCommon { static constexpr uintptr_t LockTag = ~uintptr_t(0); static constexpr uintptr_t EndTag = ~uintptr_t(1); - OwningRawPtr<node_type> m_head = (node_type*)EndTag; + OwningRawPtr<node_type> m_head = reinterpret_cast<node_type*>(EndTag); KOKKOS_INLINE_FUNCTION bool _try_push_node(node_type& node) { @@ -89,7 +89,7 @@ struct LockBasedLIFOCommon { auto* old_head = m_head; // retry until someone locks the queue or we successfully compare exchange - while (old_head != (node_type*)LockTag) { + while (old_head != reinterpret_cast<node_type*>(LockTag)) { // TODO @tasking @memory_order DSH this should have a memory order and not // a memory fence @@ -132,7 +132,8 @@ struct LockBasedLIFOCommon { bool _is_empty() const noexcept { // TODO @tasking @memory_order DSH make this an atomic load with memory // order - return (volatile node_type*)this->m_head == (node_type*)EndTag; + return (volatile node_type*)this->m_head == + reinterpret_cast<node_type*>(EndTag); } }; diff --git a/packages/kokkos/core/src/impl/Kokkos_MemoryPool.cpp b/packages/kokkos/core/src/impl/Kokkos_MemoryPool.cpp index 889d821bb1ced124c06f7c41f3604baa9d3bf782..f82e88fad96bf04417b2481d616ca9096c5cc18a 100644 --- a/packages/kokkos/core/src/impl/Kokkos_MemoryPool.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_MemoryPool.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define 
KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <impl/Kokkos_Error.hpp> #include <ostream> diff --git a/packages/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp b/packages/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp index 2218405766cacb1215604ff7e7cb749b509be56e..7dede48a14da83be482a2c87ebd09b97d0e99333 100644 --- a/packages/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp @@ -89,7 +89,7 @@ class MemoryPoolAllocator { using value_type = T; using pointer = T*; using size_type = typename MemoryPool::memory_space::size_type; - using difference_type = typename std::make_signed<size_type>::type; + using difference_type = std::make_signed_t<size_type>; template <class U> struct rebind { diff --git a/packages/kokkos/core/src/impl/Kokkos_MemorySpace.cpp b/packages/kokkos/core/src/impl/Kokkos_MemorySpace.cpp index ec2e573c0450c6d81db64334db65102bd59f2ae1..a80ea0a1dafafa8756bf7e223a366f1a634f39b3 100644 --- a/packages/kokkos/core/src/impl/Kokkos_MemorySpace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_MemorySpace.cpp @@ -47,6 +47,10 @@ * implementations thereof. 
*/ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <impl/Kokkos_MemorySpace.hpp> #include <iostream> diff --git a/packages/kokkos/core/src/impl/Kokkos_MemorySpace.hpp b/packages/kokkos/core/src/impl/Kokkos_MemorySpace.hpp index 5b3764686f517066194f142fbca68e270cdd1b8f..dee11bbdb42b7e8ea34be6424592146d697926c4 100644 --- a/packages/kokkos/core/src/impl/Kokkos_MemorySpace.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_MemorySpace.hpp @@ -78,6 +78,20 @@ SharedAllocationHeader *checked_allocation_with_header(MemorySpace const &space, return nullptr; // unreachable } +template <class ExecutionSpace, class MemorySpace> +SharedAllocationHeader *checked_allocation_with_header( + ExecutionSpace const &exec_space, MemorySpace const &space, + std::string const &label, size_t alloc_size) { + try { + return reinterpret_cast<SharedAllocationHeader *>(space.allocate( + exec_space, label.c_str(), alloc_size + sizeof(SharedAllocationHeader), + alloc_size)); + } catch (Kokkos::Experimental::RawMemoryAllocationFailure const &failure) { + safe_throw_allocation_with_header_failure(space.name(), label, failure); + } + return nullptr; // unreachable +} + } // end namespace Impl } // end namespace Kokkos diff --git a/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp index f6870899632a7049d19082364aac26a439754e64..1df5d13b957c8e5d29e0ad1bedb88054bac604a4 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp @@ -58,8 +58,7 @@ void memory_fence() { #elif defined(__HIP_DEVICE_COMPILE__) __threadfence(); #elif defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__) - sycl::atomic_fence(sycl::ext::oneapi::memory_order::acq_rel, - sycl::ext::oneapi::memory_scope::device); + sycl::atomic_fence(sycl::memory_order::acq_rel, sycl::memory_scope::device); #elif defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) 
asm volatile("mfence" ::: "memory"); #elif defined(KOKKOS_ENABLE_GNU_ATOMICS) || \ diff --git a/packages/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp b/packages/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp index 1c61b73f027aaefa4993aaae7beee3ca9af05110..209ba1920039ff434a2cfd4390c322dd30647bf3 100644 --- a/packages/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp @@ -64,7 +64,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -102,10 +101,10 @@ struct MultipleTaskQueueTeamEntry { using ready_queue_type = typename TaskQueueTraits::template ready_queue_type<task_base_type>; using task_queue_traits = TaskQueueTraits; - using task_scheduling_info_type = typename std::conditional< + using task_scheduling_info_type = std::conditional_t< TaskQueueTraits::ready_queue_insertion_may_fail, FailedQueueInsertionLinkedListSchedulingInfo<TaskQueueTraits>, - EmptyTaskSchedulingInfo>::type; + EmptyTaskSchedulingInfo>; private: // Number of allowed priorities @@ -123,10 +122,9 @@ struct MultipleTaskQueueTeamEntry { template <class _always_void = void> KOKKOS_INLINE_FUNCTION OptionalRef<task_base_type> _pop_failed_insertion( int priority, TaskType type, - typename std::enable_if< - task_queue_traits::ready_queue_insertion_may_fail && - std::is_void<_always_void>::value, - void*>::type = nullptr) { + std::enable_if_t<task_queue_traits::ready_queue_insertion_may_fail && + std::is_void<_always_void>::value, + void*> = nullptr) { auto* rv_ptr = m_failed_heads[priority][(int)type]; if (rv_ptr) { m_failed_heads[priority][(int)type] = @@ -142,10 +140,9 @@ struct MultipleTaskQueueTeamEntry { template <class _always_void = void> KOKKOS_INLINE_FUNCTION OptionalRef<task_base_type> _pop_failed_insertion( int /*priority*/, TaskType /*type*/, - 
typename std::enable_if< - !task_queue_traits::ready_queue_insertion_may_fail && - std::is_void<_always_void>::value, - void*>::type = nullptr) { + std::enable_if_t<!task_queue_traits::ready_queue_insertion_may_fail && + std::is_void<_always_void>::value, + void*> = nullptr) { return OptionalRef<task_base_type>{nullptr}; } @@ -201,10 +198,9 @@ struct MultipleTaskQueueTeamEntry { template <class _always_void = void> KOKKOS_INLINE_FUNCTION void do_handle_failed_insertion( runnable_task_base_type&& task, - typename std::enable_if< - task_queue_traits::ready_queue_insertion_may_fail && - std::is_void<_always_void>::value, - void*>::type = nullptr) { + std::enable_if_t<task_queue_traits::ready_queue_insertion_may_fail && + std::is_void<_always_void>::value, + void*> = nullptr) { // failed insertions, if they happen, must be from the only thread that // is allowed to push to m_ready_queues, so this linked-list insertion is // not concurrent @@ -217,21 +213,20 @@ struct MultipleTaskQueueTeamEntry { template <class _always_void = void> KOKKOS_INLINE_FUNCTION void do_handle_failed_insertion( runnable_task_base_type&& /*task*/, - typename std::enable_if< - !task_queue_traits::ready_queue_insertion_may_fail && - std::is_void<_always_void>::value, - void*>::type = nullptr) { + std::enable_if_t<!task_queue_traits::ready_queue_insertion_may_fail && + std::is_void<_always_void>::value, + void*> = nullptr) { Kokkos::abort("should be unreachable!"); } template <class _always_void = void> KOKKOS_INLINE_FUNCTION void flush_failed_insertions( int priority, int task_type, - typename std::enable_if< + std::enable_if_t< task_queue_traits::ready_queue_insertion_may_fail && std::is_void<_always_void>::value, // just to make this dependent // on template parameter - int>::type = 0) { + int> = 0) { // TODO @tasking @minor DSH this somethimes gets some things out of LIFO // order, which may be undesirable (but not a bug) @@ -256,11 +251,11 @@ struct MultipleTaskQueueTeamEntry { template 
<class _always_void = void> KOKKOS_INLINE_FUNCTION void flush_failed_insertions( int, int, - typename std::enable_if< + std::enable_if_t< !task_queue_traits::ready_queue_insertion_may_fail && std::is_void<_always_void>::value, // just to make this dependent // on template parameter - int>::type = 0) {} + int> = 0) {} KOKKOS_INLINE_FUNCTION void flush_all_failed_insertions() { @@ -353,10 +348,10 @@ class MultipleTaskQueue final ~SchedulerInfo() = default; }; - using task_scheduling_info_type = typename std::conditional< + using task_scheduling_info_type = std::conditional_t< TaskQueueTraits::ready_queue_insertion_may_fail, FailedQueueInsertionLinkedListSchedulingInfo<TaskQueueTraits>, - EmptyTaskSchedulingInfo>::type; + EmptyTaskSchedulingInfo>; using team_scheduler_info_type = SchedulerInfo; using runnable_task_base_type = RunnableTaskBase<TaskQueueTraits>; diff --git a/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp b/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp index 71067b8e1b4ec0d61fd58cc2e2a19d3afaa0f8f4..5ff094032faa37b906b835d1ff7b8bd2ec46de2a 100644 --- a/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp @@ -1,3 +1,8 @@ + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_NumericTraits.hpp> // NOTE These out-of class definitions are only required with C++14. 
Since diff --git a/packages/kokkos/core/src/impl/Kokkos_OptionalRef.hpp b/packages/kokkos/core/src/impl/Kokkos_OptionalRef.hpp index 12f6c9f5fdb42e8383f3c9b174ea17c28ff04fe7..1a3cbaba2859fdbd6b946a1e85422b5de7489ff3 100644 --- a/packages/kokkos/core/src/impl/Kokkos_OptionalRef.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_OptionalRef.hpp @@ -120,18 +120,14 @@ struct OptionalRef { //---------------------------------------- KOKKOS_INLINE_FUNCTION - OptionalRef<typename std::add_volatile<T>::type> - as_volatile() volatile noexcept { - return OptionalRef<typename std::add_volatile<T>::type>(*(*this)); + OptionalRef<std::add_volatile_t<T>> as_volatile() volatile noexcept { + return OptionalRef<std::add_volatile_t<T>>(*(*this)); } KOKKOS_INLINE_FUNCTION - OptionalRef< - typename std::add_volatile<typename std::add_const<T>::type>::type> - as_volatile() const volatile noexcept { - return OptionalRef< - typename std::add_volatile<typename std::add_const<T>::type>::type>( - *(*this)); + OptionalRef<std::add_volatile_t<std::add_const_t<T>>> as_volatile() const + volatile noexcept { + return OptionalRef<std::add_volatile_t<std::add_const_t<T>>>(*(*this)); } //---------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_ParseCommandLineArgumentsAndEnvironmentVariables.hpp b/packages/kokkos/core/src/impl/Kokkos_ParseCommandLineArgumentsAndEnvironmentVariables.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4fdb85b6a0ecc6dff59268637ebf8be7352a5500 --- /dev/null +++ b/packages/kokkos/core/src/impl/Kokkos_ParseCommandLineArgumentsAndEnvironmentVariables.hpp @@ -0,0 +1,58 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_PARSE_COMMAND_LINE_ARGUMENTS_AND_ENVIRONMENT_VARIABLES_HPP +#define KOKKOS_PARSE_COMMAND_LINE_ARGUMENTS_AND_ENVIRONMENT_VARIABLES_HPP + +// These declarations are only provided for testing purposes +namespace Kokkos { +class InitializationSettings; +namespace Impl { +void parse_command_line_arguments(int& argc, char* argv[], + InitializationSettings& settings); +void parse_environment_variables(InitializationSettings& settings); +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp b/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp index 9c8118e2bfb326de725555d441667e0cce87d670..480b1a392bf019b4c37c452a09801fe8ef8f3458 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #ifndef KOKKOS_TOOLS_INDEPENDENT_BUILD #include <Kokkos_Macros.hpp> #include <Kokkos_Tuners.hpp> @@ -67,6 +71,34 @@ #include <vector> #include <sstream> #include <iostream> + +namespace { +void warn_cmd_line_arg_ignored_when_kokkos_tools_disabled(char const* arg) { +#ifndef KOKKOS_TOOLS_ENABLE_LIBDL + if (Kokkos::show_warnings()) { + std::cerr << "Warning: command line argument '" << arg + << "' ignored because kokkos-tools is disabled." + << " Raised by Kokkos::initialize()." << std::endl; + } +#else + (void)arg; +#endif +} +void warn_env_var_ignored_when_kokkos_tools_disabled(char const* env_var, + char const* val) { +#ifndef KOKKOS_TOOLS_ENABLE_LIBDL + if (Kokkos::show_warnings()) { + std::cerr << "Warning: environment variable '" << env_var << "=" << val + << "' ignored because kokkos-tools is disabled." + << " Raised by Kokkos::initialize()." 
<< std::endl; + } +#else + (void)env_var; + (void)val; +#endif +} +} // namespace + namespace Kokkos { namespace Tools { @@ -77,34 +109,29 @@ const std::string InitArguments::unset_string_option = { InitArguments tool_arguments; namespace Impl { -void parse_command_line_arguments(int& narg, char* arg[], +void parse_command_line_arguments(int& argc, char* argv[], InitArguments& arguments) { int iarg = 0; using Kokkos::Impl::check_arg; - using Kokkos::Impl::check_int_arg; - using Kokkos::Impl::check_str_arg; - - auto& lib = arguments.lib; - auto& args = arguments.args; - auto& help = arguments.help; - auto& tune_internals = arguments.tune_internals; - while (iarg < narg) { - if (check_arg(arg[iarg], "--kokkos-tune-internals")) { - tune_internals = InitArguments::PossiblyUnsetOption::on; - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; - } - narg--; - } else if (check_str_arg(arg[iarg], "--kokkos-tools-library", lib)) { - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; - } - narg--; - } else if (check_str_arg(arg[iarg], "--kokkos-tools-args", args)) { - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; + using Kokkos::Impl::check_arg_str; + + auto& libs = arguments.lib; + auto& args = arguments.args; + auto& help = arguments.help; + while (iarg < argc) { + bool remove_flag = false; + if (check_arg_str(argv[iarg], "--kokkos-tools-libs", libs) || + check_arg_str(argv[iarg], "--kokkos-tools-library", libs)) { + if (check_arg(argv[iarg], "--kokkos-tools-library")) { + using Kokkos::Impl::warn_deprecated_command_line_argument; + warn_deprecated_command_line_argument("--kokkos-tools-library", + "--kokkos-tools-libs"); } - narg--; + warn_cmd_line_arg_ignored_when_kokkos_tools_disabled(argv[iarg]); + remove_flag = true; + } else if (check_arg_str(argv[iarg], "--kokkos-tools-args", args)) { + warn_cmd_line_arg_ignored_when_kokkos_tools_disabled(argv[iarg]); + remove_flag = true; // strip any leading and/or trailing quotes if they 
were retained in the // string because this will very likely cause parsing issues for tools. // If the quotes are retained (via bypassing the shell): @@ -118,56 +145,71 @@ void parse_command_line_arguments(int& narg, char* arg[], if (args.back() == '"') args = args.substr(0, args.length() - 1); } // add the name of the executable to the beginning - if (narg > 0) args = std::string(arg[0]) + " " + args; - } else if (check_arg(arg[iarg], "--kokkos-tools-help")) { + if (argc > 0) args = std::string(argv[0]) + " " + args; + } else if (check_arg(argv[iarg], "--kokkos-tools-help")) { help = InitArguments::PossiblyUnsetOption::on; - for (int k = iarg; k < narg - 1; k++) { - arg[k] = arg[k + 1]; + warn_cmd_line_arg_ignored_when_kokkos_tools_disabled(argv[iarg]); + remove_flag = true; + } else if (std::regex_match(argv[iarg], std::regex("-?-kokkos-tool.*", + std::regex::egrep))) { + std::cerr << "Warning: command line argument '" << argv[iarg] + << "' is not recognized." + << " Raised by Kokkos::initialize()." << std::endl; + } + if (remove_flag) { + // Shift the remainder of the argv list by one. Note that argv has + // (argc + 1) arguments, the last one always being nullptr. 
The following + // loop moves the trailing nullptr element as well + for (int k = iarg; k < argc; ++k) { + argv[k] = argv[k + 1]; } - narg--; + argc--; } else { iarg++; } - if ((args == Kokkos::Tools::InitArguments::unset_string_option) && narg > 0) - args = arg[0]; + if ((args == Kokkos::Tools::InitArguments::unset_string_option) && argc > 0) + args = argv[0]; } } Kokkos::Tools::Impl::InitializationStatus parse_environment_variables( InitArguments& arguments) { - auto& tool_lib = arguments.lib; - auto& tune_internals = arguments.tune_internals; - auto env_tool_lib = std::getenv("KOKKOS_PROFILE_LIBRARY"); - if (env_tool_lib != nullptr) { - if ((tool_lib != Kokkos::Tools::InitArguments::unset_string_option) && - std::string(env_tool_lib) != tool_lib) - return {Kokkos::Tools::Impl::InitializationStatus::InitializationResult:: - environment_argument_mismatch, - "Error: expecting a match between --kokkos-tools-library and " - "KOKKOS_PROFILE_LIBRARY if both are set. Raised by " - "Kokkos::initialize(int narg, char* argc[])."}; - else - tool_lib = env_tool_lib; + auto& libs = arguments.lib; + auto& args = arguments.args; + auto env_profile_library = std::getenv("KOKKOS_PROFILE_LIBRARY"); + if (env_profile_library != nullptr) { + using Kokkos::Impl::warn_deprecated_environment_variable; + warn_deprecated_environment_variable("KOKKOS_PROFILE_LIBRARY", + "KOKKOS_TOOLS_LIBS"); + warn_env_var_ignored_when_kokkos_tools_disabled("KOKKOS_PROFILE_LIBRARY", + env_profile_library); + libs = env_profile_library; } - char* env_tuneinternals_str = std::getenv("KOKKOS_TUNE_INTERNALS"); - if (env_tuneinternals_str != nullptr) { - std::string env_str(env_tuneinternals_str); // deep-copies string - for (char& c : env_str) { - c = toupper(c); + auto env_tools_libs = std::getenv("KOKKOS_TOOLS_LIBS"); + if (env_tools_libs != nullptr) { + warn_env_var_ignored_when_kokkos_tools_disabled("KOKKOS_TOOLS_LIBS", + env_tools_libs); + if (env_profile_library != nullptr && libs != env_tools_libs) { + 
std::stringstream ss; + ss << "Error: environment variables 'KOKKOS_PROFILE_LIBRARY=" + << env_profile_library << "' and 'KOKKOS_TOOLS_LIBS=" << env_tools_libs + << "' are both set and do not match." + << " Raised by Kokkos::initialize().\n"; + Kokkos::abort(ss.str().c_str()); } - if ((env_str == "TRUE") || (env_str == "ON") || (env_str == "1")) - tune_internals = InitArguments::PossiblyUnsetOption::on; - else if (tune_internals) - return {Kokkos::Tools::Impl::InitializationStatus::InitializationResult:: - environment_argument_mismatch, - "Error: expecting a match between --kokkos-tune-internals and " - "KOKKOS_TUNE_INTERNALS if both are set. Raised by " - "Kokkos::initialize(int narg, char* argc[])."}; + libs = env_tools_libs; + } + auto env_tools_args = std::getenv("KOKKOS_TOOLS_ARGS"); + if (env_tools_args != nullptr) { + warn_env_var_ignored_when_kokkos_tools_disabled("KOKKOS_TOOLS_ARGS", + env_tools_args); + args = env_tools_args; } return { Kokkos::Tools::Impl::InitializationStatus::InitializationResult::success}; } InitializationStatus initialize_tools_subsystem( const Kokkos::Tools::InitArguments& args) { +#ifdef KOKKOS_TOOLS_ENABLE_LIBDL Kokkos::Profiling::initialize(args.lib); auto final_args = (args.args != Kokkos::Tools::InitArguments::unset_string_option) @@ -181,6 +223,9 @@ InitializationStatus initialize_tools_subsystem( return {InitializationStatus::InitializationResult::help_request}; } Kokkos::Tools::parseArgs(final_args); +#else + (void)args; +#endif return {InitializationStatus::InitializationResult::success}; } @@ -190,8 +235,8 @@ void initialize(const InitArguments& arguments) { } void initialize(int argc, char* argv[]) { InitArguments arguments; - Impl::parse_command_line_arguments(argc, argv, arguments); Impl::parse_environment_variables(arguments); + Impl::parse_command_line_arguments(argc, argv, arguments); initialize(arguments); } diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp 
b/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp index 4a8527f5e3d00b7311ebc7c7340d359d7f55a0cd..cb17a0cd836167297621a751dc83991f7f4dc4fb 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp @@ -56,6 +56,7 @@ namespace Kokkos { // forward declaration +bool show_warnings() noexcept; bool tune_internals() noexcept; namespace Tools { @@ -66,10 +67,9 @@ struct InitArguments { // for this long-term static const std::string unset_string_option; enum PossiblyUnsetOption { unset, off, on }; - PossiblyUnsetOption tune_internals = unset; - PossiblyUnsetOption help = unset; - std::string lib = unset_string_option; - std::string args = unset_string_option; + PossiblyUnsetOption help = unset; + std::string lib = unset_string_option; + std::string args = unset_string_option; }; namespace Impl { diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index d5266820560eb22b5f71309aff7acaabc9de9ee6..428a3cb17b7a941e843f4f90c01124a657282dd0 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -74,6 +74,7 @@ enum struct DeviceType { HPX, Threads, SYCL, + OpenACC, Unknown }; @@ -98,6 +99,7 @@ inline DeviceType devicetype_from_uint32t(const uint32_t in) { case 5: return DeviceType::HPX; case 6: return DeviceType::Threads; case 7: return DeviceType::SYCL; + case 8: return DeviceType::OpenACC; default: return DeviceType::Unknown; // TODO: error out? 
} } diff --git a/packages/kokkos/core/src/impl/Kokkos_QuadPrecisionMath.hpp b/packages/kokkos/core/src/impl/Kokkos_QuadPrecisionMath.hpp index b67cede45bfddd657448a054aca3254d627267ce..c7936e950d7ebaec8c9987a65577b85f5952abf0 100644 --- a/packages/kokkos/core/src/impl/Kokkos_QuadPrecisionMath.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_QuadPrecisionMath.hpp @@ -50,6 +50,8 @@ #if defined(KOKKOS_ENABLE_LIBQUADMATH) #include <Kokkos_NumericTraits.hpp> +#include <Kokkos_MathematicalConstants.hpp> +#include <Kokkos_MathematicalFunctions.hpp> #include <quadmath.h> @@ -88,7 +90,14 @@ KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(finite_max, __float128, __float128, FLT KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(epsilon, __float128, __float128, FLT128_EPSILON) KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(round_error, __float128, __float128, static_cast<__float128>(0.5)) KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(norm_min, __float128, __float128, FLT128_MIN) +KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(denorm_min, __float128, __float128, FLT128_DENORM_MIN) +KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(reciprocal_overflow_threshold, __float128, __float128, FLT128_MIN) +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU >= 710) +KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(quiet_NaN, __float128, __float128, __builtin_nanq("")) +KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(signaling_NaN, __float128, __float128, __builtin_nansq("")) +#endif +// Numeric characteristics traits KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(digits, __float128, int, FLT128_MANT_DIG) KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(digits10, __float128, int, FLT128_DIG) KOKKOS_IMPL_SPECIALIZE_NUMERIC_TRAIT(max_digits10, __float128, int, 36) @@ -124,21 +133,21 @@ struct reduction_identity<__float128> { //<editor-fold desc="Common mathematical functions __float128 overloads"> namespace Kokkos { -namespace Experimental { // clang-format off +namespace Impl { +template <> struct promote<__float128> { using type = __float128; }; +} // Basic operations +inline __float128 
abs(__float128 x) { return ::fabsq(x); } inline __float128 fabs(__float128 x) { return ::fabsq(x); } inline __float128 fmod(__float128 x, __float128 y) { return ::fmodq(x, y); } inline __float128 remainder(__float128 x, __float128 y) { return ::remainderq(x, y); } -inline __float128 fmin(__float128 x, __float128 y) { return ::fminq(x, y); } +// remquo +// fma inline __float128 fmax(__float128 x, __float128 y) { return ::fmaxq(x, y); } +inline __float128 fmin(__float128 x, __float128 y) { return ::fminq(x, y); } inline __float128 fdim(__float128 x, __float128 y) { return ::fdimq(x, y); } inline __float128 nanq(char const* arg) { return ::nanq(arg); } -// Power functions -inline __float128 pow(__float128 x, __float128 y) { return ::powq(x, y); } -inline __float128 sqrt(__float128 x) { return ::sqrtq(x); } -inline __float128 cbrt(__float128 x) { return ::cbrtq(x); } -inline __float128 hypot(__float128 x, __float128 y) { return ::hypotq(x, y); } // Exponential functions inline __float128 exp(__float128 x) { return ::expq(x); } #if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU >= 910) @@ -149,6 +158,11 @@ inline __float128 log(__float128 x) { return ::logq(x); } inline __float128 log10(__float128 x) { return ::log10q(x); } inline __float128 log2(__float128 x) { return ::log2q(x); } inline __float128 log1p(__float128 x) { return ::log1pq(x); } +// Power functions +inline __float128 pow(__float128 x, __float128 y) { return ::powq(x, y); } +inline __float128 sqrt(__float128 x) { return ::sqrtq(x); } +inline __float128 cbrt(__float128 x) { return ::cbrtq(x); } +inline __float128 hypot(__float128 x, __float128 y) { return ::hypotq(x, y); } // Trigonometric functions inline __float128 sin(__float128 x) { return ::sinq(x); } inline __float128 cos(__float128 x) { return ::cosq(x); } @@ -173,11 +187,61 @@ inline __float128 lgamma(__float128 x) { return ::lgammaq(x); } inline __float128 ceil(__float128 x) { return ::ceilq(x); } inline __float128 floor(__float128 x) { 
return ::floorq(x); } inline __float128 trunc(__float128 x) { return ::truncq(x); } +inline __float128 round(__float128 x) { return ::roundq(x); } +// lround +// llround inline __float128 nearbyint(__float128 x) { return ::nearbyintq(x); } +// rint +// lrint +// llrint +// Floating point manipulation functions +// frexp +// ldexp +// modf +// scalbn +// scalbln +// ilog +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU >= 610) +inline __float128 logb(__float128 x) { return ::logbq(x); } +#endif +inline __float128 nextafter(__float128 x, __float128 y) { return ::nextafterq(x, y); } +// nexttoward +inline __float128 copysign(__float128 x, __float128 y) { return ::copysignq(x, y); } // Classification and comparison +// fpclassify -inline bool isfinite(__float128 x) { return !::isinfq(x); } // isfiniteq not provided +inline bool isfinite(__float128 x) { return !::isinfq(x) && !::isnanq(x); } // isfiniteq not provided; NaN must not be reported as finite inline bool isinf(__float128 x) { return ::isinfq(x); } inline bool isnan(__float128 x) { return ::isnanq(x); } +// isnormal +inline bool signbit(__float128 x) { return ::signbitq(x); } +// isgreater +// isgreaterequal +// isless +// islessequal +// islessgreater +// isunordered +// clang-format on +} // namespace Kokkos +//</editor-fold> + +//<editor-fold desc="Mathematical constants __float128 specializations"> +namespace Kokkos { +namespace Experimental { +// clang-format off +template <> constexpr __float128 e_v <__float128> = 2.718281828459045235360287471352662498Q; +template <> constexpr __float128 log2e_v <__float128> = 1.442695040888963407359924681001892137Q; +template <> constexpr __float128 log10e_v <__float128> = 0.434294481903251827651128918916605082Q; +template <> constexpr __float128 pi_v <__float128> = 3.141592653589793238462643383279502884Q; +template <> constexpr __float128 inv_pi_v <__float128> = 0.318309886183790671537767526745028724Q; +template <> constexpr __float128 inv_sqrtpi_v<__float128> = 0.564189583547756286948079451560772586Q; +template <> constexpr __float128 ln2_v <__float128> = 0.693147180559945309417232121458176568Q; 
+template <> constexpr __float128 ln10_v <__float128> = 2.302585092994045684017991454684364208Q; +template <> constexpr __float128 sqrt2_v <__float128> = 1.414213562373095048801688724209698079Q; +template <> constexpr __float128 sqrt3_v <__float128> = 1.732050807568877293527446341505872367Q; +template <> constexpr __float128 inv_sqrt3_v <__float128> = 0.577350269189625764509148780501957456Q; +template <> constexpr __float128 egamma_v <__float128> = 0.577215664901532860606512090082402431Q; +template <> constexpr __float128 phi_v <__float128> = 1.618033988749894848204586834365638118Q; +// clang-format on } // namespace Experimental } // namespace Kokkos //</editor-fold> diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp index 149c881af5e0cf636907834794f41a1a0dc5a4c5..aff6332cc083ac9feff0e62185ddb21f84430474 100644 --- a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp @@ -42,13 +42,16 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Core.hpp> namespace Kokkos { namespace Impl { -KOKKOS_THREAD_LOCAL int SharedAllocationRecord<void, void>::t_tracking_enabled = - 1; +thread_local int SharedAllocationRecord<void, void>::t_tracking_enabled = 1; #ifdef KOKKOS_ENABLE_DEBUG bool SharedAllocationRecord<void, void>::is_sane( @@ -238,7 +241,7 @@ SharedAllocationRecord<void, void>* SharedAllocationRecord< const int old_count = Kokkos::atomic_fetch_sub(&arg_record->m_count, 1); if (old_count == 1) { - if (!Kokkos::is_initialized()) { + if (is_finalized()) { std::stringstream ss; ss << "Kokkos allocation \""; ss << arg_record->get_label(); diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp index 2f18157ffeb21743a920bc90bc5a033191c192fa..02dcd1ec652efcd9b95e9e4902cd626ed16bb600 100644 --- 
a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp @@ -86,9 +86,9 @@ class SharedAllocationHeader { public: /* Given user memory get pointer to the header */ KOKKOS_INLINE_FUNCTION static const SharedAllocationHeader* get_header( - void* alloc_ptr) { - return reinterpret_cast<SharedAllocationHeader*>( - reinterpret_cast<char*>(alloc_ptr) - sizeof(SharedAllocationHeader)); + void const* alloc_ptr) { + return reinterpret_cast<SharedAllocationHeader const*>( + static_cast<char const*>(alloc_ptr) - sizeof(SharedAllocationHeader)); } KOKKOS_INLINE_FUNCTION @@ -141,15 +141,22 @@ class SharedAllocationRecord<void, void> { SharedAllocationHeader* arg_alloc_ptr, size_t arg_alloc_size, function_type arg_dealloc, const std::string& label); private: - static KOKKOS_THREAD_LOCAL int t_tracking_enabled; + static thread_local int t_tracking_enabled; public: virtual std::string get_label() const { return std::string("Unmanaged"); } +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma push +#pragma diag_suppress implicit_return_from_non_void_function +#endif static KOKKOS_FUNCTION int tracking_enabled() { KOKKOS_IF_ON_HOST(return t_tracking_enabled;) KOKKOS_IF_ON_DEVICE(return 0;) } +#if defined(__EDG__) && !defined(KOKKOS_COMPILER_INTEL) +#pragma pop +#endif /**\brief A host process thread claims and disables the * shared allocation tracking flag. 
@@ -185,7 +192,7 @@ class SharedAllocationRecord<void, void> { /* User's memory begins at the end of the header */ KOKKOS_INLINE_FUNCTION - void* data() const { return reinterpret_cast<void*>(m_alloc_ptr + 1); } + void* data() const { return static_cast<void*>(m_alloc_ptr + 1); } /* User's memory begins at the end of the header */ size_t size() const { return m_alloc_size - sizeof(SharedAllocationHeader); } @@ -302,6 +309,16 @@ template <class MemorySpace, class DestroyFunctor> class SharedAllocationRecord : public SharedAllocationRecord<MemorySpace, void> { private: + template <typename ExecutionSpace> + SharedAllocationRecord(const ExecutionSpace& execution_space, + const MemorySpace& arg_space, + const std::string& arg_label, const size_t arg_alloc) + /* Allocate user memory as [ SharedAllocationHeader , user_memory ] */ + : SharedAllocationRecord<MemorySpace, void>( + execution_space, arg_space, arg_label, arg_alloc, + &Kokkos::Impl::deallocate<MemorySpace, DestroyFunctor>), + m_destroy() {} + SharedAllocationRecord(const MemorySpace& arg_space, const std::string& arg_label, const size_t arg_alloc) /* Allocate user memory as [ SharedAllocationHeader , user_memory ] */ @@ -328,6 +345,17 @@ class SharedAllocationRecord KOKKOS_IF_ON_DEVICE( ((void)arg_space; (void)arg_label; (void)arg_alloc; return nullptr;)) } + + template <typename ExecutionSpace> + KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate( + const ExecutionSpace& exec_space, const MemorySpace& arg_space, + const std::string& arg_label, const size_t arg_alloc) { + KOKKOS_IF_ON_HOST( + (return new SharedAllocationRecord(exec_space, arg_space, arg_label, + arg_alloc);)) + KOKKOS_IF_ON_DEVICE(((void)exec_space; (void)arg_space; (void)arg_label; + (void)arg_alloc; return nullptr;)) + } }; template <class MemorySpace> diff --git a/packages/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp b/packages/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp index 
7f222c92ca704908e7e6be05229c976d113395f9..06bfe276c73bc888fe2ae2ace36a93d3936f9101 100644 --- a/packages/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp @@ -153,8 +153,7 @@ class SimpleTaskScheduler } template <int TaskEnum, class DepTaskType, class FunctorType> - KOKKOS_FUNCTION future_type_for_functor< - typename std::decay<FunctorType>::type> + KOKKOS_FUNCTION future_type_for_functor<std::decay_t<FunctorType>> _spawn_impl( DepTaskType arg_predecessor_task, TaskPriority arg_priority, typename runnable_task_base_type::function_type apply_function_ptr, @@ -163,7 +162,7 @@ class SimpleTaskScheduler KOKKOS_EXPECTS(m_queue != nullptr); using functor_future_type = - future_type_for_functor<typename std::decay<FunctorType>::type>; + future_type_for_functor<std::decay_t<FunctorType>>; using task_type = typename task_queue_type::template runnable_task_type<FunctorType, scheduler_type>; @@ -221,7 +220,7 @@ class SimpleTaskScheduler // SharedAllocationRecord pattern using record_type = Impl::SharedAllocationRecord<memory_space, - Impl::DefaultDestroy<task_queue_type> >; + Impl::DefaultDestroy<task_queue_type>>; // Allocate space for the task queue auto* record = record_type::allocate(memory_space(), "Kokkos::TaskQueue", diff --git a/packages/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp b/packages/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp index 0584cd29eb70470f4c206317d35a57f24893a518..aa84fbbf6e2e3ea3b090a9b52afb90f51a042d1b 100644 --- a/packages/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp @@ -64,7 +64,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp 
b/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp index f46d89226ceddea11c223087e94a5760f0075d8d..d0954291fa9a119bf755625aa96dd749fb0e8875 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <Kokkos_Macros.hpp> #include <Kokkos_Atomic.hpp> diff --git a/packages/kokkos/core/src/impl/Kokkos_Spinwait.hpp b/packages/kokkos/core/src/impl/Kokkos_Spinwait.hpp index 1c65fb91f2942aca57e66b09bd99ccaf5e450783..085157521d38cf7ea3266364097f2b89861f22b5 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Spinwait.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Spinwait.hpp @@ -66,8 +66,8 @@ enum class WaitMode : int { void host_thread_yield(const uint32_t i, const WaitMode mode); template <typename T> -typename std::enable_if<std::is_integral<T>::value, void>::type -root_spinwait_while_equal(T const volatile& flag, const T value) { +std::enable_if_t<std::is_integral<T>::value, void> root_spinwait_while_equal( + T const volatile& flag, const T value) { Kokkos::store_fence(); uint32_t i = 0; while (value == flag) { @@ -77,8 +77,8 @@ root_spinwait_while_equal(T const volatile& flag, const T value) { } template <typename T> -typename std::enable_if<std::is_integral<T>::value, void>::type -root_spinwait_until_equal(T const volatile& flag, const T value) { +std::enable_if_t<std::is_integral<T>::value, void> root_spinwait_until_equal( + T const volatile& flag, const T value) { Kokkos::store_fence(); uint32_t i = 0; while (value != flag) { @@ -88,8 +88,8 @@ root_spinwait_until_equal(T const volatile& flag, const T value) { } template <typename T> -typename std::enable_if<std::is_integral<T>::value, void>::type -spinwait_while_equal(T const volatile& flag, const T value) { +std::enable_if_t<std::is_integral<T>::value, void> spinwait_while_equal( + T const volatile& flag, const T value) { 
Kokkos::store_fence(); uint32_t i = 0; while (value == flag) { @@ -99,8 +99,8 @@ spinwait_while_equal(T const volatile& flag, const T value) { } template <typename T> -typename std::enable_if<std::is_integral<T>::value, void>::type -yield_while_equal(T const volatile& flag, const T value) { +std::enable_if_t<std::is_integral<T>::value, void> yield_while_equal( + T const volatile& flag, const T value) { Kokkos::store_fence(); uint32_t i = 0; while (value == flag) { @@ -110,8 +110,8 @@ yield_while_equal(T const volatile& flag, const T value) { } template <typename T> -typename std::enable_if<std::is_integral<T>::value, void>::type -spinwait_until_equal(T const volatile& flag, const T value) { +std::enable_if_t<std::is_integral<T>::value, void> spinwait_until_equal( + T const volatile& flag, const T value) { Kokkos::store_fence(); uint32_t i = 0; while (value != flag) { @@ -121,8 +121,8 @@ spinwait_until_equal(T const volatile& flag, const T value) { } template <typename T> -typename std::enable_if<std::is_integral<T>::value, void>::type -yield_until_equal(T const volatile& flag, const T value) { +std::enable_if_t<std::is_integral<T>::value, void> yield_until_equal( + T const volatile& flag, const T value) { Kokkos::store_fence(); uint32_t i = 0; while (value != flag) { diff --git a/packages/kokkos/core/src/impl/Kokkos_Stacktrace.cpp b/packages/kokkos/core/src/impl/Kokkos_Stacktrace.cpp index c0c1fdf6be7e2024aa92692b21b4d0996e370bca..e1f59c1d8666e951bcc4378b3effef541fb861dc 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Stacktrace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Stacktrace.cpp @@ -1,3 +1,8 @@ + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include "Kokkos_Macros.hpp" #include "Kokkos_Stacktrace.hpp" diff --git a/packages/kokkos/core/src/impl/Kokkos_StringManipulation.hpp b/packages/kokkos/core/src/impl/Kokkos_StringManipulation.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..644dcf7faaa3a6b1ba41349ad01ca512b26a0ac2 --- /dev/null +++ b/packages/kokkos/core/src/impl/Kokkos_StringManipulation.hpp @@ -0,0 +1,220 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STRING_MANIPULATION_HPP
+#define KOKKOS_STRING_MANIPULATION_HPP
+
+#include <Kokkos_Macros.hpp>
+#include <cstddef>
+#include <type_traits>
+
+namespace Kokkos {
+namespace Impl {
+
+// This header provides a subset of the functionality from <cstring>. In
+// contrast to the standard library header, functions are usable on the device
+// and in constant expressions. It also includes functionality from <charconv>
+// to convert an integer value to a character sequence.
+
+//<editor-fold desc="String examination">
+// returns the length of a given string
+KOKKOS_INLINE_FUNCTION constexpr std::size_t strlen(const char *str) {
+  std::size_t i = 0;
+  while (str[i] != '\0') {
+    ++i;
+  }
+  return i;
+}
+
+// compares two strings; bytes are compared as unsigned char (as std::strcmp)
+KOKKOS_INLINE_FUNCTION constexpr int strcmp(const char *lhs, const char *rhs) {
+  while (*lhs == *rhs++) {
+    if (*lhs++ == '\0') {
+      return 0;
+    }
+  }
+  return static_cast<unsigned char>(*lhs) -
+         static_cast<unsigned char>(*(rhs - 1));
+}
+
+// compares at most count characters of two strings, as unsigned char values
+KOKKOS_INLINE_FUNCTION constexpr int strncmp(const char *lhs, const char *rhs,
+                                             std::size_t count) {
+  for (std::size_t i = 0; i < count; ++i) {
+    auto const l = static_cast<unsigned char>(lhs[i]);
+    auto const r = static_cast<unsigned char>(rhs[i]);
+    if (l != r) return l < r ? -1 : 1;
+    if (l == '\0') return 0;
+  }
+  return 0;
+}
+//</editor-fold>
+
+//<editor-fold desc="String manipulation">
+// copies one string to another
+KOKKOS_INLINE_FUNCTION constexpr char *strcpy(char *dest, const char *src) {
+  char *d = dest;
+  for (; (*d = *src) != '\0'; ++d, ++src) {
+  }
+  return dest;
+}
+
+// copies a certain amount of characters from one string to another
+KOKKOS_INLINE_FUNCTION constexpr char *strncpy(char *dest, const char *src,
+                                               std::size_t count) {
+  if (count != 0) {
+    char *d = dest;
+    do {
+      if ((*d++ = *src++) == '\0') {
+        while (--count != 0) {
+          *d++ = '\0';
+        }
+        break;
+      }
+    } while (--count != 0);
+  }
+  return dest;
+}
+
+// concatenates two strings
+KOKKOS_INLINE_FUNCTION constexpr char *strcat(char *dest, const char *src) {
+  char *d = dest;
+  for (; *d != '\0'; ++d) {
+  }
+  while ((*d++ = *src++) != '\0') {
+  }
+  return dest;
+}
+
+// concatenates a certain amount of characters of two strings
+KOKKOS_INLINE_FUNCTION constexpr char *strncat(char *dest, const char *src,
+                                               std::size_t count) {
+  if (count != 0) {
+    char *d = dest;
+    for (; *d != '\0'; ++d) {
+    }
+    do {
+      if ((*d = *src++) == '\0') {
+        break;
+      }
+      d++;
+    } while (--count != 0);
+    *d = '\0';
+  }
+  return dest;
+}
+//</editor-fold>
+
+//<editor-fold desc="Character conversions">
+template <class Unsigned>
+KOKKOS_FUNCTION constexpr unsigned int to_chars_len(Unsigned val) {
+  unsigned int const base = 10;
+  static_assert(std::is_integral<Unsigned>::value, "implementation bug");
+  static_assert(std::is_unsigned<Unsigned>::value, "implementation bug");
+  unsigned int n = 1;
+  while (val >= base) {
+    val /= base;
+    ++n;
+  }
+  return n;
+}
+template <class Unsigned>
+KOKKOS_FUNCTION constexpr void to_chars_impl(char *first, unsigned int len,
+                                             Unsigned val) {
+  unsigned int const base = 10;
+  static_assert(std::is_integral<Unsigned>::value, "implementation bug");
+  static_assert(std::is_unsigned<Unsigned>::value, "implementation bug");
+  unsigned int pos = len - 1;
+  while (val > 0) {
+    auto const num = val % base;
+    val /= base;
+    first[pos] = '0' + num;
+    --pos;
+  }
+}
+
+// define values of portable error conditions that correspond to the POSIX error
+// codes
+enum class errc {
+  value_too_large = 75  // equivalent POSIX error is EOVERFLOW
+};
+struct to_chars_result {
+  char *ptr;
+  errc ec;
+};
+
+// converts an integer to decimal text in [first, last); fails if it won't fit
+template <class Integral>
+KOKKOS_FUNCTION constexpr to_chars_result to_chars_i(char *first, char *last,
+                                                     Integral value) {
+  using Unsigned = std::conditional_t<sizeof(Integral) <= sizeof(unsigned int),
+                                      unsigned int, unsigned long long>;
+  Unsigned unsigned_val = value;
+  if (first == last) return {last, errc::value_too_large};
+  if (value == 0) {
+    *first = '0';
+    return {first + 1, {}};
+  } else if
+#ifdef KOKKOS_ENABLE_CXX17
+      constexpr
+#endif
+      (std::is_signed<Integral>::value) {
+    if (value < 0) {
+      *first++ = '-';
+      unsigned_val = Unsigned(~value) + Unsigned(1);
+    }
+  }
+  unsigned int const len = to_chars_len(unsigned_val);
+  if (last - first < len) {
+    return {last, errc::value_too_large};
+  }
+  to_chars_impl(first, len, unsigned_val);
+  return {first + len, {}};
+}
+//</editor-fold>
+
+}  // namespace Impl
+}  // namespace Kokkos
+
+#endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskBase.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskBase.hpp index e9f5d91aa89058fb574339d544e0eb3bef4d66d7..bb89ab914471d61dde22a2009a57d41094428308 100644 --- a/packages/kokkos/core/src/impl/Kokkos_TaskBase.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_TaskBase.hpp @@ -57,7 +57,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -199,7 +198,7 @@ class TaskBase { void add_dependence(TaskBase* dep) { // Precondition: lock == m_next - TaskBase* const lock = (TaskBase*)LockTag; + auto* const lock = reinterpret_cast<TaskBase*>(LockTag); // Assign
dependence to m_next. It will be processed in the subsequent // call to schedule. Error if the dependence is reset. @@ -221,7 +220,7 @@ class TaskBase { KOKKOS_INLINE_FUNCTION int32_t reference_count() const { - return *((int32_t volatile*)(&m_ref_count)); + return *const_cast<int32_t volatile*>(&m_ref_count); } }; diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskNode.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskNode.hpp index 7cfd696d2c40958fe60167ad7fe5d39091ca1a9e..5e2ebb058aeaaaf79ae9e0916549d41e394fa97e 100644 --- a/packages/kokkos/core/src/impl/Kokkos_TaskNode.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_TaskNode.hpp @@ -63,7 +63,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskQueue.hpp index e74e84a2e535b5953ae58667a2a7f4b4f53b293d..4f565f019ca16d40124a14fd5bf955dfbfe6321a 100644 --- a/packages/kokkos/core/src/impl/Kokkos_TaskQueue.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -64,7 +64,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp index 757e5f98864bc3c74faa8f4bdfffb795e68aab60..82af5625e87ecb77ca5070f2fce5b78c37ce0168 100644 --- a/packages/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp @@ -63,7 +63,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> //---------------------------------------------------------------------------- 
//---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp index 3a71aa17e69042c791e9e7302c3c14cdca91b8aa..c8039fa77dd1f7b94c7ee8073533a4bc8a390eab 100644 --- a/packages/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp @@ -62,7 +62,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp index 5f98e8d85e9214289ce43f98b920ec27da4a672f..31c737650abffa0ed4275e71a2d847ecdf9567a6 100644 --- a/packages/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp @@ -63,7 +63,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> #include <cassert> //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskResult.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskResult.hpp index 40a9c3bf57cfbe36e5a2646b963b71338c410cf7..7c893547d280afb1966c91825dff398ee233a65a 100644 --- a/packages/kokkos/core/src/impl/Kokkos_TaskResult.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_TaskResult.hpp @@ -58,7 +58,6 @@ #include <string> #include <typeinfo> -#include <stdexcept> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp b/packages/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp index 
f53dfe5a96621a0e31d3deb95bb83ac9bef35907..1d6c766a75ef4b5345ba6d267f52bb7a814b0d25 100644 --- a/packages/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp @@ -80,8 +80,9 @@ class TaskTeamMemberAdapter : public TeamMember { // type that we're adapting template <typename... Args> KOKKOS_INLINE_FUNCTION explicit TaskTeamMemberAdapter( - typename std::enable_if<std::is_constructible<TeamMember, Args...>::value, - Scheduler>::type arg_scheduler, + std::enable_if_t<std::is_constructible<TeamMember, Args...>::value, + Scheduler> + arg_scheduler, Args&&... args) // TODO @tasking @minor DSH noexcept specification : TeamMember(std::forward<Args>(args)...), m_scheduler( diff --git a/packages/kokkos/core/src/impl/Kokkos_Tools_Generic.hpp b/packages/kokkos/core/src/impl/Kokkos_Tools_Generic.hpp index e734b369b2b70ecd7339aec264dff1a3c2a37994..702fc0997bf96fb7a85b282c559ba57111b9faa0 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Tools_Generic.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Tools_Generic.hpp @@ -179,9 +179,8 @@ void generic_tune_policy(const std::string& label_in, Map& map, Policy& policy, if (should_tune(policy)) { std::string label = label_in; if (label_in.empty()) { - using policy_type = - typename std::remove_reference<decltype(policy)>::type; - using work_tag = typename policy_type::work_tag; + using policy_type = std::remove_reference_t<decltype(policy)>; + using work_tag = typename policy_type::work_tag; Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label); label = name.get(); } @@ -205,9 +204,8 @@ void generic_tune_policy(const std::string& label_in, Map& map, Policy& policy, if (should_tune(policy)) { std::string label = label_in; if (label_in.empty()) { - using policy_type = - typename std::remove_reference<decltype(policy)>::type; - using work_tag = typename policy_type::work_tag; + using policy_type = std::remove_reference_t<decltype(policy)>; + using work_tag = typename 
policy_type::work_tag; Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label); label = name.get(); } @@ -312,9 +310,8 @@ void generic_report_results(const std::string& label_in, Map& map, if (should_tune(policy)) { std::string label = label_in; if (label_in.empty()) { - using policy_type = - typename std::remove_reference<decltype(policy)>::type; - using work_tag = typename policy_type::work_tag; + using policy_type = std::remove_reference_t<decltype(policy)>; + using work_tag = typename policy_type::work_tag; Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label); label = name.get(); } diff --git a/packages/kokkos/core/src/impl/Kokkos_Traits.hpp b/packages/kokkos/core/src/impl/Kokkos_Traits.hpp index aa38388acc40b3645b92dc1400c3001f8bf75f75..38edc118db6e62d8a7602e90e6c3b18cd9dc8722 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Traits.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Traits.hpp @@ -115,8 +115,7 @@ struct has_condition<DefaultType, Condition, S, Pack...> { public: enum : bool { value = self_value || next::value }; - using type = - typename std::conditional<self_value, S, typename next::type>::type; + using type = std::conditional_t<self_value, S, typename next::type>; }; template <class... 
Args> @@ -156,10 +155,9 @@ struct if_c { using type = FalseType; - using value_type = typename std::remove_const< - typename std::remove_reference<type>::type>::type; + using value_type = std::remove_const_t<std::remove_reference_t<type>>; - using const_value_type = typename std::add_const<value_type>::type; + using const_value_type = std::add_const_t<value_type>; static KOKKOS_INLINE_FUNCTION const_value_type& select(const_value_type& v) { return v; @@ -191,10 +189,9 @@ struct if_c<true, TrueType, FalseType> { using type = TrueType; - using value_type = typename std::remove_const< - typename std::remove_reference<type>::type>::type; + using value_type = std::remove_const_t<std::remove_reference_t<type>>; - using const_value_type = typename std::add_const<value_type>::type; + using const_value_type = std::add_const_t<value_type>; static KOKKOS_INLINE_FUNCTION const_value_type& select(const_value_type& v) { return v; diff --git a/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp b/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp index bea7c2c9d1e56a61802bc47c60f00e82496c8061..37b74103d3c6c1a158fe6d64da2a2aeb407902d4 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp @@ -65,6 +65,9 @@ struct identity { template <typename T> using identity_t = typename identity<T>::type; +template <typename... 
Is> +struct always_true : std::true_type {}; + #if defined(__cpp_lib_void_t) // since C++17 using std::void_t; diff --git a/packages/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp b/packages/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp index ace826dd5a7f726cd9e0e2b3ce14b081a26680f2..6773263340274928f1baa4bbe3308755ba2494e2 100644 --- a/packages/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp @@ -117,7 +117,7 @@ struct ObjectWithVLAEmulation { using vla_entry_count_type = EntryCountType; using iterator = VLAValueType*; - using const_iterator = typename std::add_const<VLAValueType>::type*; + using const_iterator = std::add_const_t<VLAValueType>*; // TODO @tasking @minor DSH require that Derived be marked final? (note that // std::is_final is C++14) diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp index fbda3e09318201655c34cd870dfc93a154071360..12a5fa288fdb1951ceed0a062387a80df45f2e89 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp @@ -55,7 +55,7 @@ struct ViewDataAnalysis<DataType, ArrayLayout, Kokkos::Array<V, N, P>> { private: using array_analysis = ViewArrayAnalysis<DataType>; - static_assert(std::is_same<P, void>::value, ""); + static_assert(std::is_void<P>::value, ""); static_assert(std::is_same<typename array_analysis::non_const_value_type, Kokkos::Array<V, N, P>>::value, ""); @@ -75,7 +75,7 @@ struct ViewDataAnalysis<DataType, ArrayLayout, Kokkos::Array<V, N, P>> { using array_scalar_dimension = typename dimension::template append<N>::type; - using scalar_type = typename std::conditional<is_const, const V, V>::type; + using scalar_type = std::conditional_t<is_const, const V, V>; using non_const_scalar_type = V; using const_scalar_type = const V; @@ -230,8 +230,8 @@ class ViewMapping<Traits, Kokkos::Array<>> { } using reference_type = - typename 
std::conditional<is_contiguous_reference, contiguous_reference, - strided_reference>::type; + std::conditional_t<is_contiguous_reference, contiguous_reference, + strided_reference>; using pointer_type = handle_type; @@ -350,7 +350,8 @@ class ViewMapping<Traits, Kokkos::Array<>> { template <class... P> Kokkos::Impl::SharedAllocationRecord<> *allocate_shared( Kokkos::Impl::ViewCtorProp<P...> const &arg_prop, - typename Traits::array_layout const &arg_layout) { + typename Traits::array_layout const &arg_layout, + bool execution_space_specified) { using alloc_prop = Kokkos::Impl::ViewCtorProp<P...>; using execution_space = typename alloc_prop::execution_space; @@ -373,12 +374,21 @@ class ViewMapping<Traits, Kokkos::Array<>> { static_cast<Kokkos::Impl::ViewCtorProp<void, std::string> const &>( arg_prop) .value; - // Allocate memory from the memory space and create tracking record. - record_type *const record = record_type::allocate( + const execution_space &exec_space = + static_cast<Kokkos::Impl::ViewCtorProp<void, execution_space> const &>( + arg_prop) + .value; + const memory_space &mem_space = static_cast<Kokkos::Impl::ViewCtorProp<void, memory_space> const &>( arg_prop) - .value, - alloc_name, alloc_size); + .value; + + // Allocate memory from the memory space and create tracking record. + record_type *const record = + execution_space_specified + ? record_type::allocate(exec_space, mem_space, alloc_name, + alloc_size) + : record_type::allocate(mem_space, alloc_name, alloc_size); if (alloc_size) { m_impl_handle = @@ -386,12 +396,12 @@ class ViewMapping<Traits, Kokkos::Array<>> { if (alloc_prop::initialize) { // The functor constructs and destroys - record->m_destroy = functor_type( - static_cast<Kokkos::Impl::ViewCtorProp<void, execution_space> const - &>(arg_prop) - .value, - (pointer_type)m_impl_handle, m_impl_offset.span() * Array_N, - alloc_name); + record->m_destroy = + execution_space_specified + ? 
functor_type(exec_space, (pointer_type)m_impl_handle, + m_impl_offset.span() * Array_N, alloc_name) + : functor_type((pointer_type)m_impl_handle, + m_impl_offset.span() * Array_N, alloc_name); record->m_destroy.construct_shared_allocation(); } @@ -406,10 +416,10 @@ class ViewMapping<Traits, Kokkos::Array<>> { template <class DstTraits, class SrcTraits> class ViewMapping< DstTraits, SrcTraits, - typename std::enable_if<( + std::enable_if_t<( std::is_same<typename DstTraits::memory_space, typename SrcTraits::memory_space>::value && - std::is_same<typename DstTraits::specialize, void>::value && + std::is_void<typename DstTraits::specialize>::value && (std::is_same<typename DstTraits::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename DstTraits::array_layout, @@ -422,7 +432,7 @@ class ViewMapping< std::is_same<typename SrcTraits::array_layout, Kokkos::LayoutRight>::value || std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutStride>::value))>::type> { + Kokkos::LayoutStride>::value))>> { public: // Can only convert to View::array_type @@ -506,14 +516,14 @@ class ViewMapping< template <class SrcTraits, class... 
Args> class ViewMapping< - typename std::enable_if<( + std::enable_if_t<( std::is_same<typename SrcTraits::specialize, Kokkos::Array<>>::value && (std::is_same<typename SrcTraits::array_layout, Kokkos::LayoutLeft>::value || std::is_same<typename SrcTraits::array_layout, Kokkos::LayoutRight>::value || std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutStride>::value))>::type, + Kokkos::LayoutStride>::value))>, SrcTraits, Args...> { private: static_assert(SrcTraits::rank == sizeof...(Args), ""); @@ -558,36 +568,34 @@ class ViewMapping< // Subview's layout using array_layout = - typename std::conditional<((rank == 0) || - (rank <= 2 && R0 && - std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutLeft>::value) || - (rank <= 2 && R0_rev && - std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutRight>::value)), - typename SrcTraits::array_layout, - Kokkos::LayoutStride>::type; + std::conditional_t<((rank == 0) || + (rank <= 2 && R0 && + std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutLeft>::value) || + (rank <= 2 && R0_rev && + std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutRight>::value)), + typename SrcTraits::array_layout, + Kokkos::LayoutStride>; using value_type = typename SrcTraits::value_type; - using data_type = typename std::conditional< + using data_type = std::conditional_t< rank == 0, value_type, - typename std::conditional< + std::conditional_t< rank == 1, value_type *, - typename std::conditional< + std::conditional_t< rank == 2, value_type **, - typename std::conditional< + std::conditional_t< rank == 3, value_type ***, - typename std::conditional< + std::conditional_t< rank == 4, value_type ****, - typename std::conditional< + std::conditional_t< rank == 5, value_type *****, - typename std::conditional< + std::conditional_t< rank == 6, value_type ******, - typename std::conditional< - rank == 7, value_type *******, - value_type ********>::type>::type>::type>:: - 
type>::type>::type>::type>::type; + std::conditional_t<rank == 7, value_type *******, + value_type ********>>>>>>>>; public: using traits_type = Kokkos::ViewTraits<data_type, array_layout, diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewCtor.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewCtor.hpp index cc3953c05e3292b1f9f45d0d21d014a06f40ddab..8bc8f8686487212d5ac380f35447e2ad0d6a5752 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewCtor.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewCtor.hpp @@ -53,7 +53,6 @@ namespace Impl { struct WithoutInitializing_t {}; struct AllowPadding_t {}; -struct NullSpace_t {}; template <typename> struct is_view_ctor_property : public std::false_type {}; @@ -64,9 +63,6 @@ struct is_view_ctor_property<WithoutInitializing_t> : public std::true_type {}; template <> struct is_view_ctor_property<AllowPadding_t> : public std::true_type {}; -template <> -struct is_view_ctor_property<NullSpace_t> : public std::true_type {}; - //---------------------------------------------------------------------------- /**\brief Whether a type can be used for a view label */ @@ -91,10 +87,15 @@ struct ViewCtorProp; template <typename Specialize, typename T> struct CommonViewAllocProp; +/* Dummy to allow for empty ViewCtorProp object + */ +template <> +struct ViewCtorProp<void> {}; + /* Common value_type stored as ViewCtorProp */ template <typename Specialize, typename T> -struct ViewCtorProp<void, CommonViewAllocProp<Specialize, T> > { +struct ViewCtorProp<void, CommonViewAllocProp<Specialize, T>> { ViewCtorProp() = default; ViewCtorProp(const ViewCtorProp &) = default; ViewCtorProp &operator=(const ViewCtorProp &) = default; @@ -113,7 +114,7 @@ struct ViewCtorProp<void, CommonViewAllocProp<Specialize, T> > { * that avoid duplicate base class errors */ template <unsigned I> -struct ViewCtorProp<void, std::integral_constant<unsigned, I> > { +struct ViewCtorProp<void, std::integral_constant<unsigned, I>> { ViewCtorProp() = default; 
ViewCtorProp(const ViewCtorProp &) = default; ViewCtorProp &operator=(const ViewCtorProp &) = default; @@ -124,10 +125,10 @@ struct ViewCtorProp<void, std::integral_constant<unsigned, I> > { /* Property flags have constexpr value */ template <typename P> -struct ViewCtorProp<typename std::enable_if< - std::is_same<P, AllowPadding_t>::value || - std::is_same<P, WithoutInitializing_t>::value>::type, - P> { +struct ViewCtorProp< + std::enable_if_t<std::is_same<P, AllowPadding_t>::value || + std::is_same<P, WithoutInitializing_t>::value>, + P> { ViewCtorProp() = default; ViewCtorProp(const ViewCtorProp &) = default; ViewCtorProp &operator=(const ViewCtorProp &) = default; @@ -136,13 +137,12 @@ struct ViewCtorProp<typename std::enable_if< ViewCtorProp(const type &) {} - static constexpr type value = type(); + type value = type(); }; /* Map input label type to std::string */ template <typename Label> -struct ViewCtorProp<typename std::enable_if<is_view_label<Label>::value>::type, - Label> { +struct ViewCtorProp<std::enable_if_t<is_view_label<Label>::value>, Label> { ViewCtorProp() = default; ViewCtorProp(const ViewCtorProp &) = default; ViewCtorProp &operator=(const ViewCtorProp &) = default; @@ -156,10 +156,9 @@ struct ViewCtorProp<typename std::enable_if<is_view_label<Label>::value>::type, }; template <typename Space> -struct ViewCtorProp< - typename std::enable_if<Kokkos::is_memory_space<Space>::value || - Kokkos::is_execution_space<Space>::value>::type, - Space> { +struct ViewCtorProp<std::enable_if_t<Kokkos::is_memory_space<Space>::value || + Kokkos::is_execution_space<Space>::value>, + Space> { ViewCtorProp() = default; ViewCtorProp(const ViewCtorProp &) = default; ViewCtorProp &operator=(const ViewCtorProp &) = default; diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp index 6915622352e47d25efa34ae687f3e4f190150974..8d367cebab75bfb71cdcce5bf415fc0ae36642dc 100644 --- 
a/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp @@ -58,39 +58,39 @@ namespace Kokkos { template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, unsigned ArgN1> struct is_array_layout<Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, ArgN0, ArgN1, 0, 0, 0, 0, 0, 0, true> > + OuterP, InnerP, ArgN0, ArgN1, 0, 0, 0, 0, 0, 0, true>> : public std::true_type {}; template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, unsigned ArgN1, unsigned ArgN2> struct is_array_layout<Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, ArgN0, ArgN1, ArgN2, 0, 0, 0, 0, 0, true> > + OuterP, InnerP, ArgN0, ArgN1, ArgN2, 0, 0, 0, 0, 0, true>> : public std::true_type {}; template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3> struct is_array_layout<Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, 0, 0, 0, 0, true> > + OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, 0, 0, 0, 0, true>> : public std::true_type {}; template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3, unsigned ArgN4> struct is_array_layout<Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, 0, 0, 0, true> > + OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, 0, 0, 0, true>> : public std::true_type {}; template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3, unsigned ArgN4, unsigned ArgN5> struct is_array_layout<Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, 0, 0, true> > + OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, 0, 0, true>> : public std::true_type {}; template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3, unsigned ArgN4, 
unsigned ArgN5, unsigned ArgN6> struct is_array_layout<Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, 0, true> > + OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, 0, true>> : public std::true_type {}; template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, @@ -98,7 +98,7 @@ template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7> struct is_array_layout< Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, - ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > + ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true>> : public std::true_type {}; template <class L> @@ -109,7 +109,7 @@ template <Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7, bool IsPowerTwo> struct is_array_layout_tiled<Kokkos::Experimental::LayoutTiled< OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, - IsPowerTwo> > : public std::true_type { + IsPowerTwo>> : public std::true_type { }; // Last template parameter "true" meaning this currently only supports // powers-of-two @@ -118,9 +118,9 @@ namespace Impl { template <class Dimension, class Layout> struct ViewOffset< Dimension, Layout, - typename std::enable_if<((Dimension::rank <= 8) && (Dimension::rank >= 2) && - is_array_layout<Layout>::value && - is_array_layout_tiled<Layout>::value)>::type> { + std::enable_if_t<((Dimension::rank <= 8) && (Dimension::rank >= 2) && + is_array_layout<Layout>::value && + is_array_layout_tiled<Layout>::value)>> { public: static constexpr Kokkos::Iterate outer_pattern = Layout::outer_pattern; static constexpr Kokkos::Iterate inner_pattern = Layout::inner_pattern; @@ -493,8 +493,14 @@ struct ViewOffset< //---------------------------------------- KOKKOS_INLINE_FUNCTION constexpr array_layout layout() const { - return array_layout(m_dim.N0, m_dim.N1, m_dim.N2, m_dim.N2, m_dim.N3, - 
m_dim.N4, m_dim.N5, m_dim.N6, m_dim.N7); + return array_layout((VORank > 0 ? m_dim.N0 : KOKKOS_INVALID_INDEX), + (VORank > 1 ? m_dim.N1 : KOKKOS_INVALID_INDEX), + (VORank > 2 ? m_dim.N2 : KOKKOS_INVALID_INDEX), + (VORank > 3 ? m_dim.N3 : KOKKOS_INVALID_INDEX), + (VORank > 4 ? m_dim.N4 : KOKKOS_INVALID_INDEX), + (VORank > 5 ? m_dim.N5 : KOKKOS_INVALID_INDEX), + (VORank > 6 ? m_dim.N6 : KOKKOS_INVALID_INDEX), + (VORank > 7 ? m_dim.N7 : KOKKOS_INVALID_INDEX)); } KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { @@ -660,10 +666,10 @@ struct ViewOffset< }; // FIXME Remove the out-of-class definitions when we require C++17 -#define KOKKOS_ITERATE_VIEW_OFFSET_ENABLE \ - typename std::enable_if<((Dimension::rank <= 8) && (Dimension::rank >= 2) && \ - is_array_layout<Layout>::value && \ - is_array_layout_tiled<Layout>::value)>::type +#define KOKKOS_ITERATE_VIEW_OFFSET_ENABLE \ + std::enable_if_t<((Dimension::rank <= 8) && (Dimension::rank >= 2) && \ + is_array_layout<Layout>::value && \ + is_array_layout_tiled<Layout>::value)> template <class Dimension, class Layout> constexpr Kokkos::Iterate ViewOffset< Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::outer_pattern; @@ -754,18 +760,17 @@ template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P, typename iType0, typename iType1> -class ViewMapping< - typename std::enable_if<(N2 == 0 && N3 == 0 && N4 == 0 && N5 == 0 && - N6 == 0 && N7 == 0)>::type // void - , - Kokkos::ViewTraits< - T**, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, - N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, - N6, N7, true>, - iType0, iType1> { +class ViewMapping<std::enable_if_t<(N2 == 0 && N3 == 0 && N4 == 0 && N5 == 0 && + N6 == 0 && N7 == 0)> // void + , + Kokkos::ViewTraits< + T**, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>, + Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, + N3, N4, N5, N6, N7, true>, + iType0, iType1> { public: using src_layout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, @@ -774,11 +779,10 @@ class ViewMapping< static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = - typename std::conditional<is_inner_left, Kokkos::LayoutLeft, - Kokkos::LayoutRight>::type; - using traits = Kokkos::ViewTraits<T[N0][N1], array_layout, P...>; - using type = Kokkos::View<T[N0][N1], array_layout, P...>; + using array_layout = std::conditional_t<is_inner_left, Kokkos::LayoutLeft, + Kokkos::LayoutRight>; + using traits = Kokkos::ViewTraits<T[N0][N1], array_layout, P...>; + using type = Kokkos::View<T[N0][N1], array_layout, P...>; KOKKOS_INLINE_FUNCTION static void assign( ViewMapping<traits, void>& dst, const ViewMapping<src_traits, void>& src, @@ -807,8 +811,8 @@ template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P, typename iType0, typename iType1, typename iType2> -class ViewMapping<typename std::enable_if<(N3 == 0 && N4 == 0 && N5 == 0 && - N6 == 0 && N7 == 0)>::type // void +class ViewMapping<std::enable_if_t<(N3 == 0 && N4 == 0 && N5 == 0 && N6 == 0 && + N7 == 0)> // void , Kokkos::ViewTraits< T***, @@ -826,11 +830,10 @@ class ViewMapping<typename std::enable_if<(N3 == 0 && N4 == 0 && N5 == 0 && static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = - typename std::conditional<is_inner_left, Kokkos::LayoutLeft, - Kokkos::LayoutRight>::type; - using traits = Kokkos::ViewTraits<T[N0][N1][N2], array_layout, P...>; - using type = Kokkos::View<T[N0][N1][N2], array_layout, P...>; + using array_layout = std::conditional_t<is_inner_left, Kokkos::LayoutLeft, + Kokkos::LayoutRight>; + using traits = Kokkos::ViewTraits<T[N0][N1][N2], array_layout, P...>; + using type = Kokkos::View<T[N0][N1][N2], array_layout, P...>; KOKKOS_INLINE_FUNCTION static void assign( ViewMapping<traits, void>& dst, const ViewMapping<src_traits, void>& src, @@ -865,17 +868,17 @@ template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P, typename iType0, typename iType1, typename iType2, typename iType3> -class ViewMapping<typename std::enable_if<(N4 == 0 && N5 == 0 && N6 == 0 && - N7 == 0)>::type // void - , - Kokkos::ViewTraits< - T****, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - iType0, iType1, iType2, iType3> { +class ViewMapping< + std::enable_if_t<(N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0)> // void + , + Kokkos::ViewTraits< + T****, + Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, + N5, N6, N7, true>, + P...>, + Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, + N6, N7, true>, + iType0, iType1, iType2, iType3> { public: using src_layout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, @@ -884,9 +887,8 @@ class ViewMapping<typename std::enable_if<(N4 == 0 && N5 == 0 && N6 == 0 && static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = - typename std::conditional<is_inner_left, Kokkos::LayoutLeft, - Kokkos::LayoutRight>::type; + using array_layout = std::conditional_t<is_inner_left, Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using traits = Kokkos::ViewTraits<T[N0][N1][N2][N3], array_layout, P...>; using type = Kokkos::View<T[N0][N1][N2][N3], array_layout, P...>; @@ -928,17 +930,16 @@ template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P, typename iType0, typename iType1, typename iType2, typename iType3, typename iType4> -class ViewMapping< - typename std::enable_if<(N5 == 0 && N6 == 0 && N7 == 0)>::type // void - , - Kokkos::ViewTraits< - T*****, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, - N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, - N6, N7, true>, - iType0, iType1, iType2, iType3, iType4> { +class ViewMapping<std::enable_if_t<(N5 == 0 && N6 == 0 && N7 == 0)> // void + , + Kokkos::ViewTraits< + T*****, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>, + Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, + N3, N4, N5, N6, N7, true>, + iType0, iType1, iType2, iType3, iType4> { public: using src_layout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, @@ -947,9 +948,8 @@ class ViewMapping< static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = - typename std::conditional<is_inner_left, Kokkos::LayoutLeft, - Kokkos::LayoutRight>::type; + using array_layout = std::conditional_t<is_inner_left, Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using traits = Kokkos::ViewTraits<T[N0][N1][N2][N3][N4], array_layout, P...>; using type = Kokkos::View<T[N0][N1][N2][N3][N4], array_layout, P...>; @@ -997,7 +997,7 @@ template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N5, unsigned N6, unsigned N7, class... 
P, typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5> -class ViewMapping<typename std::enable_if<(N6 == 0 && N7 == 0)>::type // void +class ViewMapping<std::enable_if_t<(N6 == 0 && N7 == 0)> // void , Kokkos::ViewTraits< T******, @@ -1015,9 +1015,8 @@ class ViewMapping<typename std::enable_if<(N6 == 0 && N7 == 0)>::type // void static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = - typename std::conditional<is_inner_left, Kokkos::LayoutLeft, - Kokkos::LayoutRight>::type; + using array_layout = std::conditional_t<is_inner_left, Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using traits = Kokkos::ViewTraits<T[N0][N1][N2][N3][N4][N5], array_layout, P...>; using type = Kokkos::View<T[N0][N1][N2][N3][N4][N5], array_layout, P...>; @@ -1071,7 +1070,7 @@ template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N5, unsigned N6, unsigned N7, class... 
P, typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5, typename iType6> -class ViewMapping<typename std::enable_if<(N7 == 0)>::type // void +class ViewMapping<std::enable_if_t<(N7 == 0)> // void , Kokkos::ViewTraits< T*******, @@ -1089,9 +1088,8 @@ class ViewMapping<typename std::enable_if<(N7 == 0)>::type // void static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = - typename std::conditional<is_inner_left, Kokkos::LayoutLeft, - Kokkos::LayoutRight>::type; + using array_layout = std::conditional_t<is_inner_left, Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using traits = Kokkos::ViewTraits<T[N0][N1][N2][N3][N4][N5][N6], array_layout, P...>; using type = Kokkos::View<T[N0][N1][N2][N3][N4][N5][N6], array_layout, P...>; @@ -1151,19 +1149,18 @@ template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N5, unsigned N6, unsigned N7, class... 
P, typename iType0, typename iType1, typename iType2, typename iType3, typename iType4, typename iType5, typename iType6, typename iType7> -class ViewMapping<typename std::enable_if<(N0 != 0 && N1 != 0 && N2 != 0 && - N3 != 0 && N4 != 0 && N5 != 0 && - N6 != 0 && N7 != 0)>::type // void - , - Kokkos::ViewTraits< - T********, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - iType0, iType1, iType2, iType3, iType4, iType5, iType6, - iType7> { +class ViewMapping< + std::enable_if_t<(N0 != 0 && N1 != 0 && N2 != 0 && N3 != 0 && N4 != 0 && + N5 != 0 && N6 != 0 && N7 != 0)> // void + , + Kokkos::ViewTraits< + T********, + Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, + N5, N6, N7, true>, + P...>, + Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, + N6, N7, true>, + iType0, iType1, iType2, iType3, iType4, iType5, iType6, iType7> { public: using src_layout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, @@ -1172,9 +1169,8 @@ class ViewMapping<typename std::enable_if<(N0 != 0 && N1 != 0 && N2 != 0 && static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = - typename std::conditional<is_inner_left, Kokkos::LayoutLeft, - Kokkos::LayoutRight>::type; + using array_layout = std::conditional_t<is_inner_left, Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using traits = Kokkos::ViewTraits<T[N0][N1][N2][N3][N4][N5][N6][N7], array_layout, P...>; using type = @@ -1244,22 +1240,22 @@ namespace Kokkos { template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P> -KOKKOS_INLINE_FUNCTION Kokkos::View< - T[N0][N1], - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type, - P...> -tile_subview(const Kokkos::View< - T**, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1) { +KOKKOS_INLINE_FUNCTION + Kokkos::View<T[N0][N1], + std::conditional_t<(InnerP == Kokkos::Iterate::Left), + Kokkos::LayoutLeft, Kokkos::LayoutRight>, + P...> + tile_subview(const Kokkos::View< + T**, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>& src, + const size_t i_tile0, const size_t i_tile1) { // Force the specialized ViewMapping for extracting a tile // by using the first subview argument as the layout. using array_layout = - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; + std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using SrcLayout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>; @@ -1272,22 +1268,23 @@ tile_subview(const Kokkos::View< template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P> -KOKKOS_INLINE_FUNCTION Kokkos::View< - T[N0][N1][N2], - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type, - P...> -tile_subview(const Kokkos::View< - T***, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, const size_t i_tile2) { +KOKKOS_INLINE_FUNCTION + Kokkos::View<T[N0][N1][N2], + std::conditional_t<(InnerP == Kokkos::Iterate::Left), + Kokkos::LayoutLeft, Kokkos::LayoutRight>, + P...> + tile_subview(const Kokkos::View< + T***, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>& src, + const size_t i_tile0, const size_t i_tile1, + const size_t i_tile2) { // Force the specialized ViewMapping for extracting a tile // by using the first subview argument as the layout. using array_layout = - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; + std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using SrcLayout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>; @@ -1300,23 +1297,23 @@ tile_subview(const Kokkos::View< template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P> -KOKKOS_INLINE_FUNCTION Kokkos::View< - T[N0][N1][N2][N3], - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type, - P...> -tile_subview(const Kokkos::View< - T****, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, const size_t i_tile2, - const size_t i_tile3) { +KOKKOS_INLINE_FUNCTION + Kokkos::View<T[N0][N1][N2][N3], + std::conditional_t<(InnerP == Kokkos::Iterate::Left), + Kokkos::LayoutLeft, Kokkos::LayoutRight>, + P...> + tile_subview(const Kokkos::View< + T****, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>& src, + const size_t i_tile0, const size_t i_tile1, + const size_t i_tile2, const size_t i_tile3) { // Force the specialized ViewMapping for extracting a tile // by using the first subview argument as the layout. using array_layout = - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; + std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using SrcLayout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>; @@ -1329,23 +1326,24 @@ tile_subview(const Kokkos::View< template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P> -KOKKOS_INLINE_FUNCTION Kokkos::View< - T[N0][N1][N2][N3][N4], - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type, - P...> -tile_subview(const Kokkos::View< - T*****, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, const size_t i_tile2, - const size_t i_tile3, const size_t i_tile4) { +KOKKOS_INLINE_FUNCTION + Kokkos::View<T[N0][N1][N2][N3][N4], + std::conditional_t<(InnerP == Kokkos::Iterate::Left), + Kokkos::LayoutLeft, Kokkos::LayoutRight>, + P...> + tile_subview(const Kokkos::View< + T*****, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>& src, + const size_t i_tile0, const size_t i_tile1, + const size_t i_tile2, const size_t i_tile3, + const size_t i_tile4) { // Force the specialized ViewMapping for extracting a tile // by using the first subview argument as the layout. using array_layout = - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; + std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using SrcLayout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>; @@ -1358,23 +1356,24 @@ tile_subview(const Kokkos::View< template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P> -KOKKOS_INLINE_FUNCTION Kokkos::View< - T[N0][N1][N2][N3][N4][N5], - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type, - P...> -tile_subview(const Kokkos::View< - T******, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, const size_t i_tile2, - const size_t i_tile3, const size_t i_tile4, const size_t i_tile5) { +KOKKOS_INLINE_FUNCTION + Kokkos::View<T[N0][N1][N2][N3][N4][N5], + std::conditional_t<(InnerP == Kokkos::Iterate::Left), + Kokkos::LayoutLeft, Kokkos::LayoutRight>, + P...> + tile_subview(const Kokkos::View< + T******, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>& src, + const size_t i_tile0, const size_t i_tile1, + const size_t i_tile2, const size_t i_tile3, + const size_t i_tile4, const size_t i_tile5) { // Force the specialized ViewMapping for extracting a tile // by using the first subview argument as the layout. using array_layout = - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; + std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using SrcLayout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>; @@ -1387,24 +1386,25 @@ tile_subview(const Kokkos::View< template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P> -KOKKOS_INLINE_FUNCTION Kokkos::View< - T[N0][N1][N2][N3][N4][N5][N6], - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type, - P...> -tile_subview(const Kokkos::View< - T*******, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, const size_t i_tile2, - const size_t i_tile3, const size_t i_tile4, const size_t i_tile5, - const size_t i_tile6) { +KOKKOS_INLINE_FUNCTION + Kokkos::View<T[N0][N1][N2][N3][N4][N5][N6], + std::conditional_t<(InnerP == Kokkos::Iterate::Left), + Kokkos::LayoutLeft, Kokkos::LayoutRight>, + P...> + tile_subview(const Kokkos::View< + T*******, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>& src, + const size_t i_tile0, const size_t i_tile1, + const size_t i_tile2, const size_t i_tile3, + const size_t i_tile4, const size_t i_tile5, + const size_t i_tile6) { // Force the specialized ViewMapping for extracting a tile // by using the first subview argument as the layout. using array_layout = - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; + std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using SrcLayout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>; @@ -1418,24 +1418,25 @@ tile_subview(const Kokkos::View< template <typename T, Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4, unsigned N5, unsigned N6, unsigned N7, class... 
P> -KOKKOS_INLINE_FUNCTION Kokkos::View< - T[N0][N1][N2][N3][N4][N5][N6][N7], - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type, - P...> -tile_subview(const Kokkos::View< - T********, - Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, - N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, const size_t i_tile2, - const size_t i_tile3, const size_t i_tile4, const size_t i_tile5, - const size_t i_tile6, const size_t i_tile7) { +KOKKOS_INLINE_FUNCTION + Kokkos::View<T[N0][N1][N2][N3][N4][N5][N6][N7], + std::conditional_t<(InnerP == Kokkos::Iterate::Left), + Kokkos::LayoutLeft, Kokkos::LayoutRight>, + P...> + tile_subview(const Kokkos::View< + T********, + Kokkos::Experimental::LayoutTiled< + OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, + P...>& src, + const size_t i_tile0, const size_t i_tile1, + const size_t i_tile2, const size_t i_tile3, + const size_t i_tile4, const size_t i_tile5, + const size_t i_tile6, const size_t i_tile7) { // Force the specialized ViewMapping for extracting a tile // by using the first subview argument as the layout. 
using array_layout = - typename std::conditional<(InnerP == Kokkos::Iterate::Left), - Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; + std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, + Kokkos::LayoutRight>; using SrcLayout = Kokkos::Experimental::LayoutTiled<OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>; diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index f606a39839ff0f00317709ec9aa24374d762a853..738231677c600f6d928122269d848b1a2b51ac46 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -59,6 +59,7 @@ #include <impl/Kokkos_ViewCtor.hpp> #include <impl/Kokkos_Atomic_View.hpp> #include <impl/Kokkos_Tools.hpp> +#include <impl/Kokkos_StringManipulation.hpp> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -111,7 +112,7 @@ struct rank_dynamic<Val, Args...> { template <unsigned RD> \ struct ViewDimension##R<0u, RD> { \ static constexpr size_t ArgN##R = 0; \ - typename std::conditional<(RD < 3), size_t, unsigned>::type N##R; \ + std::conditional_t<(RD < 3), size_t, unsigned> N##R; \ ViewDimension##R() = default; \ ViewDimension##R(const ViewDimension##R&) = default; \ ViewDimension##R& operator=(const ViewDimension##R&) = default; \ @@ -196,7 +197,14 @@ struct KOKKOS_IMPL_ENFORCE_EMPTY_BASE_OPTIMIZATION ViewDimension KOKKOS_INLINE_FUNCTION constexpr ViewDimension(size_t n0, size_t n1, size_t n2, size_t n3, size_t n4, size_t n5, size_t n6, size_t n7) - : D0(n0), D1(n1), D2(n2), D3(n3), D4(n4), D5(n5), D6(n6), D7(n7) {} + : D0(n0 == KOKKOS_INVALID_INDEX ? 1 : n0), + D1(n1 == KOKKOS_INVALID_INDEX ? 1 : n1), + D2(n2 == KOKKOS_INVALID_INDEX ? 1 : n2), + D3(n3 == KOKKOS_INVALID_INDEX ? 1 : n3), + D4(n4 == KOKKOS_INVALID_INDEX ? 1 : n4), + D5(n5 == KOKKOS_INVALID_INDEX ? 
1 : n5), + D6(n6 == KOKKOS_INVALID_INDEX ? 1 : n6), + D7(n7 == KOKKOS_INVALID_INDEX ? 1 : n7) {} KOKKOS_INLINE_FUNCTION constexpr size_t extent(const unsigned r) const noexcept { @@ -346,13 +354,13 @@ struct is_integral_extent_type<std::initializer_list<iType>> { template <unsigned I, class... Args> struct is_integral_extent { // get_type is void when sizeof...(Args) <= I - using type = typename std::remove_cv<typename std::remove_reference< - typename Kokkos::Impl::get_type<I, Args...>::type>::type>::type; + using type = std::remove_cv_t<std::remove_reference_t< + typename Kokkos::Impl::get_type<I, Args...>::type>>; enum : bool { value = is_integral_extent_type<type>::value }; static_assert(value || std::is_integral<type>::value || - std::is_same<type, void>::value, + std::is_void<type>::value, "subview argument must be either integral or integral extent"); }; @@ -753,8 +761,8 @@ struct ViewDataType<T, ViewDimension<N, Args...>> { template <class T> struct ViewArrayAnalysis { using value_type = T; - using const_value_type = typename std::add_const<T>::type; - using non_const_value_type = typename std::remove_const<T>::type; + using const_value_type = std::add_const_t<T>; + using non_const_value_type = std::remove_const_t<T>; using static_dimension = ViewDimension<>; using dynamic_dimension = ViewDimension<>; using dimension = ViewDimension<>; @@ -869,8 +877,7 @@ struct ViewOffset { template <class Dimension> struct ViewOffset< Dimension, Kokkos::LayoutLeft, - typename std::enable_if<(1 >= Dimension::rank || - 0 == Dimension::rank_dynamic)>::type> { + std::enable_if_t<(1 >= Dimension::rank || 0 == Dimension::rank_dynamic)>> { using is_mapping_plugin = std::true_type; using is_regular = std::true_type; @@ -973,8 +980,15 @@ struct ViewOffset< KOKKOS_INLINE_FUNCTION constexpr array_layout layout() const { - return array_layout(m_dim.N0, m_dim.N1, m_dim.N2, m_dim.N3, m_dim.N4, - m_dim.N5, m_dim.N6, m_dim.N7); + constexpr auto r = dimension_type::rank; + return 
array_layout((r > 0 ? m_dim.N0 : KOKKOS_INVALID_INDEX), + (r > 1 ? m_dim.N1 : KOKKOS_INVALID_INDEX), + (r > 2 ? m_dim.N2 : KOKKOS_INVALID_INDEX), + (r > 3 ? m_dim.N3 : KOKKOS_INVALID_INDEX), + (r > 4 ? m_dim.N4 : KOKKOS_INVALID_INDEX), + (r > 5 ? m_dim.N5 : KOKKOS_INVALID_INDEX), + (r > 6 ? m_dim.N6 : KOKKOS_INVALID_INDEX), + (r > 7 ? m_dim.N7 : KOKKOS_INVALID_INDEX)); } KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { @@ -1152,8 +1166,7 @@ struct ViewOffset< template <class Dimension> struct ViewOffset< Dimension, Kokkos::LayoutLeft, - typename std::enable_if<(1 < Dimension::rank && - 0 < Dimension::rank_dynamic)>::type> { + std::enable_if_t<(1 < Dimension::rank && 0 < Dimension::rank_dynamic)>> { using is_mapping_plugin = std::true_type; using is_regular = std::true_type; @@ -1257,8 +1270,15 @@ struct ViewOffset< KOKKOS_INLINE_FUNCTION constexpr array_layout layout() const { - return array_layout(m_dim.N0, m_dim.N1, m_dim.N2, m_dim.N3, m_dim.N4, - m_dim.N5, m_dim.N6, m_dim.N7); + constexpr auto r = dimension_type::rank; + return array_layout((r > 0 ? m_dim.N0 : KOKKOS_INVALID_INDEX), + (r > 1 ? m_dim.N1 : KOKKOS_INVALID_INDEX), + (r > 2 ? m_dim.N2 : KOKKOS_INVALID_INDEX), + (r > 3 ? m_dim.N3 : KOKKOS_INVALID_INDEX), + (r > 4 ? m_dim.N4 : KOKKOS_INVALID_INDEX), + (r > 5 ? m_dim.N5 : KOKKOS_INVALID_INDEX), + (r > 6 ? m_dim.N6 : KOKKOS_INVALID_INDEX), + (r > 7 ? 
m_dim.N7 : KOKKOS_INVALID_INDEX)); } KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { @@ -1496,8 +1516,7 @@ struct ViewOffset< template <class Dimension> struct ViewOffset< Dimension, Kokkos::LayoutRight, - typename std::enable_if<(1 >= Dimension::rank || - 0 == Dimension::rank_dynamic)>::type> { + std::enable_if_t<(1 >= Dimension::rank || 0 == Dimension::rank_dynamic)>> { using is_mapping_plugin = std::true_type; using is_regular = std::true_type; @@ -1602,8 +1621,15 @@ struct ViewOffset< KOKKOS_INLINE_FUNCTION constexpr array_layout layout() const { - return array_layout(m_dim.N0, m_dim.N1, m_dim.N2, m_dim.N3, m_dim.N4, - m_dim.N5, m_dim.N6, m_dim.N7); + constexpr auto r = dimension_type::rank; + return array_layout((r > 0 ? m_dim.N0 : KOKKOS_INVALID_INDEX), + (r > 1 ? m_dim.N1 : KOKKOS_INVALID_INDEX), + (r > 2 ? m_dim.N2 : KOKKOS_INVALID_INDEX), + (r > 3 ? m_dim.N3 : KOKKOS_INVALID_INDEX), + (r > 4 ? m_dim.N4 : KOKKOS_INVALID_INDEX), + (r > 5 ? m_dim.N5 : KOKKOS_INVALID_INDEX), + (r > 6 ? m_dim.N6 : KOKKOS_INVALID_INDEX), + (r > 7 ? m_dim.N7 : KOKKOS_INVALID_INDEX)); } KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { @@ -1783,8 +1809,7 @@ struct ViewOffset< template <class Dimension> struct ViewOffset< Dimension, Kokkos::LayoutRight, - typename std::enable_if<(1 < Dimension::rank && - 0 < Dimension::rank_dynamic)>::type> { + std::enable_if_t<(1 < Dimension::rank && 0 < Dimension::rank_dynamic)>> { using is_mapping_plugin = std::true_type; using is_regular = std::true_type; @@ -1885,8 +1910,15 @@ struct ViewOffset< KOKKOS_INLINE_FUNCTION constexpr array_layout layout() const { - return array_layout(m_dim.N0, m_dim.N1, m_dim.N2, m_dim.N3, m_dim.N4, - m_dim.N5, m_dim.N6, m_dim.N7); + constexpr auto r = dimension_type::rank; + return array_layout((r > 0 ? m_dim.N0 : KOKKOS_INVALID_INDEX), + (r > 1 ? m_dim.N1 : KOKKOS_INVALID_INDEX), + (r > 2 ? m_dim.N2 : KOKKOS_INVALID_INDEX), + (r > 3 ? m_dim.N3 : KOKKOS_INVALID_INDEX), + (r > 4 ? 
m_dim.N4 : KOKKOS_INVALID_INDEX), + (r > 5 ? m_dim.N5 : KOKKOS_INVALID_INDEX), + (r > 6 ? m_dim.N6 : KOKKOS_INVALID_INDEX), + (r > 7 ? m_dim.N7 : KOKKOS_INVALID_INDEX)); } KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { @@ -2414,10 +2446,15 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> { KOKKOS_INLINE_FUNCTION constexpr array_layout layout() const { - return array_layout(m_dim.N0, m_stride.S0, m_dim.N1, m_stride.S1, m_dim.N2, - m_stride.S2, m_dim.N3, m_stride.S3, m_dim.N4, - m_stride.S4, m_dim.N5, m_stride.S5, m_dim.N6, - m_stride.S6, m_dim.N7, m_stride.S7); + constexpr auto r = dimension_type::rank; + return array_layout((r > 0 ? m_dim.N0 : KOKKOS_INVALID_INDEX), m_stride.S0, + (r > 1 ? m_dim.N1 : KOKKOS_INVALID_INDEX), m_stride.S1, + (r > 2 ? m_dim.N2 : KOKKOS_INVALID_INDEX), m_stride.S2, + (r > 3 ? m_dim.N3 : KOKKOS_INVALID_INDEX), m_stride.S3, + (r > 4 ? m_dim.N4 : KOKKOS_INVALID_INDEX), m_stride.S4, + (r > 5 ? m_dim.N5 : KOKKOS_INVALID_INDEX), m_stride.S5, + (r > 6 ? m_dim.N6 : KOKKOS_INVALID_INDEX), m_stride.S6, + (r > 7 ? 
m_dim.N7 : KOKKOS_INVALID_INDEX), m_stride.S7); } KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { @@ -2672,11 +2709,11 @@ struct ViewDataHandle { template <class Traits> struct ViewDataHandle< - Traits, typename std::enable_if<( - std::is_same<typename Traits::non_const_value_type, - typename Traits::value_type>::value && - std::is_same<typename Traits::specialize, void>::value && - Traits::memory_traits::is_atomic)>::type> { + Traits, + std::enable_if_t<(std::is_same<typename Traits::non_const_value_type, + typename Traits::value_type>::value && + std::is_void<typename Traits::specialize>::value && + Traits::memory_traits::is_atomic)>> { using value_type = typename Traits::value_type; using handle_type = typename Kokkos::Impl::AtomicViewDataHandle<Traits>; using return_type = typename Kokkos::Impl::AtomicDataElement<Traits>; @@ -2697,17 +2734,17 @@ struct ViewDataHandle< template <class Traits> struct ViewDataHandle< - Traits, typename std::enable_if<( - std::is_same<typename Traits::specialize, void>::value && - (!Traits::memory_traits::is_aligned) && - Traits::memory_traits::is_restrict + Traits, + std::enable_if_t<(std::is_void<typename Traits::specialize>::value && + (!Traits::memory_traits::is_aligned) && + Traits::memory_traits::is_restrict #ifdef KOKKOS_ENABLE_CUDA - && (!(std::is_same<typename Traits::memory_space, - Kokkos::CudaSpace>::value || - std::is_same<typename Traits::memory_space, - Kokkos::CudaUVMSpace>::value)) + && (!(std::is_same<typename Traits::memory_space, + Kokkos::CudaSpace>::value || + std::is_same<typename Traits::memory_space, + Kokkos::CudaUVMSpace>::value)) #endif - && (!Traits::memory_traits::is_atomic))>::type> { + && (!Traits::memory_traits::is_atomic))>> { using value_type = typename Traits::value_type; using handle_type = typename Traits::value_type* KOKKOS_RESTRICT; using return_type = typename Traits::value_type& KOKKOS_RESTRICT; @@ -2727,17 +2764,17 @@ struct ViewDataHandle< template <class Traits> struct 
ViewDataHandle< - Traits, typename std::enable_if<( - std::is_same<typename Traits::specialize, void>::value && - Traits::memory_traits::is_aligned && - (!Traits::memory_traits::is_restrict) + Traits, + std::enable_if_t<(std::is_void<typename Traits::specialize>::value && + Traits::memory_traits::is_aligned && + (!Traits::memory_traits::is_restrict) #ifdef KOKKOS_ENABLE_CUDA - && (!(std::is_same<typename Traits::memory_space, - Kokkos::CudaSpace>::value || - std::is_same<typename Traits::memory_space, - Kokkos::CudaUVMSpace>::value)) + && (!(std::is_same<typename Traits::memory_space, + Kokkos::CudaSpace>::value || + std::is_same<typename Traits::memory_space, + Kokkos::CudaUVMSpace>::value)) #endif - && (!Traits::memory_traits::is_atomic))>::type> { + && (!Traits::memory_traits::is_atomic))>> { using value_type = typename Traits::value_type; // typedef work-around for intel compilers error #3186: expected typedef // declaration @@ -2773,16 +2810,16 @@ struct ViewDataHandle< template <class Traits> struct ViewDataHandle< Traits, - typename std::enable_if<( - std::is_same<typename Traits::specialize, void>::value && - Traits::memory_traits::is_aligned && Traits::memory_traits::is_restrict + std::enable_if_t<(std::is_void<typename Traits::specialize>::value && + Traits::memory_traits::is_aligned && + Traits::memory_traits::is_restrict #ifdef KOKKOS_ENABLE_CUDA - && (!(std::is_same<typename Traits::memory_space, - Kokkos::CudaSpace>::value || - std::is_same<typename Traits::memory_space, - Kokkos::CudaUVMSpace>::value)) + && (!(std::is_same<typename Traits::memory_space, + Kokkos::CudaSpace>::value || + std::is_same<typename Traits::memory_space, + Kokkos::CudaUVMSpace>::value)) #endif - && (!Traits::memory_traits::is_atomic))>::type> { + && (!Traits::memory_traits::is_atomic))>> { using value_type = typename Traits::value_type; // typedef work-around for intel compilers error #3186: expected typedef // declaration @@ -2863,6 +2900,7 @@ struct 
ViewValueFunctor<DeviceType, ValueType, false /* is_scalar */> { size_t n; bool destroy; std::string name; + bool default_exec_space; KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const { @@ -2885,13 +2923,26 @@ struct ViewValueFunctor<DeviceType, ValueType, false /* is_scalar */> { ptr(arg_ptr), n(arg_n), destroy(false), - name(std::move(arg_name)) {} + name(std::move(arg_name)), + default_exec_space(false) {} + + ViewValueFunctor(ValueType* const arg_ptr, size_t const arg_n, + std::string arg_name) + : space(ExecSpace{}), + ptr(arg_ptr), + n(arg_n), + destroy(false), + name(std::move(arg_name)), + default_exec_space(true) {} template <typename Dummy = ValueType> std::enable_if_t<std::is_trivial<Dummy>::value && std::is_trivially_copy_assignable<ValueType>::value> construct_dispatch() { ValueType value{}; +// On A64FX memset seems to do the wrong thing with regards to first touch +// leading to the significant performance issues +#ifndef KOKKOS_ARCH_A64FX if (Impl::is_zero_byte(value)) { uint64_t kpID = 0; if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -2903,7 +2954,6 @@ struct ViewValueFunctor<DeviceType, ValueType, false /* is_scalar */> { "Kokkos::View::initialization [" + name + "] via memset", Kokkos::Profiling::Experimental::device_id(space), &kpID); } - (void)ZeroMemset<ExecSpace, ValueType*, typename DeviceType::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>( space, @@ -2914,9 +2964,14 @@ struct ViewValueFunctor<DeviceType, ValueType, false /* is_scalar */> { if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } + if (default_exec_space) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); } else { +#endif parallel_for_implementation(false); +#ifndef KOKKOS_ARCH_A64FX } +#endif } template <typename Dummy = ValueType> @@ -2950,7 +3005,8 @@ struct ViewValueFunctor<DeviceType, ValueType, false /* is_scalar */> { const Kokkos::Impl::ParallelFor<ViewValueFunctor, PolicyType> 
closure( *this, policy); closure.execute(); - space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); + if (default_exec_space || destroy) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } @@ -2973,6 +3029,7 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> { ValueType* ptr; size_t n; std::string name; + bool default_exec_space; KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const { ptr[i] = ValueType(); } @@ -2983,7 +3040,19 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> { ViewValueFunctor(ExecSpace const& arg_space, ValueType* const arg_ptr, size_t const arg_n, std::string arg_name) - : space(arg_space), ptr(arg_ptr), n(arg_n), name(std::move(arg_name)) {} + : space(arg_space), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(false) {} + + ViewValueFunctor(ValueType* const arg_ptr, size_t const arg_n, + std::string arg_name) + : space(ExecSpace{}), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(true) {} template <typename Dummy = ValueType> std::enable_if_t<std::is_trivial<Dummy>::value && @@ -2991,6 +3060,9 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> { construct_shared_allocation() { // Shortcut for zero initialization ValueType value{}; +// On A64FX memset seems to do the wrong thing with regards to first touch +// leading to the significant performance issues +#ifndef KOKKOS_ARCH_A64FX if (Impl::is_zero_byte(value)) { uint64_t kpID = 0; if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -3013,9 +3085,14 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> { if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } + if (default_exec_space) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); } else { +#endif 
parallel_for_implementation(); +#ifndef KOKKOS_ARCH_A64FX } +#endif } template <typename Dummy = ValueType> @@ -3044,8 +3121,10 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> { const Kokkos::Impl::ParallelFor<ViewValueFunctor, PolicyType> closure( *this, PolicyType(0, n)); closure.execute(); - space.fence( - "Kokkos::Impl::ViewValueFunctor: Fence after setting values in view"); + if (default_exec_space) + space.fence( + "Kokkos::Impl::ViewValueFunctor: Fence after setting values in " + "view"); if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } @@ -3062,10 +3141,10 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> { template <class Traits> class ViewMapping< Traits, - typename std::enable_if<( - std::is_same<typename Traits::specialize, void>::value && + std::enable_if_t<( + std::is_void<typename Traits::specialize>::value && ViewOffset<typename Traits::dimension, typename Traits::array_layout, - void>::is_mapping_plugin::value)>::type> { + void>::is_mapping_plugin::value)>> { public: using offset_type = ViewOffset<typename Traits::dimension, typename Traits::array_layout, void>; @@ -3196,26 +3275,26 @@ class ViewMapping< template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(std::is_integral<I0>::value && - // if layout is neither stride nor irregular, - // then just use the handle directly - !(std::is_same<typename Traits::array_layout, - Kokkos::LayoutStride>::value || - !is_regular::value)), - reference_type>::type + std::enable_if_t<(std::is_integral<I0>::value && + // if layout is neither stride nor irregular, + // then just use the handle directly + !(std::is_same<typename Traits::array_layout, + Kokkos::LayoutStride>::value || + !is_regular::value)), + reference_type> reference(const I0& i0) const { return m_impl_handle[i0]; } template <typename I0> KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<(std::is_integral<I0>::value && - // if the 
layout is strided or irregular, then - // we have to use the offset - (std::is_same<typename Traits::array_layout, - Kokkos::LayoutStride>::value || - !is_regular::value)), - reference_type>::type + std::enable_if_t<(std::is_integral<I0>::value && + // if the layout is strided or irregular, then + // we have to use the offset + (std::is_same<typename Traits::array_layout, + Kokkos::LayoutStride>::value || + !is_regular::value)), + reference_type> reference(const I0& i0) const { return m_impl_handle[m_impl_offset(i0)]; } @@ -3334,7 +3413,8 @@ class ViewMapping< template <class... P> Kokkos::Impl::SharedAllocationRecord<>* allocate_shared( Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, - typename Traits::array_layout const& arg_layout) { + typename Traits::array_layout const& arg_layout, + bool execution_space_specified) { using alloc_prop = Kokkos::Impl::ViewCtorProp<P...>; using execution_space = typename alloc_prop::execution_space; @@ -3361,13 +3441,22 @@ class ViewMapping< static_cast<Kokkos::Impl::ViewCtorProp<void, std::string> const&>( arg_prop) .value; - // Create shared memory tracking record with allocate memory from the memory - // space - record_type* const record = record_type::allocate( + const execution_space& exec_space = + static_cast<Kokkos::Impl::ViewCtorProp<void, execution_space> const&>( + arg_prop) + .value; + const memory_space& mem_space = static_cast<Kokkos::Impl::ViewCtorProp<void, memory_space> const&>( arg_prop) - .value, - alloc_name, alloc_size); + .value; + + // Create shared memory tracking record with allocate memory from the memory + // space + record_type* const record = + execution_space_specified + ? record_type::allocate(exec_space, mem_space, alloc_name, + alloc_size) + : record_type::allocate(mem_space, alloc_name, alloc_size); m_impl_handle = handle_type(reinterpret_cast<pointer_type>(record->data())); @@ -3377,11 +3466,12 @@ class ViewMapping< // Assume destruction is only required when construction is requested. 
// The ViewValueFunctor has both value construction and destruction // operators. - record->m_destroy = functor_type( - static_cast<Kokkos::Impl::ViewCtorProp<void, execution_space> const&>( - arg_prop) - .value, - (value_type*)m_impl_handle, m_impl_offset.span(), alloc_name); + record->m_destroy = + execution_space_specified + ? functor_type(exec_space, (value_type*)m_impl_handle, + m_impl_offset.span(), alloc_name) + : functor_type((value_type*)m_impl_handle, m_impl_offset.span(), + alloc_name); // Construct values record->m_destroy.construct_shared_allocation(); @@ -3398,13 +3488,13 @@ class ViewMapping< template <class DstTraits, class SrcTraits> class ViewMapping< DstTraits, SrcTraits, - typename std::enable_if<( + std::enable_if_t<( !(std::is_same<typename SrcTraits::array_layout, LayoutStride>:: value) && // Added to have a new specialization for SrcType of // LayoutStride // default mappings - std::is_same<typename DstTraits::specialize, void>::value && - std::is_same<typename SrcTraits::specialize, void>::value && + std::is_void<typename DstTraits::specialize>::value && + std::is_void<typename SrcTraits::specialize>::value && ( // same layout std::is_same<typename DstTraits::array_layout, @@ -3421,7 +3511,7 @@ class ViewMapping< std::is_same<typename SrcTraits::array_layout, Kokkos::LayoutRight>::value || std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutStride>::value))))>::type> { + Kokkos::LayoutStride>::value))))>> { private: enum { is_assignable_space = Kokkos::Impl::MemorySpaceAccess< @@ -3539,11 +3629,11 @@ class ViewMapping< template <class DstTraits, class SrcTraits> class ViewMapping< DstTraits, SrcTraits, - typename std::enable_if<( + std::enable_if_t<( std::is_same<typename SrcTraits::array_layout, Kokkos::LayoutStride>::value && - std::is_same<typename DstTraits::specialize, void>::value && - std::is_same<typename SrcTraits::specialize, void>::value && + std::is_void<typename DstTraits::specialize>::value && + 
std::is_void<typename SrcTraits::specialize>::value && ( // same layout std::is_same<typename DstTraits::array_layout, @@ -3554,7 +3644,7 @@ class ViewMapping< std::is_same<typename DstTraits::array_layout, Kokkos::LayoutRight>::value || std::is_same<typename DstTraits::array_layout, - Kokkos::LayoutStride>::value)))>::type> { + Kokkos::LayoutStride>::value)))>> { private: enum { is_assignable_space = Kokkos::Impl::MemorySpaceAccess< @@ -3704,8 +3794,7 @@ struct SubViewDataTypeImpl<void, ValueType, Kokkos::Experimental::Extents<>> { template <class ValueType, ptrdiff_t Ext, ptrdiff_t... Exts, class Integral, class... Args> struct SubViewDataTypeImpl< - typename std::enable_if< - std::is_integral<typename std::decay<Integral>::type>::value>::type, + std::enable_if_t<std::is_integral<std::decay_t<Integral>>::value>, ValueType, Kokkos::Experimental::Extents<Ext, Exts...>, Integral, Args...> : SubViewDataTypeImpl<void, ValueType, Kokkos::Experimental::Extents<Exts...>, Args...> {}; @@ -3725,7 +3814,7 @@ struct SubViewDataTypeImpl<void, ValueType, template <class ValueType, ptrdiff_t Ext, ptrdiff_t... Exts, class PairLike, class... Args> struct SubViewDataTypeImpl< - typename std::enable_if<is_pair_like<PairLike>::value>::type, ValueType, + std::enable_if_t<is_pair_like<PairLike>::value>, ValueType, Kokkos::Experimental::Extents<Ext, Exts...>, PairLike, Args...> : SubViewDataTypeImpl< void, typename make_all_extents_into_pointers<ValueType>::type*, @@ -3738,14 +3827,13 @@ struct SubViewDataType : SubViewDataTypeImpl<void, ValueType, Exts, Args...> {}; template <class SrcTraits, class... 
Args> class ViewMapping< - typename std::enable_if<( - std::is_same<typename SrcTraits::specialize, void>::value && - (std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutLeft>::value || - std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutRight>::value || - std::is_same<typename SrcTraits::array_layout, - Kokkos::LayoutStride>::value))>::type, + std::enable_if_t<(std::is_void<typename SrcTraits::specialize>::value && + (std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutLeft>::value || + std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutRight>::value || + std::is_same<typename SrcTraits::array_layout, + Kokkos::LayoutStride>::value))>, SrcTraits, Args...> { private: static_assert(SrcTraits::rank == sizeof...(Args), @@ -3792,7 +3880,7 @@ class ViewMapping< }; // Subview's layout - using array_layout = typename std::conditional< + using array_layout = std::conditional_t< ( /* Same array layout IF */ (rank == 0) /* output rank zero */ || SubviewLegalArgsCompileTime<typename SrcTraits::array_layout, @@ -3810,7 +3898,7 @@ class ViewMapping< std::is_same<typename SrcTraits::array_layout, Kokkos::LayoutRight>::value) // replace input rank ), - typename SrcTraits::array_layout, Kokkos::LayoutStride>::type; + typename SrcTraits::array_layout, Kokkos::LayoutStride>; using value_type = typename SrcTraits::value_type; @@ -3920,7 +4008,8 @@ struct OperatorBoundsErrorOnDevice<MapType, true> { KOKKOS_INLINE_FUNCTION static void run(MapType const& map) { SharedAllocationHeader const* const header = - SharedAllocationHeader::get_header((void*)(map.data())); + SharedAllocationHeader::get_header( + static_cast<void const*>(map.data())); char const* const label = header->label(); enum { LEN = 128 }; char msg[LEN]; @@ -3986,6 +4075,62 @@ KOKKOS_INLINE_FUNCTION void view_verify_operator_bounds( } } +// primary template: memory space is accessible, do nothing. 
+template <class MemorySpace, class AccessSpace, + bool = SpaceAccessibility<AccessSpace, MemorySpace>::accessible> +struct RuntimeCheckViewMemoryAccessViolation { + template <class Track, class Map> + KOKKOS_FUNCTION RuntimeCheckViewMemoryAccessViolation(char const* const, + Track const&, + Map const&) {} +}; + +// explicit specialization: memory access violation will occur, call abort with +// the specified error message. +template <class MemorySpace, class AccessSpace> +struct RuntimeCheckViewMemoryAccessViolation<MemorySpace, AccessSpace, false> { + template <class Track, class Map> + KOKKOS_FUNCTION RuntimeCheckViewMemoryAccessViolation(char const* const msg, + Track const& track, + Map const&) { + char err[256] = ""; + strncat(err, msg, 64); + strcat(err, " (label=\""); + + KOKKOS_IF_ON_HOST(({ + auto const tracker = track.m_tracker; + + if (tracker.has_record()) { + strncat(err, tracker.template get_label<void>().c_str(), 128); + } else { + strcat(err, "**UNMANAGED**"); + } + })) + + KOKKOS_IF_ON_DEVICE(({ + strcat(err, "**UNAVAILABLE**"); + (void)track; + })) + + strcat(err, "\")"); + + Kokkos::abort(err); + } +}; + +template <class MemorySpace, class Track, class Map, class... Ignore> +KOKKOS_FUNCTION void runtime_check_memory_access_violation( + char const* const msg, Track const& track, Map const& map, Ignore...) 
{ + KOKKOS_IF_ON_HOST( + ((void)RuntimeCheckViewMemoryAccessViolation<MemorySpace, + DefaultHostExecutionSpace>( + msg, track, map);)) + KOKKOS_IF_ON_DEVICE( + ((void)RuntimeCheckViewMemoryAccessViolation<MemorySpace, + DefaultExecutionSpace>( + msg, track, map);)) +} + } /* namespace Impl */ } /* namespace Kokkos */ diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp index 972b1b6d9afdffd379f803c003dd35ddab7c751c..cfa30f6e7b053acc6af07b95e27d5e851613e8f7 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewTracker.hpp @@ -60,7 +60,7 @@ namespace Impl { * constructors that match the view. The constructors and assignments * from view will externalize the logic needed to enable/disable * ref counting to provide a single gate to enable further developments - * which may hing on the same logic. + * which may hinge on the same logic. * */ template <class ParentView> diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewUniformType.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewUniformType.hpp index 2eb8fc9e3b820cc5d93dcf01b049ede170735d1a..13ed4df6a2f5d3aae474d45869482e98519df8df 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewUniformType.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewUniformType.hpp @@ -76,15 +76,13 @@ struct ViewUniformLayout<Kokkos::LayoutRight, 1> { template <class ViewType, int Traits> struct ViewUniformType { - using data_type = typename ViewType::data_type; - using const_data_type = - typename std::add_const<typename ViewType::data_type>::type; + using data_type = typename ViewType::data_type; + using const_data_type = std::add_const_t<typename ViewType::data_type>; using runtime_data_type = typename ViewScalarToDataType<typename ViewType::value_type, ViewType::rank>::type; using runtime_const_data_type = typename ViewScalarToDataType< - typename std::add_const<typename ViewType::value_type>::type, - 
ViewType::rank>::type; + std::add_const_t<typename ViewType::value_type>, ViewType::rank>::type; using array_layout = typename ViewUniformLayout<typename ViewType::array_layout, diff --git a/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp b/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp index 04507b0984a78b12ea16c8c1ee33c4fc99f24b08..a0d1bc4cb446f706671fca7c6526bacdc2533168 100644 --- a/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #define DEBUG_PRINT 0 #include <iostream> @@ -234,7 +238,6 @@ unsigned thread_mapping(const char* const label, const bool allow_async, #include <iostream> #include <sstream> -#include <stdexcept> /*--------------------------------------------------------------------------*/ /* Third Party Libraries */ @@ -272,9 +275,9 @@ enum { MAX_CORE = 1024 }; std::pair<unsigned, unsigned> s_core_topology(0, 0); unsigned s_core_capacity(0); -hwloc_topology_t s_hwloc_topology(0); -hwloc_bitmap_t s_hwloc_location(0); -hwloc_bitmap_t s_process_binding(0); +hwloc_topology_t s_hwloc_topology(nullptr); +hwloc_bitmap_t s_hwloc_location(nullptr); +hwloc_bitmap_t s_process_binding(nullptr); hwloc_bitmap_t s_core[MAX_CORE]; bool s_can_bind_threads(true); @@ -286,13 +289,13 @@ struct Sentinel { bool sentinel() { static Sentinel self; - if (0 == s_hwloc_topology) { + if (nullptr == s_hwloc_topology) { std::cerr << "Kokkos::hwloc ERROR : Called after return from main()" << std::endl; std::cerr.flush(); } - return 0 != s_hwloc_topology; + return nullptr != s_hwloc_topology; } Sentinel::~Sentinel() { @@ -303,9 +306,9 @@ Sentinel::~Sentinel() { s_core_topology.first = 0; s_core_topology.second = 0; s_core_capacity = 0; - s_hwloc_topology = 0; - s_hwloc_location = 0; - s_process_binding = 0; + s_hwloc_topology = nullptr; + s_hwloc_location = nullptr; + s_process_binding = nullptr; } Sentinel::Sentinel() 
{ @@ -317,11 +320,11 @@ Sentinel::Sentinel() { s_core_topology = std::pair<unsigned, unsigned>(0, 0); s_core_capacity = 0; - s_hwloc_topology = 0; - s_hwloc_location = 0; - s_process_binding = 0; + s_hwloc_topology = nullptr; + s_hwloc_location = nullptr; + s_process_binding = nullptr; - for (unsigned i = 0; i < MAX_CORE; ++i) s_core[i] = 0; + for (unsigned i = 0; i < MAX_CORE; ++i) s_core[i] = nullptr; hwloc_topology_init(&s_hwloc_topology); hwloc_topology_load(s_hwloc_topology); diff --git a/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp b/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp index 8551856aa89ae8fa9fddbf0f5a83bdaa25e297b4..983a71a561ec5da83311c83f3b005fc41259e872 100644 --- a/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp +++ b/packages/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp @@ -125,8 +125,6 @@ #else #define KOKKOS_DEFAULTED_FUNCTION inline #endif -#define KOKKOS_IMPL_HOST_FUNCTION __host__ -#define KOKKOS_IMPL_DEVICE_FUNCTION __device__ #if (CUDA_VERSION >= 10000) #define KOKKOS_CUDA_ENABLE_GRAPHS diff --git a/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp b/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp index 32236e963d563be07c010d6a520a745d2d977eb5..b203e9afb27b1c0d5b18b2a25c93e5d65a0d8e80 100644 --- a/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp +++ b/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp @@ -45,6 +45,15 @@ #ifndef KOKKOS_SETUP_SYCL_HPP_ #define KOKKOS_SETUP_SYCL_HPP_ +// FIXME_SYCL the fallback assert is temporarily disabled by default in the +// compiler so we need to force it +#ifndef SYCL_ENABLE_FALLBACK_ASSERT +#define SYCL_ENABLE_FALLBACK_ASSERT +#endif +#ifndef SYCL_FALLBACK_ASSERT +#define SYCL_FALLBACK_ASSERT 1 +#endif + #include <CL/sycl.hpp> #ifdef __SYCL_DEVICE_ONLY__ diff --git a/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp index 7bd96ab53c784d186d606727d1ea81938f70ab74..f306e43a031bdd3a55e3a150dddb87aec9e994f1 
100644 --- a/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp +++ b/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp @@ -109,9 +109,14 @@ struct WorkTagTrait : TraitSpecificationBase<WorkTagTrait> { // we should benchmark this assumption if it becomes a problem. template <class T> using trait_matches_specification = std::integral_constant< - bool, !std::is_void<T>::value && - !type_list_any<_trait_matches_spec_predicate<T>::template apply, - _exec_policy_traits_without_work_tag>::value>; + bool, +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_3 + std::is_empty<T>::value && +#else + !std::is_void<T>::value && +#endif + !type_list_any<_trait_matches_spec_predicate<T>::template apply, + _exec_policy_traits_without_work_tag>::value>; }; // </editor-fold> end trait specification }}}1 diff --git a/packages/kokkos/core/unit_test/CMakeLists.txt b/packages/kokkos/core/unit_test/CMakeLists.txt index 0d968b89f6793d0be26f700f7ae9d488916c77bf..24f70c0ccb3208ca3db1acf82e08bfb56d1ef0de 100644 --- a/packages/kokkos/core/unit_test/CMakeLists.txt +++ b/packages/kokkos/core/unit_test/CMakeLists.txt @@ -58,6 +58,8 @@ SET(KOKKOS_SYCL_FEATURE_LEVEL 999) SET(KOKKOS_SYCL_NAME Experimental::SYCL) SET(KOKKOS_THREADS_FEATURE_LEVEL 999) SET(KOKKOS_THREADS_NAME Threads) +SET(KOKKOS_OPENACC_FEATURE_LEVEL 3) +SET(KOKKOS_OPENACC_NAME Experimental::OpenACC) # @@ -70,8 +72,11 @@ KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_ KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/core/unit_test/category_files) SET(COMPILE_ONLY_SOURCES + TestArray.cpp TestDetectionIdiom.cpp TestInterOp.cpp + TestLegionInteroperability.cpp + TestStringManipulation.cpp TestTypeList.cpp ) # TestInterOp has a dependency on containers @@ -116,10 +121,12 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) ExecutionSpace FunctorAnalysis Init + JoinBackwardCompatibility LocalDeepCopy MinMaxClamp MathematicalConstants - MathematicalFunctions + MathematicalFunctions1 + 
MathematicalFunctions2 MDRange_a MDRange_b MDRange_c @@ -172,8 +179,8 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) list(APPEND ${Tag}_SOURCES1B ${file}) endforeach() - SET(${Tag}_SOURCES2A) - foreach(Name + SET(SOURCES2A_NAME_LIST + Abort TeamBasic TeamReductionScan TeamScan @@ -188,14 +195,27 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) ViewAPI_e ViewCopy_a ViewCopy_b + ViewCtorDimMatch + ViewHooks ViewLayoutStrideAssignment ViewMapping_b ViewMapping_subview + ViewMemoryAccessViolation ViewOfClass ViewResize View_64bit WorkGraph + WithoutInitializing + ) + IF(KOKKOS_HAS_TRILINOS) + LIST(REMOVE_ITEM SOURCES2A_NAME_LIST + Abort + ViewMemoryAccessViolation ) + ENDIF() + + SET(${Tag}_SOURCES2A) + foreach(Name ${SOURCES2A_NAME_LIST}) set(file ${dir}/Test${Tag}_${Name}.cpp) # Write to a temporary intermediate file and call configure_file to avoid # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. @@ -211,7 +231,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) if (Tag STREQUAL "Cuda") set(TagHostAccessible CudaUVM) elseif(Tag STREQUAL "HIP") - set(TagHostAccessible HIPHostPinned) + set(TagHostAccessible HIPManaged) elseif(Tag STREQUAL "SYCL") set(TagHostAccessible SYCLSharedUSM) endif() @@ -262,7 +282,6 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) SubView_c12 SubView_c13 SubView_c14 - WithoutInitializing ) set(file ${dir}/Test${Tag}_${Name}.cpp) # Write to a temporary intermediate file and call configure_file to avoid @@ -281,7 +300,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) endif() endforeach() -foreach(PairDeviceSpace HIP-HostPinned;Cuda-HostPinned;Cuda-UVM;SYCL-HostUSM;SYCL-SharedUSM) +foreach(PairDeviceSpace HIP-HostPinned;HIP-Managed;Cuda-HostPinned;Cuda-UVM;SYCL-HostUSM;SYCL-SharedUSM) string(REGEX REPLACE "([^-]*)-(.*)" "\\1" DEVICE ${PairDeviceSpace}) string(REGEX REPLACE "([^-]*)-(.*)" "\\2" SPACE 
${PairDeviceSpace}) @@ -450,6 +469,7 @@ endif() if (Kokkos_ENABLE_OPENMP) set(OpenMP_EXTRA_SOURCES openmp/TestOpenMP_Task.cpp + openmp/TestOpenMP_PartitionMaster.cpp ) if (Kokkos_ENABLE_DEPRECATED_CODE_3) list(APPEND OpenMP_EXTRA_SOURCES openmp/TestOpenMP_Task.cpp) @@ -492,25 +512,10 @@ if(Kokkos_ENABLE_HPX) KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_HPX_IndependentInstances SOURCES - UnitTestMain.cpp + UnitTestMainInit.cpp hpx/TestHPX_IndependentInstances.cpp - ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_HPX_IndependentInstancesDelayedExecution - SOURCES - UnitTestMain.cpp hpx/TestHPX_IndependentInstancesDelayedExecution.cpp - ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_HPX_IndependentInstancesInstanceIds - SOURCES - UnitTestMain.cpp hpx/TestHPX_IndependentInstancesInstanceIds.cpp - ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_HPX_IndependentInstancesRefCounting - SOURCES - UnitTestMain.cpp hpx/TestHPX_IndependentInstancesRefCounting.cpp ) endif() @@ -585,6 +590,8 @@ if(Kokkos_ENABLE_HIP) UnitTestMainInit.cpp ${HIP_SOURCES} hip/TestHIP_ScanUnit.cpp + hip/TestHIP_Spaces.cpp + hip/TestHIP_Memory_Requirements.cpp hip/TestHIP_TeamScratchStreams.cpp hip/TestHIP_AsyncLauncher.cpp hip/TestHIP_BlocksizeDeduction.cpp @@ -690,11 +697,15 @@ endif() if (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) SET(DEFAULT_DEVICE_SOURCES UnitTestMainInit.cpp + TestInitializationSettings.cpp + TestParseCmdLineArgsAndEnvVars.cpp default/TestDefaultDeviceType.cpp ) else() SET(DEFAULT_DEVICE_SOURCES UnitTestMainInit.cpp + TestInitializationSettings.cpp + TestParseCmdLineArgsAndEnvVars.cpp default/TestDefaultDeviceType.cpp default/TestDefaultDeviceType_a1.cpp default/TestDefaultDeviceType_b1.cpp @@ -827,7 +838,7 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate NAME ProfilingTestLibraryCmdLineHelp EXE ProfilingAllCalls ARGS --kokkos-tools-help - --kokkos-tools-library=$<TARGET_FILE:kokkosprinter-tool> + 
--kokkos-tools-libs=$<TARGET_FILE:kokkosprinter-tool> PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_print_help:KokkosCore_ProfilingAllCalls::kokkosp_finalize_library::") @@ -853,7 +864,7 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate NAME ProfilingTestLibraryCmdLine EXE ProfilingAllCalls ARGS [=[--kokkos-tools-args=-c test delimit]=] - --kokkos-tools-library=$<TARGET_FILE:kokkosprinter-tool> + --kokkos-tools-libs=$<TARGET_FILE:kokkosprinter-tool> PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_parse_args:4:KokkosCore_ProfilingAllCalls:-c:test:delimit::.*::kokkosp_allocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]source] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_allocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]destination] via memset:[0-9]+:0::kokkosp_end_parallel_for:0::kokkosp_begin_deep_copy:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::.*kokkosp_end_deep_copy::kokkosp_begin_parallel_for:parallel_for:${SIZE_REGEX}:0::kokkosp_end_parallel_for:0::kokkosp_begin_parallel_reduce:parallel_reduce:${SIZE_REGEX}:1${SKIP_SCRATCH_INITIALIZATION_REGEX}::kokkosp_end_parallel_reduce:1::kokkosp_begin_parallel_scan:parallel_scan:${SIZE_REGEX}:2::kokkosp_end_parallel_scan:2::kokkosp_push_profile_region:push_region::kokkosp_pop_profile_region::kokkosp_create_profile_section:created_section:3::kokkosp_start_profile_section:3::kokkosp_stop_profile_section:3::kokkosp_destroy_profile_section:3::kokkosp_profile_event:profiling_event::kokkosp_declare_metadata:dogs:good::kokkosp_deallocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_deallocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_finalize_library::" ) endif() #KOKKOS_ENABLE_LIBDL @@ -878,12 +889,14 @@ KOKKOS_ADD_TEST( NAME UnitTest_StackTraceTest ) endif() 
-foreach(INITTESTS_NUM RANGE 1 18) -KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_DefaultInit_${INITTESTS_NUM} - SOURCES UnitTestMain.cpp default/TestDefaultDeviceTypeInit_${INITTESTS_NUM}.cpp -) -endforeach(INITTESTS_NUM) +if(Kokkos_ENABLE_DEPRECATED_CODE_3) + foreach(INITTESTS_NUM RANGE 1 18) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_DefaultInit_${INITTESTS_NUM} + SOURCES UnitTestMain.cpp default/TestDefaultDeviceTypeInit_${INITTESTS_NUM}.cpp + ) + endforeach(INITTESTS_NUM) +endif() if (KOKKOS_ENABLE_HWLOC) KOKKOS_ADD_EXECUTABLE_AND_TEST( diff --git a/packages/kokkos/core/unit_test/Makefile b/packages/kokkos/core/unit_test/Makefile index 570cee0227ffea55119c604eef92c06b60478c50..0c3e1ee4767e1b70664de385eef8c2cd2c023243 100644 --- a/packages/kokkos/core/unit_test/Makefile +++ b/packages/kokkos/core/unit_test/Makefile @@ -289,6 +289,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) OBJ_HIP += TestHIP_Reductions.o OBJ_HIP += TestHIP_MDRange_a.o TestHIP_MDRange_b.o TestHIP_MDRange_c.o TestHIP_MDRange_d.o TestHIP_MDRange_e.o OBJ_HIP += TestHIP_Spaces.o + OBJ_HIP += TestHIP_Memory_Requirements.o OBJ_HIP += TestHIPHostPinned_ViewAPI_a.o TestHIPHostPinned_ViewAPI_b.o TestHIPHostPinned_ViewAPI_c.o TestHIPHostPinned_ViewAPI_d.o TestHIPHostPinned_ViewAPI_e.o OBJ_HIP += TestHIPHostPinned_ViewCopy_a.o TestHIPHostPinned_ViewCopy_b.o OBJ_HIP += TestHIPHostPinned_ViewMapping_a.o TestHIPHostPinned_ViewMapping_b.o TestHIPHostPinned_ViewMapping_subview.o @@ -408,12 +409,14 @@ TEST_TARGETS += test-stack-trace TEST_TARGETS += test-stack-trace-terminate TEST_TARGETS += test-stack-trace-generic-term +ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) NUM_INITTESTS = 16 INITTESTS_NUMBERS := $(shell seq 1 ${NUM_INITTESTS}) INITTESTS_TARGETS := $(addprefix KokkosCore_UnitTest_DefaultDeviceTypeInit_,${INITTESTS_NUMBERS}) TARGETS += ${INITTESTS_TARGETS} INITTESTS_TEST_TARGETS := $(addprefix test-default-init-,${INITTESTS_NUMBERS}) TEST_TARGETS += ${INITTESTS_TEST_TARGETS} +endif 
KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Cuda @@ -508,7 +511,7 @@ test-push-finalize-hook: KokkosCore_UnitTest_PushFinalizeHook test-push-finalize-hook-terminate: KokkosCore_UnitTest_PushFinalizeHook_terminate ./KokkosCore_UnitTest_PushFinalizeHook_terminate - + test-stack-trace: KokkosCore_UnitTest_StackTraceTestExec ./KokkosCore_UnitTest_StackTraceTestExec --gtest_filter=*normal$(STACK_TRACE_TERMINATE_FILTER) diff --git a/packages/kokkos/core/unit_test/TestAbort.hpp b/packages/kokkos/core/unit_test/TestAbort.hpp new file mode 100644 index 0000000000000000000000000000000000000000..58da329c39930822a53ffa0f87bcf56040c71b96 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestAbort.hpp @@ -0,0 +1,138 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <regex> +#include <Kokkos_Core.hpp> + +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC +TEST(TEST_CATEGORY_DEATH, abort_from_host) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + char msg[] = "Goodbye cruel world"; + EXPECT_DEATH({ Kokkos::abort(msg); }, msg); +} + +template <class ExecutionSpace> +struct TestAbortPrintingToStdout { + TestAbortPrintingToStdout() { + ::testing::internal::CaptureStdout(); + Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0, 1), *this); + Kokkos::fence(); + auto const captured = ::testing::internal::GetCapturedStdout(); + EXPECT_TRUE(std::regex_search(captured, + std::regex("move along nothing to see here"))) + << "here is what was printed to stdout \"" << captured << "\""; + } + KOKKOS_FUNCTION void operator()(int) const { + Kokkos::abort("move along nothing to see here"); + } +}; + +template <class ExecutionSpace> +struct TestAbortCausingAbnormalProgramTerminationButIgnoringErrorMessage { + TestAbortCausingAbnormalProgramTerminationButIgnoringErrorMessage() { + 
EXPECT_DEATH( + { + Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0, 1), + *this); + Kokkos::fence(); + }, + ".*"); + } + KOKKOS_FUNCTION void operator()(int) const { Kokkos::abort("ignored"); } +}; + +template <class ExecutionSpace> +struct TestAbortCausingAbnormalProgramTerminationAndPrinting { + TestAbortCausingAbnormalProgramTerminationAndPrinting() { + EXPECT_DEATH( + { + Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0, 1), + *this); + Kokkos::fence(); + }, + "Meurs, pourriture communiste !"); + } + KOKKOS_FUNCTION void operator()(int) const { + Kokkos::abort("Meurs, pourriture communiste !"); + } +}; + +template <class ExecutionSpace> +void test_abort_from_device() { +#if defined(KOKKOS_ENABLE_OPENMPTARGET) // FIXME_OPENMPTARGET + if (std::is_same<ExecutionSpace, Kokkos::Experimental::OpenMPTarget>::value) { + TestAbortPrintingToStdout<ExecutionSpace>(); + } else { + TestAbortCausingAbnormalProgramTerminationAndPrinting<ExecutionSpace>(); + } +#elif defined(KOKKOS_ENABLE_SYCL) // FIXME_SYCL + if (std::is_same<ExecutionSpace, Kokkos::Experimental::SYCL>::value) { +#ifdef NDEBUG + TestAbortPrintingToStdout<ExecutionSpace>(); +#else + TestAbortCausingAbnormalProgramTerminationAndPrinting<ExecutionSpace>(); +#endif + } else { + TestAbortCausingAbnormalProgramTerminationAndPrinting<ExecutionSpace>(); + } +#elif defined(KOKKOS_IMPL_HIP_ABORT_DOES_NOT_PRINT_MESSAGE) + if (std::is_same<ExecutionSpace, Kokkos::Experimental::HIP>::value) { + TestAbortCausingAbnormalProgramTerminationButIgnoringErrorMessage< + ExecutionSpace>(); + } else { + TestAbortCausingAbnormalProgramTerminationAndPrinting<ExecutionSpace>(); + } +#else + TestAbortCausingAbnormalProgramTerminationAndPrinting<ExecutionSpace>(); +#endif +} + +TEST(TEST_CATEGORY_DEATH, abort_from_device) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + test_abort_from_device<TEST_EXECSPACE>(); +} +#endif diff --git a/packages/kokkos/core/unit_test/TestAggregate.hpp 
b/packages/kokkos/core/unit_test/TestAggregate.hpp index 7590c6f1fe091227c2033176690cc18aee5be44f..d21e6f46da8d253671b6fac73d4d028cc0ee9cf2 100644 --- a/packages/kokkos/core/unit_test/TestAggregate.hpp +++ b/packages/kokkos/core/unit_test/TestAggregate.hpp @@ -45,13 +45,7 @@ #ifndef TEST_AGGREGATE_HPP #define TEST_AGGREGATE_HPP -#include <gtest/gtest.h> - -#include <stdexcept> -#include <sstream> -#include <iostream> - -#include <impl/Kokkos_ViewArray.hpp> +#include <Kokkos_Core.hpp> namespace Test { @@ -78,8 +72,7 @@ void TestViewAggregate() { static_assert(a32_traits::rank == 2, ""); static_assert(a32_traits::rank_dynamic == 2, ""); - static_assert(std::is_same<typename flat_traits::specialize, void>::value, - ""); + static_assert(std::is_void<typename flat_traits::specialize>::value, ""); static_assert(flat_traits::rank == 3, ""); static_assert(flat_traits::rank_dynamic == 2, ""); static_assert(flat_traits::dimension::N2 == 32, ""); diff --git a/packages/kokkos/core/unit_test/TestArray.cpp b/packages/kokkos/core/unit_test/TestArray.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ca93918e062718a82a9d86022055e8e501f362f1 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestArray.cpp @@ -0,0 +1,84 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Array.hpp> + +namespace { + +#define STATIC_ASSERT(cond) static_assert(cond, "") + +KOKKOS_FUNCTION constexpr bool test_array() { + constexpr Kokkos::Array<int, 3> a{{1, 2}}; + + STATIC_ASSERT(!a.empty()); + STATIC_ASSERT(a.size() == 3); + STATIC_ASSERT(a.max_size() == 3); + + STATIC_ASSERT(*a.data() == 1); + STATIC_ASSERT(a[1] == 2); + + return true; +} + +STATIC_ASSERT(test_array()); + +#ifdef KOKKOS_ENABLE_CXX17 +KOKKOS_FUNCTION constexpr bool test_array_structured_binding_support() { + constexpr Kokkos::Array<float, 2> a{}; + auto& [xr, yr] = a; + (void)xr; + (void)yr; + auto [x, y] = a; + (void)x; + (void)y; + auto const& [xcr, ycr] = a; + (void)xcr; + (void)ycr; + return true; +} + +STATIC_ASSERT(test_array_structured_binding_support()); +#endif + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestAtomicOperations.hpp b/packages/kokkos/core/unit_test/TestAtomicOperations.hpp index 1ec175710c86cf0748c9a4e3d846369e189e7be6..ab9b970be126b83a90c84a9f1b6ba05b2f2c78d0 100644 --- a/packages/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/packages/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -705,7 +705,6 @@ bool DivAtomicTest(T i0, T i1) { bool passed = true; using Kokkos::abs; - using std::abs; if (abs((resSerial - res) * 1.) 
> 1e-5) { passed = false; diff --git a/packages/kokkos/core/unit_test/TestAtomicViews.hpp b/packages/kokkos/core/unit_test/TestAtomicViews.hpp index 88f1aee630add62ec7753b191fb58ba0ca6d7a6a..916dc949bcfb2140197d213fe3a52e4239f19f76 100644 --- a/packages/kokkos/core/unit_test/TestAtomicViews.hpp +++ b/packages/kokkos/core/unit_test/TestAtomicViews.hpp @@ -73,8 +73,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type& update, - const volatile value_type& input) { + static void join(value_type& update, const value_type& input) { update |= input; } diff --git a/packages/kokkos/core/unit_test/TestAtomics.hpp b/packages/kokkos/core/unit_test/TestAtomics.hpp index f2993914a11560f30cf70aabacf444f3b38e9bfc..0db2c735a78e6dc888e6f2a7388b027994317af2 100644 --- a/packages/kokkos/core/unit_test/TestAtomics.hpp +++ b/packages/kokkos/core/unit_test/TestAtomics.hpp @@ -405,9 +405,9 @@ T ExchLoop(int loop) { } template <class T> -T ExchLoopSerial( - typename std::conditional<!std::is_same<T, Kokkos::complex<double> >::value, - int, void>::type loop) { +T ExchLoopSerial(std::conditional_t< + !std::is_same<T, Kokkos::complex<double> >::value, int, void> + loop) { T* data = new T[1]; T* data2 = new T[1]; data[0] = 0; @@ -427,9 +427,9 @@ T ExchLoopSerial( } template <class T> -T ExchLoopSerial( - typename std::conditional<std::is_same<T, Kokkos::complex<double> >::value, - int, void>::type loop) { +T ExchLoopSerial(std::conditional_t< + std::is_same<T, Kokkos::complex<double> >::value, int, void> + loop) { T* data = new T[1]; T* data2 = new T[1]; data[0] = 0; diff --git a/packages/kokkos/core/unit_test/TestCXX11.hpp b/packages/kokkos/core/unit_test/TestCXX11.hpp index bbe0d01cbae2228afb61f2dfc51d81c81d95173e..3dbce28ad1065768d3d8e18096c74053c0653a6c 100644 --- a/packages/kokkos/core/unit_test/TestCXX11.hpp +++ b/packages/kokkos/core/unit_test/TestCXX11.hpp @@ -216,8 +216,7 @@ struct 
FunctorReduceTest { void init(value_type& update) const { update = 0.0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& update, - volatile value_type const& input) const { + void join(value_type& update, value_type const& input) const { update += input; } }; diff --git a/packages/kokkos/core/unit_test/TestComplex.hpp b/packages/kokkos/core/unit_test/TestComplex.hpp index 513fb6aeeef5f405642cc7e0560d6bcbf3a5af35..cd4298f8e016922dd3f98e53d79094f6a9d28020 100644 --- a/packages/kokkos/core/unit_test/TestComplex.hpp +++ b/packages/kokkos/core/unit_test/TestComplex.hpp @@ -48,11 +48,6 @@ namespace Test { -#ifdef KOKKOS_COMPILER_NVHPC -// warning: 'long double' is treated as 'double' in device code -#pragma diag_suppress 20208 -#endif - // Test construction and assignment template <class ExecSpace> @@ -369,6 +364,10 @@ struct TestComplexSpecialFunctions { r = {1.380543138238714, 0.2925178131625636}; ASSERT_FLOAT_EQ(h_results(17).real(), r.real()); ASSERT_FLOAT_EQ(h_results(17).imag(), r.imag()); + // log10 + r = std::log10(a); + ASSERT_FLOAT_EQ(h_results(18).real(), r.real()); + ASSERT_FLOAT_EQ(h_results(18).imag(), r.imag()); #endif } @@ -396,6 +395,7 @@ struct TestComplexSpecialFunctions { d_results(15) = Kokkos::asin(a); d_results(16) = Kokkos::acos(a); d_results(17) = Kokkos::atan(a); + d_results(18) = Kokkos::log10(a); } }; diff --git a/packages/kokkos/core/unit_test/TestConcurrentBitset.hpp b/packages/kokkos/core/unit_test/TestConcurrentBitset.hpp index 5a7b8e4bae55565c277637e8f04909029ea37ba7..9feac14d43c00331cc0d5bbeda63b6d85d3d834a 100644 --- a/packages/kokkos/core/unit_test/TestConcurrentBitset.hpp +++ b/packages/kokkos/core/unit_test/TestConcurrentBitset.hpp @@ -47,7 +47,6 @@ #include <gtest/gtest.h> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp b/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp index 
73db630b305b59a44bff3431dbbac87a8375b626..4500bd83bba75d7ac3c4de1b4e61cb7e1f8d6111 100644 --- a/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp +++ b/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp @@ -225,12 +225,12 @@ struct TestDeepCopyScalarConversion { using view_type_s1_2d = Kokkos::View<Scalar1**, Layout1, TEST_EXECSPACE>; using view_type_s2_2d = Kokkos::View<Scalar2**, Layout2, TEST_EXECSPACE>; - using base_layout1 = typename std::conditional< - std::is_same<Layout1, Kokkos::LayoutStride>::value, Kokkos::LayoutLeft, - Layout1>::type; - using base_layout2 = typename std::conditional< - std::is_same<Layout2, Kokkos::LayoutStride>::value, Kokkos::LayoutLeft, - Layout2>::type; + using base_layout1 = + std::conditional_t<std::is_same<Layout1, Kokkos::LayoutStride>::value, + Kokkos::LayoutLeft, Layout1>; + using base_layout2 = + std::conditional_t<std::is_same<Layout2, Kokkos::LayoutStride>::value, + Kokkos::LayoutLeft, Layout2>; using base_type_s1_1d = Kokkos::View<Scalar1*, base_layout1, TEST_EXECSPACE>; using base_type_s2_1d = Kokkos::View<Scalar2*, base_layout2, TEST_EXECSPACE>; diff --git a/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp index 7ffa5aaddc899d1c0612eba85473eb64d9c55d5b..d915b7e4728dacec80ebc3ae1fad87b30cf7e51a 100644 --- a/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp +++ b/packages/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -90,7 +90,7 @@ char** init_kokkos_args(bool do_threads, bool do_numa, bool do_device, nthreads = omp_get_max_threads(); } #elif defined(KOKKOS_ENABLE_HPX) - const auto concurrency = std::thread::hardware_concurrency(); + const int concurrency = std::thread::hardware_concurrency(); if (concurrency < nthreads) { nthreads = concurrency; } @@ -165,7 +165,7 @@ Kokkos::InitArguments init_initstruct(bool do_threads, bool do_numa, nthreads = omp_get_max_threads(); } #elif defined(KOKKOS_ENABLE_HPX) - const auto 
concurrency = std::thread::hardware_concurrency(); + const int concurrency = std::thread::hardware_concurrency(); if (concurrency < nthreads) { nthreads = concurrency; } diff --git a/packages/kokkos/core/unit_test/TestExecSpacePartitioning.hpp b/packages/kokkos/core/unit_test/TestExecSpacePartitioning.hpp index f8f5275d3dd66ce42256bf61637112d026687a3a..82228476e712a4c1cc834fccff6289945f9bb279 100644 --- a/packages/kokkos/core/unit_test/TestExecSpacePartitioning.hpp +++ b/packages/kokkos/core/unit_test/TestExecSpacePartitioning.hpp @@ -43,7 +43,6 @@ */ #include <cstdio> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestExecutionSpace.hpp b/packages/kokkos/core/unit_test/TestExecutionSpace.hpp index 8e4331e809a806f8b7931735e445706abc2e06c5..c9d2d275bfe4b1f73a990a6f9f83242cb970946c 100644 --- a/packages/kokkos/core/unit_test/TestExecutionSpace.hpp +++ b/packages/kokkos/core/unit_test/TestExecutionSpace.hpp @@ -42,39 +42,41 @@ //@HEADER */ -#include <cstdio> - #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -namespace Test { - namespace { -struct StructCopy { +template <class ExecutionSpace> +struct CheckClassWithExecutionSpaceAsDataMemberIsCopyable { Kokkos::DefaultExecutionSpace device; Kokkos::DefaultHostExecutionSpace host; + + KOKKOS_FUNCTION void operator()(int, int& e) const { + auto copy = *this; + // not actually doing anything useful, mostly checking that + // ExecutionSpace::in_parallel() is callable + if (static_cast<int>(copy.device.in_parallel()) < 0) { + ++e; + } + } + + CheckClassWithExecutionSpaceAsDataMemberIsCopyable() { + int errors; + Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(0, 1), *this, + errors); + EXPECT_EQ(errors, 0); + } }; -template <class ExecutionSpace> -void check_struct_copy() { -#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) - // FIXME_OPENMPTARGET nvlink error: Undefined reference to - // '_ZSt25__throw_bad_function_callv' in - // 
'/tmp/TestOpenMPTarget_ExecutionSpace-434d81.cubin' +// FIXME_OPENMPTARGET nvlink error: Undefined reference to +// '_ZSt25__throw_bad_function_callv' in +// '/tmp/TestOpenMPTarget_ExecutionSpace-434d81.cubin' #ifndef KOKKOS_ENABLE_OPENMPTARGET - StructCopy data; - parallel_for( - Kokkos::RangePolicy<ExecutionSpace>(0, 1), KOKKOS_LAMBDA(int) { - StructCopy data2 = data; - KOKKOS_IMPL_DO_NOT_USE_PRINTF("%i \n", data2.device.in_parallel()); - }); -#endif -#endif +TEST(TEST_CATEGORY, execution_space_as_class_data_member) { + CheckClassWithExecutionSpaceAsDataMemberIsCopyable<TEST_EXECSPACE>(); } +#endif } // namespace - -TEST(TEST_CATEGORY, copy_structure) { check_struct_copy<TEST_EXECSPACE>(); } -} // namespace Test diff --git a/packages/kokkos/core/unit_test/TestFunctorAnalysis.hpp b/packages/kokkos/core/unit_test/TestFunctorAnalysis.hpp index d9e2486a4a9e2c6edcea39968d85e4773b38c484..5e0910b523d1361652f5ad8fc02f20a82953d220 100644 --- a/packages/kokkos/core/unit_test/TestFunctorAnalysis.hpp +++ b/packages/kokkos/core/unit_test/TestFunctorAnalysis.hpp @@ -61,7 +61,7 @@ struct TestFunctorAnalysis_03 { void operator()(int, value_type&) const {} KOKKOS_INLINE_FUNCTION - void join(value_type volatile&, value_type const volatile&) const {} + void join(value_type&, value_type const&) const {} KOKKOS_INLINE_FUNCTION static void init(value_type&) {} }; @@ -75,11 +75,11 @@ void test_functor_analysis() { Kokkos::RangePolicy<ExecSpace>, decltype(c01)>; - using R01 = typename A01::template Reducer<typename ExecSpace::memory_space>; + using R01 = typename A01::Reducer; - static_assert(std::is_same<typename A01::value_type, void>::value, ""); - static_assert(std::is_same<typename A01::pointer_type, void>::value, ""); - static_assert(std::is_same<typename A01::reference_type, void>::value, ""); + static_assert(std::is_void<typename A01::value_type>::value, ""); + static_assert(std::is_void<typename A01::pointer_type>::value, ""); + static_assert(std::is_void<typename 
A01::reference_type>::value, ""); static_assert(std::is_same<typename R01::functor_type, decltype(c01)>::value, ""); @@ -94,7 +94,7 @@ void test_functor_analysis() { using A02 = Kokkos::Impl::FunctorAnalysis< Kokkos::Impl::FunctorPatternInterface::REDUCE, Kokkos::RangePolicy<ExecSpace>, decltype(c02)>; - using R02 = typename A02::template Reducer<typename ExecSpace::memory_space>; + using R02 = typename A02::Reducer; static_assert(std::is_same<typename A02::value_type, double>::value, ""); static_assert(std::is_same<typename A02::pointer_type, double*>::value, ""); @@ -114,7 +114,7 @@ void test_functor_analysis() { using A03 = Kokkos::Impl::FunctorAnalysis< Kokkos::Impl::FunctorPatternInterface::REDUCE, Kokkos::RangePolicy<ExecSpace>, TestFunctorAnalysis_03>; - using R03 = typename A03::template Reducer<typename ExecSpace::memory_space>; + using R03 = typename A03::Reducer; static_assert(std::is_same<typename A03::value_type, TestFunctorAnalysis_03::value_type>::value, diff --git a/packages/kokkos/core/unit_test/TestHalfOperators.hpp b/packages/kokkos/core/unit_test/TestHalfOperators.hpp index 543ae506ee251de8c8136cf06b8e08ed1d4e2f53..977a704657c7bb6f350143e31c577738de4ff192 100644 --- a/packages/kokkos/core/unit_test/TestHalfOperators.hpp +++ b/packages/kokkos/core/unit_test/TestHalfOperators.hpp @@ -1003,7 +1003,6 @@ void __test_half_operators(half_type h_lhs, half_type h_rhs) { std::memcpy(c_arr, h_arr, n_bytes); for (i = 0; i < n_bytes; i++) ASSERT_EQ(c_arr[i], h_arr_ptr[i]); - std::memcpy(h_arr, c_arr, n_bytes); ASSERT_EQ(h_arr[0], h_arr0); ASSERT_EQ(h_arr[1], h_arr1); } diff --git a/packages/kokkos/core/unit_test/TestHostSharedPtr.hpp b/packages/kokkos/core/unit_test/TestHostSharedPtr.hpp index 731e9fc36d9bf17aa93fc1e458d3058bf7a37994..29bc45592b0287c5671daaac6db983905beaf5c0 100644 --- a/packages/kokkos/core/unit_test/TestHostSharedPtr.hpp +++ b/packages/kokkos/core/unit_test/TestHostSharedPtr.hpp @@ -142,7 +142,7 @@ TEST(TEST_CATEGORY, host_shared_ptr_get) 
{ HostSharedPtr<T> p2; p2 = p1; // copy assignment EXPECT_EQ(p1.get(), &i); - EXPECT_EQ(p1.get(), &i); + EXPECT_EQ(p2.get(), &i); } { T i; diff --git a/packages/kokkos/core/unit_test/TestHostSharedPtrAccessOnDevice.hpp b/packages/kokkos/core/unit_test/TestHostSharedPtrAccessOnDevice.hpp index 10180251ba582e9d11672ce74b4d22335c9da3d4..9a4da5dddebc95a5e24521d6c70a9a22d6063fe6 100644 --- a/packages/kokkos/core/unit_test/TestHostSharedPtrAccessOnDevice.hpp +++ b/packages/kokkos/core/unit_test/TestHostSharedPtrAccessOnDevice.hpp @@ -42,6 +42,7 @@ //@HEADER */ +#include <impl/Kokkos_StringManipulation.hpp> #include <impl/Kokkos_HostSharedPtr.hpp> #include <Kokkos_Core.hpp> @@ -55,14 +56,9 @@ class Data { char d[64]; public: - // Because strncpy is not supported within device code - static KOKKOS_FUNCTION void my_strncpy(char* dst, const char* src, - size_t cnt) { - while (cnt-- > 0 && (*dst++ = *src++) != '\0') - ; - while (cnt-- > 0) *dst++ = '\0'; + KOKKOS_FUNCTION void write(char const* s) { + Kokkos::Impl::strncpy(d, s, sizeof(d)); } - KOKKOS_FUNCTION void write(char const* s) { my_strncpy(d, s, sizeof(d)); } }; template <class SmartPtr> @@ -281,10 +277,14 @@ TEST(TEST_CATEGORY, host_shared_ptr_tracking) { Kokkos::Experimental::SYCLSharedUSMSpace>(); #endif #ifdef KOKKOS_ENABLE_HIP - if (std::is_same<TEST_EXECSPACE, Kokkos::Experimental::HIP>::value) + if (std::is_same<TEST_EXECSPACE, Kokkos::Experimental::HIP>::value) { host_shared_ptr_test_reference_counting< Kokkos::Experimental::HIPHostPinnedSpace, Kokkos::Experimental::HIPHostPinnedSpace>(); + host_shared_ptr_test_reference_counting< + Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIPManagedSpace>(); + } #endif } diff --git a/packages/kokkos/core/unit_test/TestInit.hpp b/packages/kokkos/core/unit_test/TestInit.hpp index f124c6202c5675a28c0b539b7d86e260b62c5874..20536b0d35f44a14bc392c4302072c59cf710772 100644 --- a/packages/kokkos/core/unit_test/TestInit.hpp +++ 
b/packages/kokkos/core/unit_test/TestInit.hpp @@ -43,7 +43,6 @@ */ #include <cstdio> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestInitializationSettings.cpp b/packages/kokkos/core/unit_test/TestInitializationSettings.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a5b11c5a30d049df4fb590b0838f10fabb9e8ae5 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestInitializationSettings.cpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <impl/Kokkos_InitializationSettings.hpp> + +namespace { + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +void take_initialization_settings(Kokkos::InitializationSettings const&) {} + +TEST(defaultdevicetype, + init_arguments_implicit_conversion_to_initialization_settings) { + Kokkos::InitArguments arguments; + take_initialization_settings(arguments); // check that conversion is implicit + arguments.device_id = 1; + arguments.tune_internals = true; + Kokkos::InitializationSettings settings{arguments}; + EXPECT_FALSE(settings.has_num_threads()); + EXPECT_TRUE(settings.has_device_id()); + EXPECT_EQ(settings.get_device_id(), 1); + EXPECT_FALSE(settings.has_num_devices()); + EXPECT_FALSE(settings.has_skip_device()); + EXPECT_FALSE(settings.has_disable_warnings()); + EXPECT_TRUE(settings.has_tune_internals()); + EXPECT_TRUE(settings.get_tune_internals()); + EXPECT_FALSE(settings.has_tools_help()); + EXPECT_FALSE(settings.has_tools_libs()); + EXPECT_FALSE(settings.has_tools_args()); +} +#endif + +TEST(defaultdevicetype, initialization_settings) { + auto const settings = Kokkos::InitializationSettings() + .set_num_threads(255) + .set_disable_warnings(false) + .set_tools_libs("my_custom_tool.so"); + EXPECT_TRUE(settings.has_num_threads()); + 
EXPECT_EQ(settings.get_num_threads(), 255); + EXPECT_FALSE(settings.has_device_id()); + EXPECT_FALSE(settings.has_num_devices()); + EXPECT_FALSE(settings.has_skip_device()); + EXPECT_TRUE(settings.has_disable_warnings()); + EXPECT_FALSE(settings.get_disable_warnings()); + EXPECT_FALSE(settings.has_tune_internals()); + EXPECT_FALSE(settings.has_tools_help()); + EXPECT_TRUE(settings.has_tools_libs()); + EXPECT_EQ(settings.get_tools_libs(), "my_custom_tool.so"); + EXPECT_FALSE(settings.has_tools_args()); +} + +#define STATIC_ASSERT(...) static_assert(__VA_ARGS__, "") // FIXME C++17 + +constexpr bool test_initialization_settings_getter() { +#define CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(NAME, TYPE) \ + STATIC_ASSERT(std::is_same< \ + decltype(std::declval<Kokkos::InitializationSettings const&>() \ + .has_##NAME()), \ + bool>::value); \ + STATIC_ASSERT(std::is_same< \ + decltype(std::declval<Kokkos::InitializationSettings const&>() \ + .get_##NAME()), \ + TYPE>::value); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(num_threads, int); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(device_id, int); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(num_devices, int); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(skip_device, int); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(disable_warnings, bool); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(tune_internals, bool); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(tools_help, bool); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(tools_libs, std::string); + CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE(tools_args, std::string); +#undef CHECK_INITIALIZATION_SETTINGS_GETTER_RETURN_TYPE + return true; +} + +STATIC_ASSERT(test_initialization_settings_getter()); + +STATIC_ASSERT( + std::is_default_constructible<Kokkos::InitializationSettings>::value); + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestInterOp.cpp b/packages/kokkos/core/unit_test/TestInterOp.cpp index 
7f08afada9d7c74e6d949949a6f52c2b7e3b7ada..50238a93a5a8ea9b1125e42cb0d60ab8dc762e16 100644 --- a/packages/kokkos/core/unit_test/TestInterOp.cpp +++ b/packages/kokkos/core/unit_test/TestInterOp.cpp @@ -50,9 +50,10 @@ static_assert( std::is_same< Kokkos::Experimental::python_view_type_t<Kokkos::View<double*>>, - Kokkos::View< - double*, typename Kokkos::DefaultExecutionSpace::array_layout, - typename Kokkos::DefaultExecutionSpace::memory_space>>::value, + Kokkos::View<double*, + typename Kokkos::DefaultExecutionSpace::array_layout, + typename Kokkos::DefaultExecutionSpace::memory_space, + Kokkos::Experimental::DefaultViewHooks>>::value, "Error! Unexpected python_view_type for: View"); // DynRankView @@ -69,9 +70,10 @@ static_assert( std::is_same< Kokkos::Experimental::python_view_type_t< Kokkos::View<double*, Kokkos::DefaultExecutionSpace>>, - Kokkos::View< - double*, typename Kokkos::DefaultExecutionSpace::array_layout, - typename Kokkos::DefaultExecutionSpace::memory_space>>::value, + Kokkos::View<double*, + typename Kokkos::DefaultExecutionSpace::array_layout, + typename Kokkos::DefaultExecutionSpace::memory_space, + Kokkos::Experimental::DefaultViewHooks>>::value, "Error! Unexpected python_view_type for: View + Execution Space"); // DynRankView + Execution Space @@ -85,11 +87,12 @@ static_assert( "Error! Unexpected python_view_type for: DynRankView + Execution Space"); // View + Memory space -static_assert(std::is_same<Kokkos::Experimental::python_view_type_t< - Kokkos::View<int64_t*, Kokkos::HostSpace>>, - Kokkos::View<int64_t*, Kokkos::LayoutRight, - Kokkos::HostSpace>>::value, - "Error! Unexpected python_view_type for: View + Memory space"); +static_assert( + std::is_same<Kokkos::Experimental::python_view_type_t< + Kokkos::View<int64_t*, Kokkos::HostSpace>>, + Kokkos::View<int64_t*, Kokkos::LayoutRight, Kokkos::HostSpace, + Kokkos::Experimental::DefaultViewHooks>>::value, + "Error! 
Unexpected python_view_type for: View + Memory space"); // DynRankView + Memory space static_assert( @@ -105,8 +108,8 @@ static_assert( Kokkos::Experimental::python_view_type_t<Kokkos::View< int**, Kokkos::LayoutLeft, Kokkos::DefaultExecutionSpace>>, Kokkos::View<int**, Kokkos::LayoutLeft, - typename Kokkos::DefaultExecutionSpace::memory_space>>:: - value, + typename Kokkos::DefaultExecutionSpace::memory_space, + Kokkos::Experimental::DefaultViewHooks>>::value, "Error! Unexpected python_view_type for: View + Layout + Execution space"); // DynRankView + Layout + Execution space @@ -121,10 +124,10 @@ static_assert( // View + Layout + Memory Space static_assert( - std::is_same< - Kokkos::Experimental::python_view_type_t< - Kokkos::View<uint32_t**, Kokkos::LayoutLeft, Kokkos::HostSpace>>, - Kokkos::View<uint32_t**, Kokkos::LayoutLeft, Kokkos::HostSpace>>::value, + std::is_same<Kokkos::Experimental::python_view_type_t<Kokkos::View< + uint32_t**, Kokkos::LayoutLeft, Kokkos::HostSpace>>, + Kokkos::View<uint32_t**, Kokkos::LayoutLeft, Kokkos::HostSpace, + Kokkos::Experimental::DefaultViewHooks>>::value, "Error! Unexpected python_view_type for: View + Layout + Memory Space"); // DynRankView + Layout + Memory Space @@ -144,6 +147,7 @@ static_assert( Kokkos::MemoryTraits<Kokkos::RandomAccess>>>, Kokkos::View<float***, Kokkos::LayoutLeft, typename Kokkos::DefaultHostExecutionSpace::memory_space, + Kokkos::Experimental::DefaultViewHooks, Kokkos::MemoryTraits<Kokkos::RandomAccess>>>::value, "Error! 
Unexpected python_view_type for: View + Layout + Execution space + " "Memory Trait"); diff --git a/packages/kokkos/core/unit_test/TestIrregularLayout.hpp b/packages/kokkos/core/unit_test/TestIrregularLayout.hpp index 86f9353e2d539ec931e0d373c3f6746497222ebc..af4fd22452fa0427c3d0536e0a45168503b00677 100644 --- a/packages/kokkos/core/unit_test/TestIrregularLayout.hpp +++ b/packages/kokkos/core/unit_test/TestIrregularLayout.hpp @@ -44,7 +44,6 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <stdexcept> #include <sstream> #include <iostream> #define OFFSET_LIST_MAX_SIZE 100 diff --git a/packages/kokkos/core/unit_test/TestJoinBackwardCompatibility.hpp b/packages/kokkos/core/unit_test/TestJoinBackwardCompatibility.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ab5ccd0039583a3e3d626f7b7bd789e783b3b66e --- /dev/null +++ b/packages/kokkos/core/unit_test/TestJoinBackwardCompatibility.hpp @@ -0,0 +1,154 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> + +namespace { + +enum MyErrorCode { + no_error = 0b000, + error_operator_plus_equal = 0b001, + error_operator_plus_equal_volatile = 0b010, + error_join_volatile = 0b100 + +}; + +KOKKOS_FUNCTION constexpr MyErrorCode operator|(MyErrorCode lhs, + MyErrorCode rhs) { + return static_cast<MyErrorCode>(static_cast<int>(lhs) | + static_cast<int>(rhs)); +} + +static_assert((no_error | error_operator_plus_equal_volatile) == + error_operator_plus_equal_volatile, + ""); +static_assert((error_join_volatile | error_operator_plus_equal) == 0b101, ""); + +struct MyJoinBackCompatValueType { + MyErrorCode err = no_error; +}; + +KOKKOS_FUNCTION void operator+=(MyJoinBackCompatValueType &x, + const MyJoinBackCompatValueType &y) { + x.err = x.err | y.err | error_operator_plus_equal; +} + +KOKKOS_FUNCTION void operator+=(volatile 
MyJoinBackCompatValueType &x, + const volatile MyJoinBackCompatValueType &y) { + x.err = x.err | y.err | error_operator_plus_equal_volatile; +} + +struct ReducerWithJoinThatTakesNonVolatileQualifiedArgs { + using reducer = ReducerWithJoinThatTakesNonVolatileQualifiedArgs; + using value_type = MyJoinBackCompatValueType; + KOKKOS_FUNCTION void join(MyJoinBackCompatValueType &x, + MyJoinBackCompatValueType const &y) const { + x.err = x.err | y.err; + } + KOKKOS_FUNCTION void operator()(int, MyJoinBackCompatValueType &) const {} + KOKKOS_FUNCTION + ReducerWithJoinThatTakesNonVolatileQualifiedArgs() {} +}; + +struct ReducerWithJoinThatTakesBothVolatileAndNonVolatileQualifiedArgs { + using reducer = + ReducerWithJoinThatTakesBothVolatileAndNonVolatileQualifiedArgs; + using value_type = MyJoinBackCompatValueType; + KOKKOS_FUNCTION void join(MyJoinBackCompatValueType &x, + MyJoinBackCompatValueType const &y) const { + x.err = x.err | y.err; + } + KOKKOS_FUNCTION void join(MyJoinBackCompatValueType volatile &x, + MyJoinBackCompatValueType const volatile &y) const { + x.err = x.err | y.err | error_join_volatile; + } + KOKKOS_FUNCTION void operator()(int, MyJoinBackCompatValueType &) const {} + KOKKOS_FUNCTION + ReducerWithJoinThatTakesBothVolatileAndNonVolatileQualifiedArgs() {} +}; + +struct ReducerWithJoinThatTakesVolatileQualifiedArgs { + using reducer = ReducerWithJoinThatTakesVolatileQualifiedArgs; + using value_type = MyJoinBackCompatValueType; + KOKKOS_FUNCTION void join(MyJoinBackCompatValueType volatile &x, + MyJoinBackCompatValueType const volatile &y) const { + x.err = x.err | y.err; + } + KOKKOS_FUNCTION void operator()(int, MyJoinBackCompatValueType &) const {} + KOKKOS_FUNCTION ReducerWithJoinThatTakesVolatileQualifiedArgs() {} +}; + +void test_join_backward_compatibility() { + MyJoinBackCompatValueType result; + Kokkos::RangePolicy<> policy(0, 1); + +#if defined KOKKOS_ENABLE_DEPRECATED_CODE_3 + Kokkos::parallel_reduce( + policy, 
ReducerWithJoinThatTakesVolatileQualifiedArgs{}, result); + ASSERT_EQ(result.err, no_error); +#endif + + Kokkos::parallel_reduce( + policy, ReducerWithJoinThatTakesBothVolatileAndNonVolatileQualifiedArgs{}, + result); + ASSERT_EQ(result.err, no_error); + Kokkos::parallel_reduce( + policy, ReducerWithJoinThatTakesNonVolatileQualifiedArgs{}, result); + ASSERT_EQ(result.err, no_error); + + // avoid warnings unused function 'operator+=' + result += {}; + ASSERT_EQ(result.err, error_operator_plus_equal); + static_cast<MyJoinBackCompatValueType volatile &>(result) += + static_cast<MyJoinBackCompatValueType const volatile &>(result); + ASSERT_EQ(result.err, + error_operator_plus_equal | error_operator_plus_equal_volatile); +} + +TEST(TEST_CATEGORY, join_backward_compatibility) { + test_join_backward_compatibility(); +} + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestLegionInteroperability.cpp b/packages/kokkos/core/unit_test/TestLegionInteroperability.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1838558451d52dbb3d0bc26d46cb50621d00343c --- /dev/null +++ b/packages/kokkos/core/unit_test/TestLegionInteroperability.cpp @@ -0,0 +1,159 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#if defined(KOKKOS_COMPILER_INTEL) && (KOKKOS_COMPILER_INTEL < 1800) + +namespace { + +// error: expression must have a constant value +// std::enable_if_t<!has_deprecated_cuda_impl_initialize_v<T>> +constexpr bool +test_compiler_upgrade_needed_for_detection_idiom_and_variable_template() { + return true; +} +static_assert( + test_compiler_upgrade_needed_for_detection_idiom_and_variable_template(), + "Intel C++ compiler is awesome"); + +} // namespace + +#else + +// The purpose of this compile-only test is twofold: +// 1. 
mimic Legion's use of Kokkos implementation details for initializing the +// execution environment +// 2. demonstrate how to leverage SFINAE to support Kokkos version through the +// ExecutionSpace::impl_initialize breaking change before release 3.7 +namespace { +#define STATIC_ASSERT(...) static_assert(__VA_ARGS__, "") // FIXME C++17 + +#ifdef KOKKOS_ENABLE_CUDA +template <class T> +using deprecated_cuda_impl_initialize_t = + decltype(T::impl_initialize(typename T::SelectDevice(0), 1)); + +template <class T> +constexpr bool has_deprecated_cuda_impl_initialize_v = + Kokkos::is_detected<deprecated_cuda_impl_initialize_t, T>::value; + +template <class T> +std::enable_if_t<has_deprecated_cuda_impl_initialize_v<T> > +legion_initialize_kokkos_cuda() { + int cuda_device_id = 0; + int num_instances = 1; + T::impl_initialize(typename T::SelectDevice(cuda_device_id), num_instances); +} + +template <class T> +std::enable_if_t<!has_deprecated_cuda_impl_initialize_v<T> > +legion_initialize_kokkos_cuda() { + int cuda_device_id = 0; + auto const settings = + Kokkos::InitializationSettings().set_device_id(cuda_device_id); + T::impl_initialize(settings); +} + +STATIC_ASSERT(std::is_void< + decltype(legion_initialize_kokkos_cuda<Kokkos::Cuda>())>::value); +#endif + +#ifdef KOKKOS_ENABLE_OPENMP +template <class T> +using deprecated_openmp_impl_initialize_t = decltype(T::impl_initialize(0)); + +template <class T> +constexpr bool has_deprecated_openmp_impl_initialize_v = + Kokkos::is_detected<deprecated_openmp_impl_initialize_t, T>::value; + +template <class T> +std::enable_if_t<has_deprecated_openmp_impl_initialize_v<T> > +legion_initialize_kokkos_openmp() { + int thread_count = -1; + T::impl_initialize(thread_count); +} + +template <class T> +std::enable_if_t<!has_deprecated_openmp_impl_initialize_v<T> > +legion_initialize_kokkos_openmp() { + int thread_count = -1; + auto const settings = + Kokkos::InitializationSettings().set_num_threads(thread_count); +
T::impl_initialize(settings); +} + +STATIC_ASSERT(std::is_void<decltype( + legion_initialize_kokkos_openmp<Kokkos::OpenMP>())>::value); + +#endif + +#ifdef KOKKOS_ENABLE_SERIAL +template <class T> +using deprecated_serial_impl_initialize_t = decltype(T::impl_initialize()); + +template <class T> +constexpr bool has_deprecated_serial_impl_initialize_v = + Kokkos::is_detected<deprecated_serial_impl_initialize_t, T>::value; + +template <class T> +std::enable_if_t<has_deprecated_serial_impl_initialize_v<T> > +legion_initialize_kokkos_serial() { + T::impl_initialize(); +} + +template <class T> +std::enable_if_t<!has_deprecated_serial_impl_initialize_v<T> > +legion_initialize_kokkos_serial() { + Kokkos::InitializationSettings settings; + T::impl_initialize(settings); +} + +STATIC_ASSERT(std::is_void<decltype( + legion_initialize_kokkos_serial<Kokkos::Serial>())>::value); +#endif + +} // namespace + +#endif diff --git a/packages/kokkos/core/unit_test/TestLocalDeepCopy.hpp b/packages/kokkos/core/unit_test/TestLocalDeepCopy.hpp index 80feb11f9b711bdbe2816d45d5df4c313e4c0865..cdb14fec54291c745ad266f37a9dd846f0921d7a 100644 --- a/packages/kokkos/core/unit_test/TestLocalDeepCopy.hpp +++ b/packages/kokkos/core/unit_test/TestLocalDeepCopy.hpp @@ -44,7 +44,6 @@ #include <gtest/gtest.h> -#include <stdexcept> #include <sstream> #include <iostream> #include <time.h> diff --git a/packages/kokkos/core/unit_test/TestMDRange.hpp b/packages/kokkos/core/unit_test/TestMDRange.hpp index 5ff87f8d9afed9fb4e8aebaadecf1788087a0870..e0a880a7a1435bc6879fc67c7e7b53876e81bb74 100644 --- a/packages/kokkos/core/unit_test/TestMDRange.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange.hpp @@ -70,21 +70,6 @@ struct TestMDRange_ReduceArray_2D { const unsigned array_size) : input_view("input_view", N0, N1), value_count(array_size) {} - KOKKOS_INLINE_FUNCTION - void init(scalar_type dst[]) const { - for (unsigned i = 0; i < value_count; ++i) { - dst[i] = 0.0; - } - } - - KOKKOS_INLINE_FUNCTION - void 
join(volatile scalar_type dst[], - const volatile scalar_type src[]) const { - for (unsigned i = 0; i < value_count; ++i) { - dst[i] += src[i]; - } - } - KOKKOS_INLINE_FUNCTION void operator()(const int i, const int j) const { input_view(i, j) = 1; } @@ -167,8 +152,7 @@ struct TestMDRange_ReduceArray_3D { } KOKKOS_INLINE_FUNCTION - void join(volatile scalar_type dst[], - const volatile scalar_type src[]) const { + void join(scalar_type dst[], const scalar_type src[]) const { for (unsigned i = 0; i < value_count; ++i) { dst[i] += src[i]; } @@ -385,7 +369,7 @@ struct TestMDRange_2D { parallel_reduce( "rank2-min-reducer", range, KOKKOS_LAMBDA(const int i, const int j, double &min_val) { - min_val = Kokkos::Experimental::fmin(v_in(i, j), min_val); + min_val = Kokkos::fmin(v_in(i, j), min_val); }, reducer_scalar); @@ -3902,14 +3886,6 @@ struct TestMDRange_ReduceScalar { void operator+=(const Scalar &src) { for (int i = 0; i < 4; i++) v[i] += src.v[i]; } - KOKKOS_INLINE_FUNCTION - void operator=(const volatile Scalar &src) volatile { - for (int i = 0; i < 4; i++) v[i] = src.v[i]; - } - KOKKOS_INLINE_FUNCTION - void operator+=(const volatile Scalar &src) volatile { - for (int i = 0; i < 4; i++) v[i] += src.v[i]; - } }; static void test_scalar_reduce(const int N0, const int N1) { diff --git a/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp b/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp index 0e1514a33f4d95733bc88f486cff8a5026411116..7294f8e62e2ccf5c9fcd6106f424f00e2e221862 100644 --- a/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp +++ b/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp @@ -48,13 +48,12 @@ #include <algorithm> #include <initializer_list> #include <type_traits> -#include "Kokkos_ExecPolicy.hpp" -#include "Kokkos_Parallel_Reduce.hpp" #include <cfloat> -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ - defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) +#if 
defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) || \ + defined(KOKKOS_ENABLE_OPENACC) #else #define MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS #endif @@ -239,14 +238,14 @@ struct FloatingPointComparison { // Using absolute here instead of abs, since we actually test abs ... template <class T> - KOKKOS_FUNCTION typename std::enable_if<std::is_signed<T>::value, T>::type - absolute(T val) const { + KOKKOS_FUNCTION std::enable_if_t<std::is_signed<T>::value, T> absolute( + T val) const { return val < T(0) ? -val : val; } template <class T> - KOKKOS_FUNCTION typename std::enable_if<!std::is_signed<T>::value, T>::type - absolute(T val) const { + KOKKOS_FUNCTION std::enable_if_t<!std::is_signed<T>::value, T> absolute( + T val) const { return val; } @@ -257,10 +256,9 @@ struct FloatingPointComparison { bool ar = absolute(fpv) < abs_tol; if (!ar) { -#if !defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ENABLE_HIP) - printf("absolute value exceeds tolerance [|%e| > %e]\n", (double)fpv, - abs_tol); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "absolute value exceeds tolerance [|%e| > %e]\n", (double)fpv, + abs_tol); } return ar; @@ -279,12 +277,11 @@ struct FloatingPointComparison { double min_denom = static_cast<double>( absolute(rhs) < absolute(lhs) ? 
absolute(rhs) : absolute(lhs)); double rel_diff = abs_diff / min_denom; - bool ar = rel_diff < rel_tol; + bool ar = abs_diff == 0 || rel_diff < rel_tol; if (!ar) { -#if !defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ENABLE_HIP) - printf("relative difference exceeds tolerance [%e > %e]\n", - (double)rel_diff, rel_tol); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "relative difference exceeds tolerance [%e > %e]\n", + (double)rel_diff, rel_tol); } return ar; @@ -299,10 +296,10 @@ struct math_function_name; struct MathUnaryFunction_##FUNC { \ template <typename T> \ static KOKKOS_FUNCTION auto eval(T x) { \ - static_assert(std::is_same<decltype(Kokkos::Experimental::FUNC((T)0)), \ + static_assert(std::is_same<decltype(Kokkos::FUNC((T)0)), \ math_unary_function_return_type_t<T>>::value, \ ""); \ - return Kokkos::Experimental::FUNC(x); \ + return Kokkos::FUNC(x); \ } \ template <typename T> \ static auto eval_std(T x) { \ @@ -320,11 +317,17 @@ struct math_function_name; }; \ constexpr char math_function_name<MathUnaryFunction_##FUNC>::name[] +#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1 // Generally the expected ULP error should come from here: // https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html // For now 1s largely seem to work ... 
DEFINE_UNARY_FUNCTION_EVAL(exp, 2); +#ifdef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC exp2 not device callable, + // workaround computes it via exp +DEFINE_UNARY_FUNCTION_EVAL(exp2, 30); +#else DEFINE_UNARY_FUNCTION_EVAL(exp2, 2); +#endif DEFINE_UNARY_FUNCTION_EVAL(expm1, 2); DEFINE_UNARY_FUNCTION_EVAL(log, 2); DEFINE_UNARY_FUNCTION_EVAL(log10, 2); @@ -347,7 +350,9 @@ DEFINE_UNARY_FUNCTION_EVAL(tanh, 2); DEFINE_UNARY_FUNCTION_EVAL(asinh, 4); DEFINE_UNARY_FUNCTION_EVAL(acosh, 2); DEFINE_UNARY_FUNCTION_EVAL(atanh, 2); +#endif +#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2 #if defined(__APPLE__) // Apple's standard library implementation seems to have a poor implementation DEFINE_UNARY_FUNCTION_EVAL(erf, 5); @@ -365,10 +370,14 @@ DEFINE_UNARY_FUNCTION_EVAL(lgamma, 2); DEFINE_UNARY_FUNCTION_EVAL(ceil, 2); DEFINE_UNARY_FUNCTION_EVAL(floor, 2); DEFINE_UNARY_FUNCTION_EVAL(trunc, 2); +DEFINE_UNARY_FUNCTION_EVAL(round, 1); #ifndef KOKKOS_ENABLE_SYCL DEFINE_UNARY_FUNCTION_EVAL(nearbyint, 2); #endif +DEFINE_UNARY_FUNCTION_EVAL(logb, 2); +#endif + #undef DEFINE_UNARY_FUNCTION_EVAL #define DEFINE_BINARY_FUNCTION_EVAL(FUNC, ULP_FACTOR) \ @@ -376,10 +385,10 @@ DEFINE_UNARY_FUNCTION_EVAL(nearbyint, 2); template <typename T, typename U> \ static KOKKOS_FUNCTION auto eval(T x, U y) { \ static_assert( \ - std::is_same<decltype(Kokkos::Experimental::FUNC((T)0, (U)0)), \ + std::is_same<decltype(Kokkos::FUNC((T)0, (U)0)), \ math_binary_function_return_type_t<T, U>>::value, \ ""); \ - return Kokkos::Experimental::FUNC(x, y); \ + return Kokkos::FUNC(x, y); \ } \ template <typename T, typename U> \ static auto eval_std(T x, U y) { \ @@ -398,8 +407,14 @@ DEFINE_UNARY_FUNCTION_EVAL(nearbyint, 2); }; \ constexpr char math_function_name<MathBinaryFunction_##FUNC>::name[] +#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1 DEFINE_BINARY_FUNCTION_EVAL(pow, 2); DEFINE_BINARY_FUNCTION_EVAL(hypot, 2); +#endif +#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2 +DEFINE_BINARY_FUNCTION_EVAL(nextafter, 1); 
+DEFINE_BINARY_FUNCTION_EVAL(copysign, 1); +#endif #undef DEFINE_BINARY_FUNCTION_EVAL @@ -443,10 +458,9 @@ struct TestMathUnaryFunction : FloatingPointComparison { bool ar = compare(Func::eval(val_[i]), res_[i], Func::ulp_factor()); if (!ar) { ++e; -#if !defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ENABLE_HIP) - printf("value at %f which is %f was expected to be %f\n", (double)val_[i], - (double)Func::eval(val_[i]), (double)res_[i]); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "value at %f which is %f was expected to be %f\n", (double)val_[i], + (double)Func::eval(val_[i]), (double)res_[i]); } } }; @@ -482,11 +496,9 @@ struct TestMathBinaryFunction : FloatingPointComparison { bool ar = compare(Func::eval(val1_, val2_), res_, Func::ulp_factor()); if (!ar) { ++e; -#if !defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ENABLE_HIP) - printf("value at %f, %f which is %f was expected to be %f\n", - (double)val1_, (double)val2_, (double)Func::eval(val1_, val2_), - (double)res_); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "value at %f, %f which is %f was expected to be %f\n", (double)val1_, + (double)val2_, (double)Func::eval(val1_, val2_), (double)res_); } } }; @@ -497,6 +509,8 @@ void do_test_math_binary_function(Arg1 arg1, Arg2 arg2) { (TestMathBinaryFunction<Space, Func, Arg1, Arg2>(arg1, arg2), 0)...}; } +#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1 + TEST(TEST_CATEGORY, mathematical_functions_trigonometric_functions) { TEST_MATH_FUNCTION(sin)({true, false}); TEST_MATH_FUNCTION(sin)({-3, -2, -1, 0, 1}); @@ -785,6 +799,9 @@ TEST(TEST_CATEGORY, mathematical_functions_hyperbolic_functions) { TEST_MATH_FUNCTION(atanh)({-.97l, .86l, -.53l, .42l, -.1l, 0.l}); #endif } +#endif + +#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2 TEST(TEST_CATEGORY, mathematical_functions_error_and_gamma_functions) { TEST_MATH_FUNCTION(erf)({-3, -2, -1, 0, 1}); @@ -874,6 +891,18 @@ TEST(TEST_CATEGORY, TEST_MATH_FUNCTION(trunc)({12.3l, 4.56l, 789.l}); #endif + TEST_MATH_FUNCTION(round)({-3, -2, -1, 
0, 1}); + TEST_MATH_FUNCTION(round)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(round)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(round)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(round)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(round)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(round)({2.3f, 2.5f, 2.7f, -2.3f, -2.5f, -2.7f, -0.0f}); + TEST_MATH_FUNCTION(round)({2.3, 2.5, 2.7, -2.3, -2.5, -2.7, -0.0}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(round)({2.3l, 2.5l, 2.7l, -2.3l, -2.5l, -2.7l, -0.0l}); +#endif + #ifndef KOKKOS_ENABLE_SYCL TEST_MATH_FUNCTION(nearbyint)({-3, -2, -1, 0, 1}); TEST_MATH_FUNCTION(nearbyint)({-3l, -2l, -1l, 0l, 1l}); @@ -889,6 +918,43 @@ TEST(TEST_CATEGORY, #endif } +TEST(TEST_CATEGORY, + mathematical_functions_floating_point_manipulation_functions) { + TEST_MATH_FUNCTION(logb)({2, 3, 4, 56, 789}); + TEST_MATH_FUNCTION(logb)({2l, 3l, 4l, 56l, 789l}); + TEST_MATH_FUNCTION(logb)({2ll, 3ll, 4ll, 56ll, 789ll}); + TEST_MATH_FUNCTION(logb)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(logb)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(logb)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(logb)({123.45f, 6789.0f}); + TEST_MATH_FUNCTION(logb)({123.45, 6789.0}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(logb)({123.45l, 6789.0l}); +#endif + + do_test_math_binary_function<TEST_EXECSPACE, kk_nextafter>(0, 1.f); + do_test_math_binary_function<TEST_EXECSPACE, kk_nextafter>(1, 2.f); + do_test_math_binary_function<TEST_EXECSPACE, kk_nextafter>(0.1, 0); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + do_test_math_binary_function<TEST_EXECSPACE, kk_nextafter>(1, 2.l); + do_test_math_binary_function<TEST_EXECSPACE, kk_nextafter>(1.l, 2.l); +#endif + + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(0, 1.f); + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1, 2.f); + do_test_math_binary_function<TEST_EXECSPACE, 
kk_copysign>(0.1, 0); + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1.f, +2.f); + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1.f, -2.f); + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1., +2.); + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1., -2.); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1, +2.l); + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1.l, +2); + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1.l, +2.l); + do_test_math_binary_function<TEST_EXECSPACE, kk_copysign>(1.l, -2.l); +#endif +} + template <class Space> struct TestAbsoluteValueFunction { TestAbsoluteValueFunction() { run(); } @@ -898,7 +964,7 @@ struct TestAbsoluteValueFunction { ASSERT_EQ(errors, 0); } KOKKOS_FUNCTION void operator()(int, int& e) const { - using Kokkos::Experimental::abs; + using Kokkos::abs; if (abs(1) != 1 || abs(-1) != 1) { ++e; KOKKOS_IMPL_DO_NOT_USE_PRINTF("failed abs(int)\n"); @@ -926,8 +992,8 @@ struct TestAbsoluteValueFunction { } #endif // special values - using Kokkos::Experimental::isinf; - using Kokkos::Experimental::isnan; + using Kokkos::isinf; + using Kokkos::isnan; if (abs(-0.) != 0. 
#ifndef KOKKOS_IMPL_WORKAROUND_INTEL_LLVM_DEFAULT_FLOATING_POINT_MODEL || !isinf(abs(-INFINITY)) || !isnan(abs(-NAN)) @@ -962,7 +1028,7 @@ struct TestIsNaN { ASSERT_EQ(errors, 0); } KOKKOS_FUNCTION void operator()(int, int& e) const { - using Kokkos::Experimental::isnan; + using Kokkos::isnan; using Kokkos::Experimental::quiet_NaN; using Kokkos::Experimental::signaling_NaN; if (isnan(1) || isnan(INT_MAX)) { @@ -1022,3 +1088,4 @@ struct TestIsNaN { TEST(TEST_CATEGORY, mathematical_functions_isnan) { TestIsNaN<TEST_EXECSPACE>(); } +#endif diff --git a/packages/kokkos/core/unit_test/TestMathematicalFunctions1.hpp b/packages/kokkos/core/unit_test/TestMathematicalFunctions1.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0fd56a5ac91c2a5ed01fccf5e6534a0afa814e58 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestMathematicalFunctions1.hpp @@ -0,0 +1,47 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#define KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2 +#include "TestMathematicalFunctions.hpp" +#undef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2 diff --git a/packages/kokkos/core/unit_test/TestMathematicalFunctions2.hpp b/packages/kokkos/core/unit_test/TestMathematicalFunctions2.hpp new file mode 100644 index 0000000000000000000000000000000000000000..74e7443b1306c1ee4a26ba9e4890cf8a26c195e8 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestMathematicalFunctions2.hpp @@ -0,0 +1,47 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#define KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1 +#include "TestMathematicalFunctions.hpp" +#undef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1 diff --git a/packages/kokkos/core/unit_test/TestMathematicalSpecialFunctions.hpp b/packages/kokkos/core/unit_test/TestMathematicalSpecialFunctions.hpp index 45d8bd08ab270818f26ee755f2e29de3037065e4..26f237a71d7c1667062f27c93e1b58fd83d3e44b 100644 --- a/packages/kokkos/core/unit_test/TestMathematicalSpecialFunctions.hpp +++ b/packages/kokkos/core/unit_test/TestMathematicalSpecialFunctions.hpp @@ -17,7 +17,7 @@ struct TestExponentialIntergral1Function { HostViewType h_ref; void testit() { - using Kokkos::Experimental::fabs; + using Kokkos::fabs; using Kokkos::Experimental::infinity; d_x = ViewType("d_x", 15); @@ -1641,27 +1641,17 @@ struct TestComplexBesselH1Function { Kokkos::complex<double>(-5.430453818237824e-02, -1.530182458039000e-02); EXPECT_EQ(h_ref_ch10(0), h_ch10(0)); - std::cout << "h_ch10(0): " << h_ch10(0) - << ", h_ref_ch10(0): " << h_ref_ch10(0) << std::endl; for (int i = 1; i < N; i++) { EXPECT_LE(Kokkos::abs(h_ch10(i) - h_ref_ch10(i)), - Kokkos::abs(h_ref_ch10(i)) * 1e-13); - std::cout << i - << ", actual diff: " << Kokkos::abs(h_ch10(i) - h_ref_ch10(i)) - << ", expected diff: " << Kokkos::abs(h_ref_ch10(i)) * 1e-13 - << std::endl; + Kokkos::abs(h_ref_ch10(i)) * 1e-13) + << "at index " << i; } EXPECT_EQ(h_ref_ch11(0), h_ch11(0)); - std::cout << "h_ch11(0): " << h_ch11(0) - << ", h_ref_ch11(0): " << h_ref_ch11(0) << std::endl; for (int i = 1; i < N; i++) { EXPECT_LE(Kokkos::abs(h_ch11(i) - h_ref_ch11(i)), - Kokkos::abs(h_ref_ch11(i)) * 1e-13); - std::cout << i - << ", actual diff: " << Kokkos::abs(h_ch11(i) - h_ref_ch11(i)) - << ", expected diff: " << Kokkos::abs(h_ref_ch11(i)) * 1e-13 - << std::endl; + Kokkos::abs(h_ref_ch11(i)) * 1e-13) + << "at index " << i; } } diff --git 
a/packages/kokkos/core/unit_test/TestMemoryPool.hpp b/packages/kokkos/core/unit_test/TestMemoryPool.hpp index 829e8d641a5b00a0be67200bdf30495951e95457..75deae13a108aa40833836eef30502560ed07933 100644 --- a/packages/kokkos/core/unit_test/TestMemoryPool.hpp +++ b/packages/kokkos/core/unit_test/TestMemoryPool.hpp @@ -45,12 +45,7 @@ #ifndef KOKKOS_UNITTEST_MEMPOOL_HPP #define KOKKOS_UNITTEST_MEMPOOL_HPP -#include <cstdio> -#include <iostream> -#include <cmath> -#include <algorithm> - -#include <Kokkos_Timer.hpp> +#include <Kokkos_Core.hpp> namespace TestMemoryPool { @@ -489,8 +484,8 @@ struct TestMemoryPoolHuge { template <class DeviceType> struct TestMemoryPoolHuge< DeviceType, - typename std::enable_if<std::is_same< - Kokkos::HostSpace, typename DeviceType::memory_space>::value>::type> { + std::enable_if_t<std::is_same<Kokkos::HostSpace, + typename DeviceType::memory_space>::value>> { using ptrs_type = Kokkos::View<uintptr_t*, DeviceType>; using pool_type = Kokkos::MemoryPool<DeviceType>; using memory_space = typename DeviceType::memory_space; diff --git a/packages/kokkos/core/unit_test/TestMinMaxClamp.hpp b/packages/kokkos/core/unit_test/TestMinMaxClamp.hpp index abf24ef97cbbf6a203450eab72c2499ec9d25842..42f83f348ef41eeec96d2bccb6e8e9e22a068b4b 100644 --- a/packages/kokkos/core/unit_test/TestMinMaxClamp.hpp +++ b/packages/kokkos/core/unit_test/TestMinMaxClamp.hpp @@ -72,32 +72,30 @@ struct PairIntCompareFirst { // test max() // ---------------------------------------------------------- TEST(TEST_CATEGORY, max) { - namespace KE = Kokkos::Experimental; - int a = 1; int b = 2; - EXPECT_TRUE(KE::max(a, b) == 2); + EXPECT_EQ(Kokkos::max(a, b), 2); a = 3; b = 1; - EXPECT_TRUE(KE::max(a, b) == 3); + EXPECT_EQ(Kokkos::max(a, b), 3); - STATIC_ASSERT(KE::max(1, 2) == 2); - STATIC_ASSERT(KE::max(1, 2, ::Test::Greater<int>{}) == 1); + STATIC_ASSERT(Kokkos::max(1, 2) == 2); + STATIC_ASSERT(Kokkos::max(1, 2, ::Test::Greater<int>{}) == 1); - EXPECT_TRUE(KE::max({3.f, -1.f, 0.f}) 
== 3.f); + EXPECT_EQ(Kokkos::max({3.f, -1.f, 0.f}), 3.f); - STATIC_ASSERT(KE::max({3, -1, 0}) == 3); - STATIC_ASSERT(KE::max({3, -1, 0}, ::Test::Greater<int>{}) == -1); + STATIC_ASSERT(Kokkos::max({3, -1, 0}) == 3); + STATIC_ASSERT(Kokkos::max({3, -1, 0}, ::Test::Greater<int>{}) == -1); - STATIC_ASSERT(KE::max({ - ::Test::PairIntCompareFirst{255, 0}, - ::Test::PairIntCompareFirst{255, 1}, - ::Test::PairIntCompareFirst{0, 2}, - ::Test::PairIntCompareFirst{0, 3}, - ::Test::PairIntCompareFirst{255, 4}, - ::Test::PairIntCompareFirst{0, 5}, - }) + STATIC_ASSERT(Kokkos::max({ + ::Test::PairIntCompareFirst{255, 0}, + ::Test::PairIntCompareFirst{255, 1}, + ::Test::PairIntCompareFirst{0, 2}, + ::Test::PairIntCompareFirst{0, 3}, + ::Test::PairIntCompareFirst{255, 4}, + ::Test::PairIntCompareFirst{0, 5}, + }) .second == 0); // leftmost element } @@ -107,9 +105,8 @@ struct StdAlgoMinMaxOpsTestMax { KOKKOS_INLINE_FUNCTION void operator()(const int& ind) const { - namespace KE = Kokkos::Experimental; - auto v1 = 10.; - if (KE::max(v1, m_view(ind)) == 10.) { + auto v1 = 10.; + if (Kokkos::max(v1, m_view(ind)) == 10.) 
{ m_view(ind) = 6.; } } @@ -136,32 +133,30 @@ TEST(TEST_CATEGORY, max_within_parfor) { // test min() // ---------------------------------------------------------- TEST(TEST_CATEGORY, min) { - namespace KE = Kokkos::Experimental; - int a = 1; int b = 2; - EXPECT_TRUE(KE::min(a, b) == 1); + EXPECT_EQ(Kokkos::min(a, b), 1); a = 3; b = 2; - EXPECT_TRUE(KE::min(a, b) == 2); + EXPECT_EQ(Kokkos::min(a, b), 2); - STATIC_ASSERT(KE::min(3.f, 2.f) == 2.f); - STATIC_ASSERT(KE::min(3.f, 2.f, ::Test::Greater<int>{}) == 3.f); + STATIC_ASSERT(Kokkos::min(3.f, 2.f) == 2.f); + STATIC_ASSERT(Kokkos::min(3.f, 2.f, ::Test::Greater<int>{}) == 3.f); - EXPECT_TRUE(KE::min({3.f, -1.f, 0.f}) == -1.f); + EXPECT_EQ(Kokkos::min({3.f, -1.f, 0.f}), -1.f); - STATIC_ASSERT(KE::min({3, -1, 0}) == -1); - STATIC_ASSERT(KE::min({3, -1, 0}, ::Test::Greater<int>{}) == 3); + STATIC_ASSERT(Kokkos::min({3, -1, 0}) == -1); + STATIC_ASSERT(Kokkos::min({3, -1, 0}, ::Test::Greater<int>{}) == 3); - STATIC_ASSERT(KE::min({ - ::Test::PairIntCompareFirst{255, 0}, - ::Test::PairIntCompareFirst{255, 1}, - ::Test::PairIntCompareFirst{0, 2}, - ::Test::PairIntCompareFirst{0, 3}, - ::Test::PairIntCompareFirst{255, 4}, - ::Test::PairIntCompareFirst{0, 5}, - }) + STATIC_ASSERT(Kokkos::min({ + ::Test::PairIntCompareFirst{255, 0}, + ::Test::PairIntCompareFirst{255, 1}, + ::Test::PairIntCompareFirst{0, 2}, + ::Test::PairIntCompareFirst{0, 3}, + ::Test::PairIntCompareFirst{255, 4}, + ::Test::PairIntCompareFirst{0, 5}, + }) .second == 2); // leftmost element } @@ -171,9 +166,8 @@ struct StdAlgoMinMaxOpsTestMin { KOKKOS_INLINE_FUNCTION void operator()(const int& ind) const { - namespace KE = Kokkos::Experimental; - auto v1 = 10.; - if (KE::min(v1, m_view(ind)) == 0.) { + auto v1 = 10.; + if (Kokkos::min(v1, m_view(ind)) == 0.) 
{ m_view(ind) = 8.; } } @@ -199,49 +193,53 @@ TEST(TEST_CATEGORY, min_within_parfor) { // test minmax() // ---------------------------------------------------------- TEST(TEST_CATEGORY, minmax) { - namespace KE = Kokkos::Experimental; int a = 1; int b = 2; - const auto& r = KE::minmax(a, b); - EXPECT_TRUE(r.first == 1); - EXPECT_TRUE(r.second == 2); + const auto& r = Kokkos::minmax(a, b); + EXPECT_EQ(r.first, 1); + EXPECT_EQ(r.second, 2); a = 3; b = 2; - const auto& r2 = KE::minmax(a, b); - EXPECT_TRUE(r2.first == 2); - EXPECT_TRUE(r2.second == 3); - - STATIC_ASSERT((Kokkos::pair<float, float>(KE::minmax(3.f, 2.f)) == + const auto& r2 = Kokkos::minmax(a, b); + EXPECT_EQ(r2.first, 2); + EXPECT_EQ(r2.second, 3); + +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC nvhpc can't deal with device side + // constexpr constructors so I removed the + // constexpr in pair, which makes STATIC_ASSERT + // here fail + STATIC_ASSERT((Kokkos::pair<float, float>(Kokkos::minmax(3.f, 2.f)) == Kokkos::make_pair(2.f, 3.f))); STATIC_ASSERT( - (Kokkos::pair<float, float>(KE::minmax( + (Kokkos::pair<float, float>(Kokkos::minmax( 3.f, 2.f, ::Test::Greater<int>{})) == Kokkos::make_pair(3.f, 2.f))); - EXPECT_TRUE(KE::minmax({3.f, -1.f, 0.f}) == Kokkos::make_pair(-1.f, 3.f)); + EXPECT_EQ(Kokkos::minmax({3.f, -1.f, 0.f}), Kokkos::make_pair(-1.f, 3.f)); - STATIC_ASSERT(KE::minmax({3, -1, 0}) == Kokkos::make_pair(-1, 3)); - STATIC_ASSERT(KE::minmax({3, -1, 0}, ::Test::Greater<int>{}) == + STATIC_ASSERT(Kokkos::minmax({3, -1, 0}) == Kokkos::make_pair(-1, 3)); + STATIC_ASSERT(Kokkos::minmax({3, -1, 0}, ::Test::Greater<int>{}) == Kokkos::make_pair(3, -1)); - STATIC_ASSERT(KE::minmax({ - ::Test::PairIntCompareFirst{255, 0}, - ::Test::PairIntCompareFirst{255, 1}, - ::Test::PairIntCompareFirst{0, 2}, - ::Test::PairIntCompareFirst{0, 3}, - ::Test::PairIntCompareFirst{255, 4}, - ::Test::PairIntCompareFirst{0, 5}, - }) + STATIC_ASSERT(Kokkos::minmax({ + ::Test::PairIntCompareFirst{255, 0}, + 
::Test::PairIntCompareFirst{255, 1}, + ::Test::PairIntCompareFirst{0, 2}, + ::Test::PairIntCompareFirst{0, 3}, + ::Test::PairIntCompareFirst{255, 4}, + ::Test::PairIntCompareFirst{0, 5}, + }) .first.second == 2); // leftmost - STATIC_ASSERT(KE::minmax({ - ::Test::PairIntCompareFirst{255, 0}, - ::Test::PairIntCompareFirst{255, 1}, - ::Test::PairIntCompareFirst{0, 2}, - ::Test::PairIntCompareFirst{0, 3}, - ::Test::PairIntCompareFirst{255, 4}, - ::Test::PairIntCompareFirst{0, 5}, - }) + STATIC_ASSERT(Kokkos::minmax({ + ::Test::PairIntCompareFirst{255, 0}, + ::Test::PairIntCompareFirst{255, 1}, + ::Test::PairIntCompareFirst{0, 2}, + ::Test::PairIntCompareFirst{0, 3}, + ::Test::PairIntCompareFirst{255, 4}, + ::Test::PairIntCompareFirst{0, 5}, + }) .second.second == 4); // rightmost +#endif } template <class ViewType> @@ -250,9 +248,8 @@ struct StdAlgoMinMaxOpsTestMinMax { KOKKOS_INLINE_FUNCTION void operator()(const int& ind) const { - namespace KE = Kokkos::Experimental; auto v1 = 7.; - const auto& r = KE::minmax(v1, m_view(ind)); + const auto& r = Kokkos::minmax(v1, m_view(ind)); m_view(ind) = (double)(r.first - r.second); } @@ -261,7 +258,6 @@ struct StdAlgoMinMaxOpsTestMinMax { }; TEST(TEST_CATEGORY, minmax_within_parfor) { - namespace KE = Kokkos::Experimental; using view_t = Kokkos::View<double*>; view_t a("a", 10); @@ -277,28 +273,26 @@ TEST(TEST_CATEGORY, minmax_within_parfor) { // test clamp() // ---------------------------------------------------------- TEST(TEST_CATEGORY, clamp) { - namespace KE = Kokkos::Experimental; - int a = 1; int b = 2; int c = 19; - const auto& r = KE::clamp(a, b, c); - EXPECT_TRUE(&r == &b); - EXPECT_TRUE(r == b); + const auto& r = Kokkos::clamp(a, b, c); + EXPECT_EQ(&r, &b); + EXPECT_EQ(r, b); a = 5; b = -2; c = 3; - const auto& r2 = KE::clamp(a, b, c); - EXPECT_TRUE(&r2 == &c); - EXPECT_TRUE(r2 == c); + const auto& r2 = Kokkos::clamp(a, b, c); + EXPECT_EQ(&r2, &c); + EXPECT_EQ(r2, c); a = 5; b = -2; c = 7; - const auto& r3 = 
KE::clamp(a, b, c); - EXPECT_TRUE(&r3 == &a); - EXPECT_TRUE(r3 == a); + const auto& r3 = Kokkos::clamp(a, b, c); + EXPECT_EQ(&r3, &a); + EXPECT_EQ(r3, a); } template <class ViewType> @@ -307,11 +301,10 @@ struct StdAlgoMinMaxOpsTestClamp { KOKKOS_INLINE_FUNCTION void operator()(const int& ind) const { - namespace KE = Kokkos::Experimental; m_view(ind) = 10.; const auto b = -2.; const auto c = 3.; - const auto& r = KE::clamp(m_view(ind), b, c); + const auto& r = Kokkos::clamp(m_view(ind), b, c); m_view(ind) = (double)(r); } @@ -320,7 +313,6 @@ struct StdAlgoMinMaxOpsTestClamp { }; TEST(TEST_CATEGORY, clamp_within_parfor) { - namespace KE = Kokkos::Experimental; using view_t = Kokkos::View<double*>; view_t a("a", 10); diff --git a/packages/kokkos/core/unit_test/TestNonTrivialScalarTypes.hpp b/packages/kokkos/core/unit_test/TestNonTrivialScalarTypes.hpp index d7607c4f71b0df4f32af77f4441c8c909992b14a..02064d2fc3debc09aa0fcbfed16e6366fc9cd14f 100644 --- a/packages/kokkos/core/unit_test/TestNonTrivialScalarTypes.hpp +++ b/packages/kokkos/core/unit_test/TestNonTrivialScalarTypes.hpp @@ -82,37 +82,6 @@ struct my_complex { return *this; } - KOKKOS_INLINE_FUNCTION - my_complex &operator=(const volatile my_complex &src) { - re = src.re; - im = src.im; - dummy = src.dummy; - return *this; - } - - KOKKOS_INLINE_FUNCTION - volatile my_complex &operator=(const my_complex &src) volatile { - re = src.re; - im = src.im; - dummy = src.dummy; - return *this; - } - - KOKKOS_INLINE_FUNCTION - volatile my_complex &operator=(const volatile my_complex &src) volatile { - re = src.re; - im = src.im; - dummy = src.dummy; - return *this; - } - - KOKKOS_INLINE_FUNCTION - my_complex(const volatile my_complex &src) { - re = src.re; - im = src.im; - dummy = src.dummy; - } - KOKKOS_INLINE_FUNCTION my_complex(const double &val) { re = val; @@ -128,13 +97,6 @@ struct my_complex { return *this; } - KOKKOS_INLINE_FUNCTION - void operator+=(const volatile my_complex &src) volatile { - re += src.re; - im 
+= src.im; - dummy += src.dummy; - } - KOKKOS_INLINE_FUNCTION my_complex operator+(const my_complex &src) { my_complex tmp = *this; @@ -144,15 +106,6 @@ struct my_complex { return tmp; } - KOKKOS_INLINE_FUNCTION - my_complex operator+(const volatile my_complex &src) volatile { - my_complex tmp = *this; - tmp.re += src.re; - tmp.im += src.im; - tmp.dummy += src.dummy; - return tmp; - } - KOKKOS_INLINE_FUNCTION my_complex &operator*=(const my_complex &src) { double re_tmp = re * src.re - im * src.im; @@ -163,15 +116,6 @@ struct my_complex { return *this; } - KOKKOS_INLINE_FUNCTION - void operator*=(const volatile my_complex &src) volatile { - double re_tmp = re * src.re - im * src.im; - double im_tmp = re * src.im + im * src.re; - re = re_tmp; - im = im_tmp; - dummy *= src.dummy; - } - KOKKOS_INLINE_FUNCTION bool operator==(const my_complex &src) const { return (re == src.re) && (im == src.im) && (dummy == src.dummy); @@ -229,12 +173,6 @@ struct array_reduce { return *this; } - KOKKOS_INLINE_FUNCTION - array_reduce &operator=(const volatile array_reduce &src) { - for (int i = 0; i < N; i++) data[i] = src.data[i]; - return *this; - } - KOKKOS_INLINE_FUNCTION // add operator array_reduce & operator=(const scalar_t val) { @@ -254,11 +192,6 @@ struct array_reduce { for (int i = 0; i < N; i++) data[i] += src.data[i]; return *this; } - KOKKOS_INLINE_FUNCTION // volatile add operator - void - operator+=(const volatile array_reduce &src) volatile { - for (int i = 0; i < N; i++) data[i] += src.data[i]; - } KOKKOS_INLINE_FUNCTION // add operator array_reduce operator+(const array_reduce &src) const { @@ -279,11 +212,6 @@ struct array_reduce { for (int i = 0; i < N; i++) data[i] *= src.data[i]; return *this; } - KOKKOS_INLINE_FUNCTION // volatile add operator - void - operator*=(const volatile array_reduce &src) volatile { - for (int i = 0; i < N; i++) data[i] *= src.data[i]; - } KOKKOS_INLINE_FUNCTION // add operator array_reduce operator*(const array_reduce &src) const { @@ 
-320,9 +248,6 @@ struct point_t { KOKKOS_FUNCTION point_t(const point_t &val) : x(val.x), y(val.y), z(val.z){}; - KOKKOS_FUNCTION - point_t(const volatile point_t &val) : x(val.x), y(val.y), z(val.z){}; - KOKKOS_FUNCTION point_t(const int rhs) { x = y = z = static_cast<uint8_t>(rhs); } @@ -330,19 +255,19 @@ struct point_t { explicit operator int() const { return static_cast<int>(x + y + z); } KOKKOS_FUNCTION - bool operator==(const volatile point_t rhs) const volatile { + bool operator==(const point_t rhs) const { return (x == rhs.x && y == rhs.y && z == rhs.z); } KOKKOS_FUNCTION - void operator=(point_t rhs) volatile { + void operator=(point_t rhs) { x = rhs.x; y = rhs.y; z = rhs.z; } KOKKOS_FUNCTION - volatile point_t operator+=(const volatile point_t rhs) volatile { + point_t operator+=(const point_t rhs) { x += rhs.x; y += rhs.y; z += rhs.z; diff --git a/packages/kokkos/core/unit_test/TestNumericTraits.hpp b/packages/kokkos/core/unit_test/TestNumericTraits.hpp index 52989aa5dd0ebe49993f6a59e8bba6323c8228ac..0f34ff436f0fe21b0e38e2055fde34e7a5e60b0b 100644 --- a/packages/kokkos/core/unit_test/TestNumericTraits.hpp +++ b/packages/kokkos/core/unit_test/TestNumericTraits.hpp @@ -48,7 +48,6 @@ #include <type_traits> #include <limits> #include "Kokkos_NumericTraits.hpp" -#include "Kokkos_ExecPolicy.hpp" struct extrema { #define DEFINE_EXTREMA(T, m, M) \ @@ -213,8 +212,8 @@ struct TestNumericTraits { } }; -#if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_SYCL) || \ - defined(KOKKOS_ENABLE_OPENMPTARGET) +#if (defined(KOKKOS_COMPILER_NVCC) && defined(KOKKOS_ENABLE_CUDA)) || \ + defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) template <class Tag> struct TestNumericTraits< #if defined(KOKKOS_ENABLE_CUDA) @@ -237,15 +236,12 @@ struct TestNumericTraits< }; #endif -#ifdef KOKKOS_COMPILER_NVHPC -// warning: 'long double' is treated as 'double' in device code -#pragma diag_suppress 20208 -#endif - TEST(TEST_CATEGORY, numeric_traits_infinity) { 
TestNumericTraits<TEST_EXECSPACE, float, Infinity>(); TestNumericTraits<TEST_EXECSPACE, double, Infinity>(); -#ifndef KOKKOS_COMPILER_IBM // fails with XL 16.1.1 see issue #4100 + // fails with XL 16.1.1 see issue #4100 + // FIXME_NVHPC long double not supported +#if !defined(KOKKOS_COMPILER_IBM) && !defined(KOKKOS_COMPILER_NVHPC) TestNumericTraits<TEST_EXECSPACE, long double, Infinity>(); #endif } @@ -253,7 +249,9 @@ TEST(TEST_CATEGORY, numeric_traits_infinity) { TEST(TEST_CATEGORY, numeric_traits_epsilon) { TestNumericTraits<TEST_EXECSPACE, float, Epsilon>(); TestNumericTraits<TEST_EXECSPACE, double, Epsilon>(); -#ifndef KOKKOS_COMPILER_IBM // fails with XL 16.1.1 + // fails with XL 16.1.1 see issue #4100 + // FIXME_NVHPC long double not supported +#if !defined(KOKKOS_COMPILER_IBM) && !defined(KOKKOS_COMPILER_NVHPC) TestNumericTraits<TEST_EXECSPACE, long double, Epsilon>(); #endif } @@ -261,25 +259,41 @@ TEST(TEST_CATEGORY, numeric_traits_epsilon) { TEST(TEST_CATEGORY, numeric_traits_round_error) { TestNumericTraits<TEST_EXECSPACE, float, RoundError>(); TestNumericTraits<TEST_EXECSPACE, double, RoundError>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, RoundError>(); +#endif } TEST(TEST_CATEGORY, numeric_traits_norm_min) { TestNumericTraits<TEST_EXECSPACE, float, NormMin>(); TestNumericTraits<TEST_EXECSPACE, double, NormMin>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, NormMin>(); +#endif } TEST(TEST_CATEGORY, numeric_traits_denorm_min) { TestNumericTraits<TEST_EXECSPACE, float, DenormMin>(); TestNumericTraits<TEST_EXECSPACE, double, DenormMin>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // 
nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, DenormMin>(); +#endif } TEST(TEST_CATEGORY, numeric_traits_reciprocal_overflow_threshold) { TestNumericTraits<TEST_EXECSPACE, float, ReciprocalOverflowThreshold>(); TestNumericTraits<TEST_EXECSPACE, double, ReciprocalOverflowThreshold>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, ReciprocalOverflowThreshold>(); +#endif } TEST(TEST_CATEGORY, numeric_traits_finite_min_max) { @@ -314,8 +328,12 @@ TEST(TEST_CATEGORY, numeric_traits_finite_min_max) { TestNumericTraits<TEST_EXECSPACE, float, FiniteMax>(); TestNumericTraits<TEST_EXECSPACE, double, FiniteMin>(); TestNumericTraits<TEST_EXECSPACE, double, FiniteMax>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, FiniteMin>(); TestNumericTraits<TEST_EXECSPACE, long double, FiniteMax>(); +#endif } TEST(TEST_CATEGORY, numeric_traits_digits) { @@ -333,7 +351,11 @@ TEST(TEST_CATEGORY, numeric_traits_digits) { TestNumericTraits<TEST_EXECSPACE, unsigned long long int, Digits>(); TestNumericTraits<TEST_EXECSPACE, float, Digits>(); TestNumericTraits<TEST_EXECSPACE, double, Digits>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, Digits>(); +#endif } TEST(TEST_CATEGORY, numeric_traits_digits10) { @@ -351,15 +373,22 @@ TEST(TEST_CATEGORY, numeric_traits_digits10) { TestNumericTraits<TEST_EXECSPACE, unsigned long long int, Digits10>(); 
TestNumericTraits<TEST_EXECSPACE, float, Digits10>(); TestNumericTraits<TEST_EXECSPACE, double, Digits10>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, Digits10>(); +#endif } TEST(TEST_CATEGORY, numeric_traits_max_digits10) { TestNumericTraits<TEST_EXECSPACE, float, MaxDigits10>(); TestNumericTraits<TEST_EXECSPACE, double, MaxDigits10>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, MaxDigits10>(); +#endif } - TEST(TEST_CATEGORY, numeric_traits_radix) { TestNumericTraits<TEST_EXECSPACE, bool, Radix>(); TestNumericTraits<TEST_EXECSPACE, char, Radix>(); @@ -375,7 +404,11 @@ TEST(TEST_CATEGORY, numeric_traits_radix) { TestNumericTraits<TEST_EXECSPACE, unsigned long long int, Radix>(); TestNumericTraits<TEST_EXECSPACE, float, Radix>(); TestNumericTraits<TEST_EXECSPACE, double, Radix>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, Radix>(); +#endif } TEST(TEST_CATEGORY, numeric_traits_min_max_exponent) { @@ -383,8 +416,12 @@ TEST(TEST_CATEGORY, numeric_traits_min_max_exponent) { TestNumericTraits<TEST_EXECSPACE, float, MaxExponent>(); TestNumericTraits<TEST_EXECSPACE, double, MinExponent>(); TestNumericTraits<TEST_EXECSPACE, double, MaxExponent>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, MinExponent>(); TestNumericTraits<TEST_EXECSPACE, long double, MaxExponent>(); +#endif } 
TEST(TEST_CATEGORY, numeric_traits_min_max_exponent10) { @@ -392,17 +429,27 @@ TEST(TEST_CATEGORY, numeric_traits_min_max_exponent10) { TestNumericTraits<TEST_EXECSPACE, float, MaxExponent10>(); TestNumericTraits<TEST_EXECSPACE, double, MinExponent10>(); TestNumericTraits<TEST_EXECSPACE, double, MaxExponent10>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, MinExponent10>(); TestNumericTraits<TEST_EXECSPACE, long double, MaxExponent10>(); +#endif } - TEST(TEST_CATEGORY, numeric_traits_quiet_and_signaling_nan) { TestNumericTraits<TEST_EXECSPACE, float, QuietNaN>(); TestNumericTraits<TEST_EXECSPACE, float, SignalingNaN>(); TestNumericTraits<TEST_EXECSPACE, double, QuietNaN>(); TestNumericTraits<TEST_EXECSPACE, double, SignalingNaN>(); +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: + // Unsupported unknown data type 38. + // Unsupported unknown data type 38. + // Unsupported unknown data type 38. 
+ // nvc++-Fatal-/home/projects/x86-64/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/tools/cpp2 + // TERMINATED by signal 11 TestNumericTraits<TEST_EXECSPACE, long double, QuietNaN>(); TestNumericTraits<TEST_EXECSPACE, long double, SignalingNaN>(); +#endif } namespace NumericTraitsSFINAE { @@ -604,7 +651,10 @@ CHECK_SAME_AS_NUMERIC_LIMITS_MEMBER_CONSTANT(long double, max_exponent10); // Workaround compiler issue error: expression must have a constant value // See kokkos/kokkos#4574 -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC +// There is the same bug with CUDA 11.6 +// FIXME_NVHPC FIXME_CUDA FIXME_NVCC +#if !defined(KOKKOS_COMPILER_NVHPC) && (CUDA_VERSION < 11060) && \ + !(defined(KOKKOS_COMPILER_NVCC) && !defined(KOKKOS_ENABLE_CUDA)) CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(float, quiet_NaN); CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(double, quiet_NaN); CHECK_NAN_SAME_AS_NUMERIC_LIMITS_MEMBER_FUNCTION(long double, quiet_NaN); diff --git a/packages/kokkos/core/unit_test/TestOther.hpp b/packages/kokkos/core/unit_test/TestOther.hpp index c068d250cdb61435cf774fdc3eab19957b521a5e..5596f373bf2d45636c6d03562886c50bf22fb435 100644 --- a/packages/kokkos/core/unit_test/TestOther.hpp +++ b/packages/kokkos/core/unit_test/TestOther.hpp @@ -44,9 +44,13 @@ #ifndef KOKKOS_TEST_OTHER_HPP #define KOKKOS_TEST_OTHER_HPP -#include <TestTemplateMetaFunctions.hpp> #include <TestAggregate.hpp> +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC: +// NVC++-F-0000-Internal compiler error. 
Basic LLVM base data type required 23 +// (/ascldap/users/crtrott/Kokkos/kokkos/build/core/unit_test/cuda/TestCuda_Other.cpp: +// 204) NVC++/x86-64 Linux 22.3-0: compilation aborted #include <TestMemoryPool.hpp> +#endif #include <TestCXX11.hpp> #include <TestViewCtorPropEmbeddedDim.hpp> diff --git a/packages/kokkos/core/unit_test/TestParseCmdLineArgsAndEnvVars.cpp b/packages/kokkos/core/unit_test/TestParseCmdLineArgsAndEnvVars.cpp new file mode 100644 index 0000000000000000000000000000000000000000..11684f8a80abc9d6f6010f2f3d6b40287d513cdc --- /dev/null +++ b/packages/kokkos/core/unit_test/TestParseCmdLineArgsAndEnvVars.cpp @@ -0,0 +1,499 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <impl/Kokkos_ParseCommandLineArgumentsAndEnvironmentVariables.hpp> +#include <impl/Kokkos_InitializationSettings.hpp> +#include <impl/Kokkos_DeviceManagement.hpp> +#include <impl/Kokkos_Command_Line_Parsing.hpp> + +#include <cstdlib> +#include <memory> +#include <mutex> +#include <regex> +#include <string> +#include <unordered_map> + +namespace { + +class EnvVarsHelper { + // do not let GTest run unit tests that set the environment concurrently + static std::mutex mutex_; + std::vector<std::string> vars_; + // FIXME_CXX17 prefer optional + // store name of env var that was already set (if any) + // in which case unit test is skipped + std::unique_ptr<std::string> skip_; + + void setup(std::unordered_map<std::string, std::string> const& vars) { + for (auto const& x : vars) { + auto const& name = x.first; + auto const& value = x.second; + // skip unit test if env var is already set + if (getenv(name.c_str())) { + skip_ = std::make_unique<std::string>(name); + break; + } +#ifdef _WIN32 + int const error_code = _putenv((name + "=" + value).c_str()); +#else + int const error_code = + setenv(name.c_str(), value.c_str(), /*overwrite=*/0); +#endif + if (error_code != 0) { + std::cerr << "failed to set environment variable '" << name << "=" + << value 
<< "'\n"; + std::abort(); + } + vars_.push_back(name); + } + } + void teardown() { + for (auto const& name : vars_) { +#ifdef _WIN32 + int const error_code = _putenv((name + "=").c_str()); +#else + int const error_code = unsetenv(name.c_str()); +#endif + if (error_code != 0) { + std::cerr << "failed to unset environment variable '" << name << "'\n"; + std::abort(); + } + } + } + + public: + auto& skip() { return skip_; } + EnvVarsHelper(std::unordered_map<std::string, std::string> const& vars) { + mutex_.lock(); + setup(vars); + } + EnvVarsHelper& operator=( + std::unordered_map<std::string, std::string> const& vars) { + teardown(); + setup(vars); + return *this; + } + ~EnvVarsHelper() { + teardown(); + mutex_.unlock(); + } + EnvVarsHelper(EnvVarsHelper&) = delete; + EnvVarsHelper& operator=(EnvVarsHelper&) = delete; + friend std::ostream& operator<<(std::ostream& os, EnvVarsHelper const& ev) { + for (auto const& name : ev.vars_) { + os << name << '=' << std::getenv(name.c_str()) << '\n'; + } + return os; + } +}; +std::mutex EnvVarsHelper::mutex_; +#define SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev) \ + if (ev.skip()) { \ + GTEST_SKIP() << "environment variable '" << *ev.skip() \ + << "' is already set"; \ + } \ + static_assert(true, "no-op to require trailing semicolon") + +class CmdLineArgsHelper { + int argc_; + std::vector<char*> argv_; + std::vector<std::unique_ptr<char[]>> args_; + + public: + CmdLineArgsHelper(std::vector<std::string> const& args) : argc_(args.size()) { + for (auto const& x : args) { + args_.emplace_back(new char[x.size() + 1]); + char* ptr = args_.back().get(); + strcpy(ptr, x.c_str()); + argv_.push_back(ptr); + } + argv_.push_back(nullptr); + } + int& argc() { return argc_; } + char** argv() { return argv_.data(); } +}; +#define EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, ...) 
\ + do { \ + std::vector<std::string> expected_argv = __VA_ARGS__; \ + \ + int expected_argc = expected_argv.size(); \ + EXPECT_EQ(cla.argc(), expected_argc); \ + for (int i = 0; i < expected_argc; ++i) { \ + EXPECT_EQ(cla.argv()[i], expected_argv[i]) \ + << "arguments differ at index " << i; \ + } \ + EXPECT_EQ(cla.argv()[cla.argc()], nullptr); \ + } while (false) + +TEST(defaultdevicetype, cmd_line_args_num_threads) { + CmdLineArgsHelper cla = {{ + "--foo=bar", + "--kokkos-num-threads=1", + "--kokkos-num-threads=2", + }}; + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + EXPECT_TRUE(settings.has_num_threads()); + EXPECT_EQ(settings.get_num_threads(), 2); + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {"--foo=bar"}); +} + +TEST(defaultdevicetype, cmd_line_args_device_id) { + CmdLineArgsHelper cla = {{ + "--kokkos-device-id=3", + "--dummy", + "--kokkos-device-id=4", + }}; + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + EXPECT_TRUE(settings.has_device_id()); + EXPECT_EQ(settings.get_device_id(), 4); + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {"--dummy"}); +} + +TEST(defaultdevicetype, cmd_line_args_num_devices) { + CmdLineArgsHelper cla = {{ + "--kokkos-num-devices=5,6", + "--kokkos-num-devices=7", + "-v", + }}; + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + EXPECT_TRUE(settings.has_num_devices()); + EXPECT_EQ(settings.get_num_devices(), 7); + // this is the current behavior, not suggesting this cannot be revisited + EXPECT_TRUE(settings.has_skip_device()) << "behavior changed see comment"; + EXPECT_EQ(settings.get_skip_device(), 6) << "behavior changed see comment"; + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {"-v"}); +} + +TEST(defaultdevicetype, cmd_line_args_disable_warning) { + CmdLineArgsHelper cla = {{ + 
"--kokkos-disable-warnings=1", + "--kokkos-disable-warnings=false", + }}; + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + EXPECT_TRUE(settings.has_disable_warnings()); + EXPECT_FALSE(settings.get_disable_warnings()); + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {}); +} + +TEST(defaultdevicetype, cmd_line_args_tune_internals) { + CmdLineArgsHelper cla = {{ + "--kokkos-tune-internals", + "--kokkos-num-threads=3", + }}; + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + EXPECT_TRUE(settings.has_tune_internals()); + EXPECT_TRUE(settings.get_tune_internals()); + EXPECT_TRUE(settings.has_num_threads()); + EXPECT_EQ(settings.get_num_threads(), 3); + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {}); +} + +TEST(defaultdevicetype, cmd_line_args_help) { + CmdLineArgsHelper cla = {{ + "--help", + }}; + Kokkos::InitializationSettings settings; + ::testing::internal::CaptureStdout(); + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + auto captured = ::testing::internal::GetCapturedStdout(); + // check that error message was only printed once + EXPECT_EQ(captured.find("--kokkos-help"), captured.rfind("--kokkos-help")); + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {"--help"}); + auto const help_message_length = captured.length(); + + cla = {{ + {"--kokkos-help"}, + }}; + ::testing::internal::CaptureStdout(); + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + captured = ::testing::internal::GetCapturedStdout(); + EXPECT_EQ(captured.length(), help_message_length); + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {}); + + cla = {{ + {"--kokkos-help"}, + {"--help"}, + {"--kokkos-help"}, + }}; + ::testing::internal::CaptureStdout(); + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + captured = ::testing::internal::GetCapturedStdout(); + 
EXPECT_EQ(captured.length(), help_message_length); + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {"--help"}); +} + +TEST(defaultdevicetype, cmd_line_args_tools_arguments) { + CmdLineArgsHelper cla = {{ + "--kokkos-tool-libs=ich_tue_nur.so", + }}; + Kokkos::InitializationSettings settings; + ::testing::internal::CaptureStderr(); + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + auto captured = ::testing::internal::GetCapturedStderr(); + EXPECT_TRUE(captured.find("not recognized") != std::string::npos && + captured.find("--kokkos-tool-libs=ich_tue_nur.so") != + std::string::npos && + !settings.has_tools_libs()) + << captured; + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS( + cla, {"--kokkos-tool-libs=ich_tue_nur.so"}); + + cla = {{ + "--kokkos-tools-libs=ich_tue_nur.so", + }}; + settings = {}; + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + EXPECT_TRUE(settings.has_tools_libs()); + EXPECT_EQ(settings.get_tools_libs(), "ich_tue_nur.so"); + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {}); +} + +TEST(defaultdevicetype, cmd_line_args_unrecognized_flag) { + CmdLineArgsHelper cla = {{ + "--kokkos_num_threads=4", // underscores instead of dashes + }}; + Kokkos::InitializationSettings settings; + ::testing::internal::CaptureStderr(); + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + auto captured = ::testing::internal::GetCapturedStderr(); + EXPECT_TRUE(captured.find("not recognized") != std::string::npos && + captured.find("--kokkos_num_threads=4") != std::string::npos && + !settings.has_num_threads()) + << captured; + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {"--kokkos_num_threads=4"}); + + cla = {{ + "-kokkos-num-threads=4", // missing one leading dash + }}; + ::testing::internal::CaptureStderr(); + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + captured = ::testing::internal::GetCapturedStderr(); + EXPECT_TRUE(captured.find("not recognized") 
!= std::string::npos && + captured.find("-kokkos-num-threads=4") != std::string::npos && + !settings.has_num_threads()) + << captured; + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {"-kokkos-num-threads=4"}); + + cla = {{ + "--kokko-num-threads=4", // no warning when prefix misspelled + }}; + ::testing::internal::CaptureStderr(); + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + captured = ::testing::internal::GetCapturedStderr(); + EXPECT_TRUE(captured.empty() && !settings.has_num_threads()) << captured; + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, {"--kokko-num-threads=4"}); + + Kokkos::Impl::do_not_warn_not_recognized_command_line_argument( + std::regex{"^--kokkos-extension.*"}); + cla = {{ + "--kokkos-extension-option=value", // user explicitly asked not to warn + // about that prefix + }}; + ::testing::internal::CaptureStderr(); + Kokkos::Impl::parse_command_line_arguments(cla.argc(), cla.argv(), settings); + captured = ::testing::internal::GetCapturedStderr(); + EXPECT_TRUE(captured.empty()) << captured; + EXPECT_REMAINING_COMMAND_LINE_ARGUMENTS(cla, + {"--kokkos-extension-option=value"}); +} + +TEST(defaultdevicetype, env_vars_num_threads) { + EnvVarsHelper ev = {{ + {"KOKKOS_NUM_THREADS", "24"}, + {"KOKKOS_DISABLE_WARNINGS", "1"}, + }}; + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_environment_variables(settings); + EXPECT_TRUE(settings.has_num_threads()); + EXPECT_EQ(settings.get_num_threads(), 24); + EXPECT_TRUE(settings.has_disable_warnings()); + EXPECT_TRUE(settings.get_disable_warnings()); + + ev = {{ + {"KOKKOS_NUM_THREADS", "1ABC"}, + }}; + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); + settings = {}; + Kokkos::Impl::parse_environment_variables(settings); + EXPECT_TRUE(settings.has_num_threads()); + EXPECT_EQ(settings.get_num_threads(), 1); +} + +TEST(defaultdevicetype, env_vars_device_id) { + EnvVarsHelper ev = {{ + {"KOKKOS_DEVICE_ID", "33"}, + 
}}; + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_environment_variables(settings); + EXPECT_TRUE(settings.has_device_id()); + EXPECT_EQ(settings.get_device_id(), 33); +} + +TEST(defaultdevicetype, env_vars_num_devices) { + EnvVarsHelper ev = {{ + {"KOKKOS_NUM_DEVICES", "4"}, + {"KOKKOS_SKIP_DEVICE", "1"}, + }}; + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_environment_variables(settings); + EXPECT_TRUE(settings.has_num_devices()); + EXPECT_EQ(settings.get_num_devices(), 4); + EXPECT_TRUE(settings.has_skip_device()); + EXPECT_EQ(settings.get_skip_device(), 1); +} + +TEST(defaultdevicetype, env_vars_disable_warnings) { + for (auto const& value_true : {"1", "true", "TRUE", "yEs"}) { + EnvVarsHelper ev = {{ + {"KOKKOS_DISABLE_WARNINGS", value_true}, + }}; + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_environment_variables(settings); + EXPECT_TRUE(settings.has_disable_warnings()) + << "KOKKOS_DISABLE_WARNINGS=" << value_true; + EXPECT_TRUE(settings.get_disable_warnings()) + << "KOKKOS_DISABLE_WARNINGS=" << value_true; + } + for (auto const& value_false : {"0", "fAlse", "No"}) { + EnvVarsHelper ev = {{ + {"KOKKOS_DISABLE_WARNINGS", value_false}, + }}; + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_environment_variables(settings); + EXPECT_TRUE(settings.has_disable_warnings()) + << "KOKKOS_DISABLE_WARNINGS=" << value_false; + EXPECT_FALSE(settings.get_disable_warnings()) + << "KOKKOS_DISABLE_WARNINGS=" << value_false; + } +} + +TEST(defaultdevicetype, env_vars_tune_internals) { + for (auto const& value_true : {"1", "yES", "true", "TRUE", "tRuE"}) { + EnvVarsHelper ev = {{ + {"KOKKOS_TUNE_INTERNALS", value_true}, + }}; + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); + Kokkos::InitializationSettings settings; + 
Kokkos::Impl::parse_environment_variables(settings); + EXPECT_TRUE(settings.has_tune_internals()) + << "KOKKOS_TUNE_INTERNALS=" << value_true; + EXPECT_TRUE(settings.get_tune_internals()) + << "KOKKOS_TUNE_INTERNALS=" << value_true; + } + for (auto const& value_false : {"0", "false", "no"}) { + EnvVarsHelper ev = {{ + {"KOKKOS_TUNE_INTERNALS", value_false}, + }}; + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); + Kokkos::InitializationSettings settings; + Kokkos::Impl::parse_environment_variables(settings); + EXPECT_TRUE(settings.has_tune_internals()) + << "KOKKOS_TUNE_INTERNALS=" << value_false; + EXPECT_FALSE(settings.get_tune_internals()) + << "KOKKOS_TUNE_INTERNALS=" << value_false; + } +} + +TEST(defaultdevicetype, visible_devices) { +#define KOKKOS_TEST_VISIBLE_DEVICES(ENV, CNT, DEV) \ + do { \ + EnvVarsHelper ev{ENV}; \ + SKIP_IF_ENVIRONMENT_VARIABLE_ALREADY_SET(ev); \ + Kokkos::InitializationSettings settings; \ + Kokkos::Impl::parse_environment_variables(settings); \ + auto computed = Kokkos::Impl::get_visible_devices(settings, CNT); \ + std::vector<int> expected = DEV; \ + EXPECT_EQ(expected.size(), computed.size()) \ + << ev << "device count: " << CNT; \ + auto n = std::min<int>(expected.size(), computed.size()); \ + for (int i = 0; i < n; ++i) { \ + EXPECT_EQ(expected[i], computed[i]) \ + << "devices differ at index " << i << '\n' \ + << ev << "device count: " << CNT; \ + } \ + } while (false) + +#define DEV(...) \ + std::vector<int> { __VA_ARGS__ } +#define ENV(...) std::unordered_map<std::string, std::string>{__VA_ARGS__} + + // first test with all environment variables that are involved in determining + // the visible devices so user set var do not mess up the logic below. 
+ KOKKOS_TEST_VISIBLE_DEVICES( + ENV({"KOKKOS_VISIBLE_DEVICES", "2,1"}, {"KOKKOS_NUM_DEVICES", "8"}, + {"KOKKOS_SKIP_DEVICE", "1"}), + 6, DEV(2, 1)); + KOKKOS_TEST_VISIBLE_DEVICES( + ENV({"KOKKOS_VISIBLE_DEVICES", "2,1"}, {"KOKKOS_NUM_DEVICES", "8"}, ), 6, + DEV(2, 1)); + KOKKOS_TEST_VISIBLE_DEVICES(ENV({"KOKKOS_NUM_DEVICES", "3"}), 6, + DEV(0, 1, 2)); + KOKKOS_TEST_VISIBLE_DEVICES( + ENV({"KOKKOS_NUM_DEVICES", "4"}, {"KOKKOS_SKIP_DEVICE", "1"}, ), 6, + DEV(0, 2, 3)); + KOKKOS_TEST_VISIBLE_DEVICES(ENV({"KOKKOS_VISIBLE_DEVICES", "1,3,4"}), 6, + DEV(1, 3, 4)); + KOKKOS_TEST_VISIBLE_DEVICES( + ENV({"KOKKOS_VISIBLE_DEVICES", "2,1"}, {"KOKKOS_SKIP_DEVICE", "1"}, ), 6, + DEV(2, 1)); + KOKKOS_TEST_VISIBLE_DEVICES(ENV(), 4, DEV(0, 1, 2, 3)); + +#undef ENV +#undef DEV +#undef KOKKOS_TEST_VISIBLE_DEVICES +} + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp b/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp index 26eb22670134c96b1ace40e15777027fa9a7262b..b25acb455518493761029f1f1c3453a7e180171c 100644 --- a/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp +++ b/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -45,7 +45,6 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <stdexcept> #include <sstream> #include <iostream> #include <type_traits> @@ -76,7 +75,7 @@ class TestRangePolicyConstruction { typename execution_space::size_type>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Static>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -91,7 +90,7 @@ class TestRangePolicyConstruction { typename execution_space::size_type>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Static>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -107,7 +106,7 @@ class TestRangePolicyConstruction { typename 
execution_space::size_type>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -123,7 +122,7 @@ class TestRangePolicyConstruction { ASSERT_TRUE((std::is_same<index_type, long>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -139,7 +138,7 @@ class TestRangePolicyConstruction { ASSERT_TRUE((std::is_same<index_type, long>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -203,7 +202,7 @@ class TestRangePolicyConstruction { typename execution_space::size_type>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -219,7 +218,7 @@ class TestRangePolicyConstruction { ASSERT_TRUE((std::is_same<index_type, long>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -235,7 +234,7 @@ class TestRangePolicyConstruction { ASSERT_TRUE((std::is_same<index_type, long>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -351,7 +350,7 @@ class TestTeamPolicyConstruction { typename execution_space::size_type>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Static>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + 
ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -366,7 +365,7 @@ class TestTeamPolicyConstruction { typename execution_space::size_type>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Static>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -382,7 +381,7 @@ class TestTeamPolicyConstruction { typename execution_space::size_type>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -398,7 +397,7 @@ class TestTeamPolicyConstruction { ASSERT_TRUE((std::is_same<index_type, long>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -414,7 +413,7 @@ class TestTeamPolicyConstruction { ASSERT_TRUE((std::is_same<index_type, long>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -478,7 +477,7 @@ class TestTeamPolicyConstruction { typename execution_space::size_type>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -494,7 +493,7 @@ class TestTeamPolicyConstruction { ASSERT_TRUE((std::is_same<index_type, long>::value)); ASSERT_TRUE((std::is_same<schedule_type, Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { @@ -510,7 +509,7 @@ class TestTeamPolicyConstruction { ASSERT_TRUE((std::is_same<index_type, long>::value)); ASSERT_TRUE((std::is_same<schedule_type, 
Kokkos::Schedule<Kokkos::Dynamic>>::value)); - ASSERT_TRUE((std::is_same<work_tag, void>::value)); + ASSERT_TRUE((std::is_void<work_tag>::value)); } { diff --git a/packages/kokkos/core/unit_test/TestQuadPrecisionMath.hpp b/packages/kokkos/core/unit_test/TestQuadPrecisionMath.hpp index e45d84e7e05b5beaed658fff20201968fd0d1050..3be6b70ecf0cc165b73d4f1a947e8d276f0222c2 100644 --- a/packages/kokkos/core/unit_test/TestQuadPrecisionMath.hpp +++ b/packages/kokkos/core/unit_test/TestQuadPrecisionMath.hpp @@ -50,6 +50,8 @@ #include <gtest/gtest.h> +namespace { + // FIXME instantiate only once for default host execution space TEST(TEST_CATEGORY, quad_precision_reductions) { int const n = 100; @@ -98,12 +100,52 @@ TEST(TEST_CATEGORY, quad_precision_common_math_functions) { Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, 1), KOKKOS_LAMBDA(int) { - (void)Kokkos::Experimental::fabs((__float128)0); - (void)Kokkos::Experimental::sqrt((__float128)1); - (void)Kokkos::Experimental::exp((__float128)2); - (void)Kokkos::Experimental::sin((__float128)3); - (void)Kokkos::Experimental::cosh((__float128)4); + (void)Kokkos::fabs((__float128)0); + (void)Kokkos::sqrt((__float128)1); + (void)Kokkos::exp((__float128)2); + (void)Kokkos::sin((__float128)3); + (void)Kokkos::cosh((__float128)4); }); } +#define STATIC_ASSERT(...) 
static_assert(__VA_ARGS__, "") // FIXME C++17 + +constexpr bool test_quad_precision_promotion_traits() { + STATIC_ASSERT( + std::is_same<__float128, decltype(Kokkos::pow(__float128(1), 2))>::value); + STATIC_ASSERT(std::is_same<__float128, + decltype(Kokkos::hypot(3, __float128(4)))>::value); + return true; +} + +STATIC_ASSERT(test_quad_precision_promotion_traits()); + +constexpr bool test_quad_precision_math_constants() { + // compare to mathematical constants defined in libquadmath when available + // clang-format off + STATIC_ASSERT(Kokkos::Experimental::e_v <__float128> == M_Eq); + STATIC_ASSERT(Kokkos::Experimental::log2e_v <__float128> == M_LOG2Eq); + STATIC_ASSERT(Kokkos::Experimental::log10e_v<__float128> == M_LOG10Eq); + STATIC_ASSERT(Kokkos::Experimental::pi_v <__float128> == M_PIq); +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU >= 930) + STATIC_ASSERT(Kokkos::Experimental::inv_pi_v<__float128> == M_1_PIq); +#endif + // inv_sqrtpi_v + STATIC_ASSERT(Kokkos::Experimental::ln2_v <__float128> == M_LN2q); + STATIC_ASSERT(Kokkos::Experimental::ln10_v <__float128> == M_LN10q); +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU >= 930) + STATIC_ASSERT(Kokkos::Experimental::sqrt2_v <__float128> == M_SQRT2q); +#endif + // sqrt3_v + // inv_sqrt3_v + // egamma_v + // phi_v + // clang-format on + return true; +} + +STATIC_ASSERT(test_quad_precision_math_constants()); + +} // namespace + #endif diff --git a/packages/kokkos/core/unit_test/TestRealloc.hpp b/packages/kokkos/core/unit_test/TestRealloc.hpp index 2b3e1ac3738223f1eaaa03d79a7ed757ee064d5a..3de42070e7d14b87e63f4dbea891c1cdd3f15693 100644 --- a/packages/kokkos/core/unit_test/TestRealloc.hpp +++ b/packages/kokkos/core/unit_test/TestRealloc.hpp @@ -71,81 +71,105 @@ void impl_testRealloc() { using view_type = Kokkos::View<int*, DeviceType>; view_type view_1d("view_1d", sizes[0]); const int* oldPointer = view_1d.data(); - EXPECT_TRUE(oldPointer != nullptr); + auto const& oldLabel = 
view_1d.label(); + EXPECT_NE(oldPointer, nullptr); realloc_dispatch(Tag{}, view_1d, sizes[0]); + auto const& newLabel = view_1d.label(); + EXPECT_EQ(oldLabel, newLabel); const int* newPointer = view_1d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int**, DeviceType>; view_type view_2d("view_2d", sizes[0], sizes[1]); + auto const& oldLabel = view_2d.label(); const int* oldPointer = view_2d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); realloc_dispatch(Tag{}, view_2d, sizes[0], sizes[1]); + auto const& newLabel = view_2d.label(); + EXPECT_EQ(oldLabel, newLabel); const int* newPointer = view_2d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int***, DeviceType>; view_type view_3d("view_3d", sizes[0], sizes[1], sizes[2]); + auto const& oldLabel = view_3d.label(); const int* oldPointer = view_3d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); realloc_dispatch(Tag{}, view_3d, sizes[0], sizes[1], sizes[2]); + auto const& newLabel = view_3d.label(); + EXPECT_EQ(oldLabel, newLabel); const int* newPointer = view_3d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int****, DeviceType>; view_type view_4d("view_4d", sizes[0], sizes[1], sizes[2], sizes[3]); + auto const& oldLabel = view_4d.label(); const int* oldPointer = view_4d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); realloc_dispatch(Tag{}, view_4d, sizes[0], sizes[1], sizes[2], sizes[3]); + auto const& newLabel = view_4d.label(); + EXPECT_EQ(oldLabel, newLabel); const int* newPointer = view_4d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int*****, DeviceType>; view_type view_5d("view_5d", sizes[0], sizes[1], sizes[2], sizes[3], 
sizes[4]); + auto const& oldLabel = view_5d.label(); const int* oldPointer = view_5d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); realloc_dispatch(Tag{}, view_5d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4]); + auto const& newLabel = view_5d.label(); + EXPECT_EQ(oldLabel, newLabel); const int* newPointer = view_5d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int******, DeviceType>; view_type view_6d("view_6d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5]); const int* oldPointer = view_6d.data(); - EXPECT_TRUE(oldPointer != nullptr); + auto const& oldLabel = view_6d.label(); + EXPECT_NE(oldPointer, nullptr); realloc_dispatch(Tag{}, view_6d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5]); + auto const& newLabel = view_6d.label(); + EXPECT_EQ(oldLabel, newLabel); const int* newPointer = view_6d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int*******, DeviceType>; view_type view_7d("view_7d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]); + auto const& oldLabel = view_7d.label(); const int* oldPointer = view_7d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); realloc_dispatch(Tag{}, view_7d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]); + auto const& newLabel = view_7d.label(); + EXPECT_EQ(oldLabel, newLabel); const int* newPointer = view_7d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int********, DeviceType>; view_type view_8d("view_8d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6], sizes[7]); + auto const& oldLabel = view_8d.label(); const int* oldPointer = view_8d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); realloc_dispatch(Tag{}, 
view_8d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6], sizes[7]); + auto const& newLabel = view_8d.label(); + EXPECT_EQ(oldLabel, newLabel); const int* newPointer = view_8d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } } diff --git a/packages/kokkos/core/unit_test/TestReduce.hpp b/packages/kokkos/core/unit_test/TestReduce.hpp index 161b21615febdd9064c94482429ddd5495514952..c136b409b1ed6d82e4f1a67856348328176562e1 100644 --- a/packages/kokkos/core/unit_test/TestReduce.hpp +++ b/packages/kokkos/core/unit_test/TestReduce.hpp @@ -42,7 +42,6 @@ //@HEADER */ -#include <stdexcept> #include <sstream> #include <iostream> #include <limits> @@ -82,7 +81,7 @@ class ReduceFunctor { */ KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dst, const volatile value_type& src) const { + void join(value_type& dst, const value_type& src) const { dst.value[0] += src.value[0]; dst.value[1] += src.value[1]; dst.value[2] += src.value[2]; @@ -129,8 +128,7 @@ class ReduceFunctorFinalTag { ReduceFunctorFinalTag(const size_type arg_nwork) : nwork(arg_nwork) {} KOKKOS_INLINE_FUNCTION - void join(const ReducerTag, volatile value_type& dst, - const volatile value_type& src) const { + void join(const ReducerTag, value_type& dst, const value_type& src) const { dst.value[0] += src.value[0]; dst.value[1] += src.value[1]; dst.value[2] += src.value[2]; @@ -174,7 +172,7 @@ class RuntimeReduceFunctor { } KOKKOS_INLINE_FUNCTION - void join(volatile ScalarType dst[], const volatile ScalarType src[]) const { + void join(ScalarType dst[], const ScalarType src[]) const { for (unsigned i = 0; i < value_count; ++i) dst[i] += src[i]; } @@ -218,7 +216,7 @@ class RuntimeReduceMinMax { } KOKKOS_INLINE_FUNCTION - void join(volatile ScalarType dst[], const volatile ScalarType src[]) const { + void join(ScalarType dst[], const ScalarType src[]) const { for (unsigned i = 0; i < value_count; ++i) { dst[i] = i % 2 ? (dst[i] < src[i] ? 
dst[i] : src[i]) // min : (dst[i] > src[i] ? dst[i] : src[i]); // max @@ -634,21 +632,35 @@ TEST(TEST_CATEGORY, int_combined_reduce_mixed) { constexpr uint64_t nw = 1000; uint64_t nsum = (nw / 2) * (nw + 1); - - auto result1_v = Kokkos::View<int64_t, Kokkos::HostSpace>{"result1_v"}; - - int64_t result2 = 0; - - auto result3_v = Kokkos::View<int64_t, Kokkos::HostSpace>{"result3_v"}; - - Kokkos::parallel_reduce("int_combined-reduce_mixed", - Kokkos::RangePolicy<TEST_EXECSPACE>(0, nw), - functor_type(nw), result1_v, result2, - Kokkos::Sum<int64_t, Kokkos::HostSpace>{result3_v}); - - ASSERT_EQ(int64_t(nw), result1_v()); - ASSERT_EQ(int64_t(nsum), result2); - ASSERT_EQ(int64_t(nsum), result3_v()); + { + auto result1_v = Kokkos::View<int64_t, Kokkos::HostSpace>{"result1_v"}; + int64_t result2 = 0; + auto result3_v = Kokkos::View<int64_t, Kokkos::HostSpace>{"result3_v"}; + Kokkos::parallel_reduce("int_combined-reduce_mixed", + Kokkos::RangePolicy<TEST_EXECSPACE>(0, nw), + functor_type(nw), result1_v, result2, + Kokkos::Sum<int64_t, Kokkos::HostSpace>{result3_v}); + ASSERT_EQ(int64_t(nw), result1_v()); + ASSERT_EQ(int64_t(nsum), result2); + ASSERT_EQ(int64_t(nsum), result3_v()); + } + { + using MemorySpace = typename TEST_EXECSPACE::memory_space; + auto result1_v = Kokkos::View<int64_t, MemorySpace>{"result1_v"}; + int64_t result2 = 0; + auto result3_v = Kokkos::View<int64_t, MemorySpace>{"result3_v"}; + Kokkos::parallel_reduce("int_combined-reduce_mixed", + Kokkos::RangePolicy<TEST_EXECSPACE>(0, nw), + functor_type(nw), result1_v, result2, + Kokkos::Sum<int64_t, MemorySpace>{result3_v}); + int64_t result1; + Kokkos::deep_copy(result1, result1_v); + ASSERT_EQ(int64_t(nw), result1); + ASSERT_EQ(int64_t(nsum), result2); + int64_t result3; + Kokkos::deep_copy(result3, result3_v); + ASSERT_EQ(int64_t(nsum), result3); + } } #endif } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestReduceCombinatorical.hpp 
b/packages/kokkos/core/unit_test/TestReduceCombinatorical.hpp index 4664f265594b858e8879e7d2faa3aca62d320a0d..2217b9b8a3f46fc28c19d9cc1479e716a9e32903 100644 --- a/packages/kokkos/core/unit_test/TestReduceCombinatorical.hpp +++ b/packages/kokkos/core/unit_test/TestReduceCombinatorical.hpp @@ -42,7 +42,6 @@ //@HEADER */ -#include <stdexcept> #include <sstream> #include <iostream> #include <limits> @@ -73,11 +72,6 @@ struct AddPlus { KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { dest += src + 1; } - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest += src + 1; - } - // Optional. KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val = value_type(); } @@ -195,9 +189,7 @@ struct FunctorScalarJoin<0> { void operator()(const int& i, double& update) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { - dst += update; - } + void join(double& dst, const double& update) const { dst += update; } }; template <> @@ -214,9 +206,7 @@ struct FunctorScalarJoin<1> { } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { - dst += update; - } + void join(double& dst, const double& update) const { dst += update; } }; template <int ISTEAM> @@ -232,9 +222,7 @@ struct FunctorScalarJoinFinal<0> { void operator()(const int& i, double& update) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { - dst += update; - } + void join(double& dst, const double& update) const { dst += update; } KOKKOS_INLINE_FUNCTION void final(double& update) const { result() = update; } @@ -254,9 +242,7 @@ struct FunctorScalarJoinFinal<1> { } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { - dst += update; - } + void join(double& dst, const double& update) const { dst += update; } 
KOKKOS_INLINE_FUNCTION void final(double& update) const { result() = update; } @@ -275,9 +261,7 @@ struct FunctorScalarJoinInit<0> { void operator()(const int& i, double& update) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { - dst += update; - } + void join(double& dst, const double& update) const { dst += update; } KOKKOS_INLINE_FUNCTION void init(double& update) const { update = 0.0; } @@ -297,9 +281,7 @@ struct FunctorScalarJoinInit<1> { } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { - dst += update; - } + void join(double& dst, const double& update) const { dst += update; } KOKKOS_INLINE_FUNCTION void init(double& update) const { update = 0.0; } @@ -318,9 +300,7 @@ struct FunctorScalarJoinFinalInit<0> { void operator()(const int& i, double& update) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { - dst += update; - } + void join(double& dst, const double& update) const { dst += update; } KOKKOS_INLINE_FUNCTION void final(double& update) const { result() = update; } @@ -343,9 +323,7 @@ struct FunctorScalarJoinFinalInit<1> { } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { - dst += update; - } + void join(double& dst, const double& update) const { dst += update; } KOKKOS_INLINE_FUNCTION void final(double& update) const { result() = update; } @@ -379,7 +357,7 @@ struct Functor2 { } KOKKOS_INLINE_FUNCTION - void join(volatile double dst[], const volatile double src[]) const { + void join(double dst[], const double src[]) const { for (unsigned i = 0; i < value_count; ++i) dst[i] += src[i]; } }; @@ -545,9 +523,9 @@ struct TestReduceCombinatoricalInstantiation { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA AddLambdaRange( N, - typename std::conditional< + std::conditional_t< std::is_same<ExecSpace, 
Kokkos::DefaultExecutionSpace>::value, - void*, Kokkos::InvalidType>::type(), + void*, Kokkos::InvalidType>(), args...); #endif } @@ -558,9 +536,9 @@ struct TestReduceCombinatoricalInstantiation { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA AddLambdaTeam( N, - typename std::conditional< + std::conditional_t< std::is_same<ExecSpace, Kokkos::DefaultExecutionSpace>::value, - void*, Kokkos::InvalidType>::type(), + void*, Kokkos::InvalidType>(), args...); #endif } diff --git a/packages/kokkos/core/unit_test/TestReducers.hpp b/packages/kokkos/core/unit_test/TestReducers.hpp index 7584227945e8f8fc35c398c05507351de4deab66..10e0f3e9544fc2344b3b9c8387cd34ed1d5a70d3 100644 --- a/packages/kokkos/core/unit_test/TestReducers.hpp +++ b/packages/kokkos/core/unit_test/TestReducers.hpp @@ -42,7 +42,6 @@ //@HEADER */ -#include <stdexcept> #include <sstream> #include <iostream> #include <limits> @@ -296,8 +295,24 @@ struct TestReducers { Scalar reference_sum = 0; for (int i = 0; i < N; i++) { - int denom = sizeof(Scalar) <= 2 ? 10 : 100; - h_values(i) = (Scalar)(rand() % denom); + int denom = sizeof(Scalar) <= 2 ? 10 : 100; + // clang-format off + // For bhalf, we start overflowing integer values at 2^8. + // after 2^8, we lose representation of odd numbers; + // after 2^9, we lose representation of odd and even numbers in position 1. + // after 2^10, we lose representation of odd and even numbers in position 1-3. + // after 2^11, we lose representation of odd and even numbers in position 1-7. + // ... + // Generally, for IEEE 754 floating point numbers, we start this overflow pattern at: 2^(num_fraction_bits+1). + // brain float has num_fraction_bits = 7. + // This mask addresses #4719 for N <= 51. + // The mask is not needed for N <= 25. + // clang-format on + int mask = + std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value && N > 25 + ? 
(int)0xfffffffe + : (int)0xffffffff; + h_values(i) = (Scalar)((rand() % denom) & mask); reference_sum += h_values(i); } Kokkos::deep_copy(values, h_values); @@ -314,19 +329,19 @@ struct TestReducers { Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, 0), f, reducer_scalar); - ASSERT_EQ(sum_scalar, init); + ASSERT_EQ(sum_scalar, init) << "N: " << N; Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), f, reducer_scalar); - ASSERT_EQ(sum_scalar, reference_sum); + ASSERT_EQ(sum_scalar, reference_sum) << "N: " << N; sum_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace, ReducerTag>(0, N), f_tag, reducer_scalar); - ASSERT_EQ(sum_scalar, reference_sum); + ASSERT_EQ(sum_scalar, reference_sum) << "N: " << N; Scalar sum_scalar_view = reducer_scalar.reference(); - ASSERT_EQ(sum_scalar_view, reference_sum); + ASSERT_EQ(sum_scalar_view, reference_sum) << "N: " << N; } { @@ -337,16 +352,16 @@ struct TestReducers { reducer_view); Kokkos::fence(); Scalar sum_view_scalar = sum_view(); - ASSERT_EQ(sum_view_scalar, init); + ASSERT_EQ(sum_view_scalar, init) << "N: " << N; Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), f, reducer_view); Kokkos::fence(); sum_view_scalar = sum_view(); - ASSERT_EQ(sum_view_scalar, reference_sum); + ASSERT_EQ(sum_view_scalar, reference_sum) << "N: " << N; Scalar sum_view_view = reducer_view.reference(); - ASSERT_EQ(sum_view_view, reference_sum); + ASSERT_EQ(sum_view_view, reference_sum) << "N: " << N; } { @@ -359,13 +374,13 @@ struct TestReducers { Kokkos::fence(); Scalar sum_view_scalar; Kokkos::deep_copy(sum_view_scalar, sum_view); - ASSERT_EQ(sum_view_scalar, init); + ASSERT_EQ(sum_view_scalar, init) << "N: " << N; Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0, N), f, reducer_view); Kokkos::fence(); Kokkos::deep_copy(sum_view_scalar, sum_view); - ASSERT_EQ(sum_view_scalar, reference_sum); + ASSERT_EQ(sum_view_scalar, reference_sum) << "N: " << N; } } diff --git 
a/packages/kokkos/core/unit_test/TestReducers_d.hpp b/packages/kokkos/core/unit_test/TestReducers_d.hpp index 67f30e6cf238a8c3362012e110bd716cde1820fe..a84a6e20f0456c367a6f49a335db0f71a7a9e6bb 100644 --- a/packages/kokkos/core/unit_test/TestReducers_d.hpp +++ b/packages/kokkos/core/unit_test/TestReducers_d.hpp @@ -79,50 +79,19 @@ TEST(TEST_CATEGORY, reducers_half_t) { TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(25); } -// TODO: File a bug report for this? -// This fails on the CUDA-11.0-NVCC-C++17-RDC CI check. -// TEST(TEST_CATEGORY, openmp_cuda11_reduction_bug_with_bhalf_t) { -// using ThisTestType = Kokkos::Experimental::bhalf_t; -// TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(50); -// TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(51); -// // For some reason commenting out reductions of 52,53,54,55 causes -// // the reduction of 56 to fail on OpenMP with Cuda/11.0 -// //TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(52); -// //TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(53); -// //TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(54); -// //TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(55); -// TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(56); -//} - TEST(TEST_CATEGORY, reducers_bhalf_t) { -#if defined(KOKKOS_ENABLE_OPENMP) - if (!std::is_same<TEST_EXECSPACE, Kokkos::OpenMP>::value) -#else - if (true) -#endif // ENABLE_OPENMP - { - using ThisTestType = Kokkos::Experimental::bhalf_t; - TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(2); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(50); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(51); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(52); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(53); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(54); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(55); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(56); - // TestReducers<ThisTestType, 
TEST_EXECSPACE>::test_sum(57); - // This could be 57 on device but there seems to be a loss of precision when - // running on OpenMP with Cuda/11.0 - TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(5); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(10); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(15); -#if (CUDA_VERSION < 11000) - TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(20); - TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(21); -#endif - } else { - GTEST_SKIP(); - } + using ThisTestType = Kokkos::Experimental::bhalf_t; + + TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(2); + TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(25); + TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(50); + TestReducers<ThisTestType, TEST_EXECSPACE>::test_sum(51); + + TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(5); + TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(10); + TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(15); + TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(20); + TestReducers<ThisTestType, TEST_EXECSPACE>::test_prod(25); } TEST(TEST_CATEGORY, reducers_int8_t) { diff --git a/packages/kokkos/core/unit_test/TestResize.hpp b/packages/kokkos/core/unit_test/TestResize.hpp index cf5c0df6f9163039fbd3ca1df8aee2a4b24ac882..cd1fde4a9da716b39723a5755209f5d831215927 100644 --- a/packages/kokkos/core/unit_test/TestResize.hpp +++ b/packages/kokkos/core/unit_test/TestResize.hpp @@ -71,81 +71,81 @@ void impl_testResize() { using view_type = Kokkos::View<int*, DeviceType>; view_type view_1d("view_1d", sizes[0]); const int* oldPointer = view_1d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); resize_dispatch(Tag{}, view_1d, sizes[0]); const int* newPointer = view_1d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int**, DeviceType>; view_type view_2d("view_2d", sizes[0], sizes[1]); const int* 
oldPointer = view_2d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); resize_dispatch(Tag{}, view_2d, sizes[0], sizes[1]); const int* newPointer = view_2d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int***, DeviceType>; view_type view_3d("view_3d", sizes[0], sizes[1], sizes[2]); const int* oldPointer = view_3d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); resize_dispatch(Tag{}, view_3d, sizes[0], sizes[1], sizes[2]); const int* newPointer = view_3d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int****, DeviceType>; view_type view_4d("view_4d", sizes[0], sizes[1], sizes[2], sizes[3]); const int* oldPointer = view_4d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); resize_dispatch(Tag{}, view_4d, sizes[0], sizes[1], sizes[2], sizes[3]); const int* newPointer = view_4d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int*****, DeviceType>; view_type view_5d("view_5d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4]); const int* oldPointer = view_5d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); resize_dispatch(Tag{}, view_5d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4]); const int* newPointer = view_5d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int******, DeviceType>; view_type view_6d("view_6d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5]); const int* oldPointer = view_6d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); resize_dispatch(Tag{}, view_6d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5]); const int* newPointer = view_6d.data(); - EXPECT_TRUE(oldPointer == newPointer); + 
EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int*******, DeviceType>; view_type view_7d("view_7d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]); const int* oldPointer = view_7d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); resize_dispatch(Tag{}, view_7d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]); const int* newPointer = view_7d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } { using view_type = Kokkos::View<int********, DeviceType>; view_type view_8d("view_8d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6], sizes[7]); const int* oldPointer = view_8d.data(); - EXPECT_TRUE(oldPointer != nullptr); + EXPECT_NE(oldPointer, nullptr); resize_dispatch(Tag{}, view_8d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6], sizes[7]); const int* newPointer = view_8d.data(); - EXPECT_TRUE(oldPointer == newPointer); + EXPECT_EQ(oldPointer, newPointer); } // Resize without initialization: check if data preserved { @@ -156,7 +156,7 @@ void impl_testResize() { Kokkos::deep_copy(view_1d, 111); Kokkos::deep_copy(h_view_1d_old, view_1d); resize_dispatch(Tag{}, view_1d, 2 * sizes[0]); - EXPECT_TRUE(view_1d.extent(0) == 2 * sizes[0]); + EXPECT_EQ(view_1d.extent(0), 2 * sizes[0]); typename view_type::HostMirror h_view_1d = Kokkos::create_mirror_view(view_1d); Kokkos::deep_copy(h_view_1d, view_1d); @@ -167,7 +167,7 @@ void impl_testResize() { break; } } - EXPECT_TRUE(test == true); + EXPECT_TRUE(test); } { using view_type = Kokkos::View<int**, DeviceType>; @@ -177,7 +177,7 @@ void impl_testResize() { Kokkos::deep_copy(view_2d, 222); Kokkos::deep_copy(h_view_2d_old, view_2d); resize_dispatch(Tag{}, view_2d, 2 * sizes[0], sizes[1]); - EXPECT_TRUE(view_2d.extent(0) == 2 * sizes[0]); + EXPECT_EQ(view_2d.extent(0), 2 * sizes[0]); typename view_type::HostMirror h_view_2d = 
Kokkos::create_mirror_view(view_2d); Kokkos::deep_copy(h_view_2d, view_2d); @@ -190,7 +190,7 @@ void impl_testResize() { } } } - EXPECT_TRUE(test == true); + EXPECT_TRUE(test); } { using view_type = Kokkos::View<int***, DeviceType>; @@ -200,7 +200,7 @@ void impl_testResize() { Kokkos::deep_copy(view_3d, 333); Kokkos::deep_copy(h_view_3d_old, view_3d); resize_dispatch(Tag{}, view_3d, 2 * sizes[0], sizes[1], sizes[2]); - EXPECT_TRUE(view_3d.extent(0) == 2 * sizes[0]); + EXPECT_EQ(view_3d.extent(0), 2 * sizes[0]); typename view_type::HostMirror h_view_3d = Kokkos::create_mirror_view(view_3d); Kokkos::deep_copy(h_view_3d, view_3d); @@ -215,7 +215,7 @@ void impl_testResize() { } } } - EXPECT_TRUE(test == true); + EXPECT_TRUE(test); } { using view_type = Kokkos::View<int****, DeviceType>; @@ -225,7 +225,7 @@ void impl_testResize() { Kokkos::deep_copy(view_4d, 444); Kokkos::deep_copy(h_view_4d_old, view_4d); resize_dispatch(Tag{}, view_4d, 2 * sizes[0], sizes[1], sizes[2], sizes[3]); - EXPECT_TRUE(view_4d.extent(0) == 2 * sizes[0]); + EXPECT_EQ(view_4d.extent(0), 2 * sizes[0]); typename view_type::HostMirror h_view_4d = Kokkos::create_mirror_view(view_4d); Kokkos::deep_copy(h_view_4d, view_4d); @@ -242,7 +242,7 @@ void impl_testResize() { } } } - EXPECT_TRUE(test == true); + EXPECT_TRUE(test); } { using view_type = Kokkos::View<int*****, DeviceType>; @@ -254,7 +254,7 @@ void impl_testResize() { Kokkos::deep_copy(h_view_5d_old, view_5d); resize_dispatch(Tag{}, view_5d, 2 * sizes[0], sizes[1], sizes[2], sizes[3], sizes[4]); - EXPECT_TRUE(view_5d.extent(0) == 2 * sizes[0]); + EXPECT_EQ(view_5d.extent(0), 2 * sizes[0]); typename view_type::HostMirror h_view_5d = Kokkos::create_mirror_view(view_5d); Kokkos::deep_copy(h_view_5d, view_5d); @@ -274,7 +274,7 @@ void impl_testResize() { } } } - EXPECT_TRUE(test == true); + EXPECT_TRUE(test); } { using view_type = Kokkos::View<int******, DeviceType>; @@ -286,7 +286,7 @@ void impl_testResize() { Kokkos::deep_copy(h_view_6d_old, 
view_6d); resize_dispatch(Tag{}, view_6d, 2 * sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5]); - EXPECT_TRUE(view_6d.extent(0) == 2 * sizes[0]); + EXPECT_EQ(view_6d.extent(0), 2 * sizes[0]); typename view_type::HostMirror h_view_6d = Kokkos::create_mirror_view(view_6d); Kokkos::deep_copy(h_view_6d, view_6d); @@ -308,7 +308,7 @@ void impl_testResize() { } } } - EXPECT_TRUE(test == true); + EXPECT_TRUE(test); } { using view_type = Kokkos::View<int*******, DeviceType>; @@ -320,7 +320,7 @@ void impl_testResize() { Kokkos::deep_copy(h_view_7d_old, view_7d); resize_dispatch(Tag{}, view_7d, 2 * sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]); - EXPECT_TRUE(view_7d.extent(0) == 2 * sizes[0]); + EXPECT_EQ(view_7d.extent(0), 2 * sizes[0]); typename view_type::HostMirror h_view_7d = Kokkos::create_mirror_view(view_7d); Kokkos::deep_copy(h_view_7d, view_7d); @@ -344,7 +344,7 @@ void impl_testResize() { } } } - EXPECT_TRUE(test == true); + EXPECT_TRUE(test); } { using view_type = Kokkos::View<int********, DeviceType>; @@ -356,7 +356,7 @@ void impl_testResize() { Kokkos::deep_copy(h_view_8d_old, view_8d); resize_dispatch(Tag{}, view_8d, 2 * sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6], sizes[7]); - EXPECT_TRUE(view_8d.extent(0) == 2 * sizes[0]); + EXPECT_EQ(view_8d.extent(0), 2 * sizes[0]); typename view_type::HostMirror h_view_8d = Kokkos::create_mirror_view(view_8d); Kokkos::deep_copy(h_view_8d, view_8d); @@ -382,7 +382,7 @@ void impl_testResize() { } } } - EXPECT_TRUE(test == true); + EXPECT_TRUE(test); } } diff --git a/packages/kokkos/core/unit_test/TestScan.hpp b/packages/kokkos/core/unit_test/TestScan.hpp index 67cb85553d6bf7ccd9cc76b85f7bc32bb0e2e5a7..1a4056af07d3f9584b105cd536e5abca051b30c2 100644 --- a/packages/kokkos/core/unit_test/TestScan.hpp +++ b/packages/kokkos/core/unit_test/TestScan.hpp @@ -88,8 +88,7 @@ struct TestScan { void init(value_type& update) const { update = 0; } KOKKOS_INLINE_FUNCTION - void 
join(volatile value_type& update, - volatile const value_type& input) const { + void join(value_type& update, const value_type& input) const { update += input; } diff --git a/packages/kokkos/core/unit_test/TestSharedAlloc.hpp b/packages/kokkos/core/unit_test/TestSharedAlloc.hpp index 46534eeb13db2f4816d352d2ef302298b97d8486..f66b35dc9f9de2a8b3eabc0338683f3875dbd2b1 100644 --- a/packages/kokkos/core/unit_test/TestSharedAlloc.hpp +++ b/packages/kokkos/core/unit_test/TestSharedAlloc.hpp @@ -44,7 +44,6 @@ #include <gtest/gtest.h> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestStringManipulation.cpp b/packages/kokkos/core/unit_test/TestStringManipulation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..92b2afa475f1a5274cd3bbf80e4834542f7c7dd7 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestStringManipulation.cpp @@ -0,0 +1,217 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <impl/Kokkos_StringManipulation.hpp> +#include <climits> + +namespace { + +#define STATIC_ASSERT(cond) static_assert(cond, "") + +KOKKOS_FUNCTION constexpr bool test_strlen() { + using Kokkos::Impl::strlen; + constexpr char str[] = "How many characters does this string contain?"; + STATIC_ASSERT(strlen(str) == 45); // without null character + STATIC_ASSERT(sizeof str == 46); // with null character + STATIC_ASSERT(strlen("") == 0); + return true; +} +STATIC_ASSERT(test_strlen()); + +KOKKOS_FUNCTION constexpr bool test_strcmp() { + using Kokkos::Impl::strcmp; + constexpr char cat1[] = "Heathcliff"; + constexpr char cat2[] = "Snagglepuss"; + constexpr char cat3[] = "Hobbes"; + constexpr char cat4[] = "Garfield"; + STATIC_ASSERT(strcmp(cat1, cat1) == 0); +#if (!defined(KOKKOS_COMPILER_NVCC) || \ + ((__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) >= 1103)) && \ + (!defined(__INTEL_COMPILER_BUILD_DATE) || \ + (__INTEL_COMPILER_BUILD_DATE >= 20210228)) + STATIC_ASSERT(strcmp(cat1, cat2) < 0); + STATIC_ASSERT(strcmp(cat1, cat3) < 0); +#endif +
STATIC_ASSERT(strcmp(cat1, cat4) > 0); + STATIC_ASSERT(strcmp(cat2, cat2) == 0); + STATIC_ASSERT(strcmp(cat2, cat3) > 0); + STATIC_ASSERT(strcmp(cat2, cat4) > 0); + STATIC_ASSERT(strcmp(cat3, cat3) == 0); + STATIC_ASSERT(strcmp(cat3, cat4) > 0); + STATIC_ASSERT(strcmp(cat4, cat4) == 0); + return true; +} +STATIC_ASSERT(test_strcmp()); + +KOKKOS_FUNCTION constexpr bool test_strncmp() { + using Kokkos::Impl::strncmp; + constexpr char greet1[] = "Hello, world!"; + constexpr char greet2[] = "Hello, everybody!"; + constexpr char greet3[] = "Hello, somebody!"; + STATIC_ASSERT(strncmp(greet1, greet2, 13) > 0); + STATIC_ASSERT(strncmp(greet2, greet1, 13) < 0); + STATIC_ASSERT(strncmp(greet2, greet1, 7) == 0); +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 610) + (void)greet3; +#elif defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 710) + STATIC_ASSERT(strncmp(&greet2[12], &greet3[11], 5) == 0); +#else + STATIC_ASSERT(strncmp(greet2 + 12, greet3 + 11, 5) == 0); +#endif + STATIC_ASSERT(strncmp(greet1, greet2, 0) == 0); + return true; +} +STATIC_ASSERT(test_strncmp()); + +KOKKOS_FUNCTION constexpr bool strcpy_helper(const char* dest, const char* src, + const char* ref) { + using Kokkos::Impl::strcmp; + using Kokkos::Impl::strcpy; + char buffer[50] = {}; + strcpy(buffer, dest); + strcpy(buffer, src); + return strcmp(buffer, ref) == 0; +} + +KOKKOS_FUNCTION constexpr bool test_strcpy() { + STATIC_ASSERT(strcpy_helper("abcdef", "hi", "hi\0\0\0f")); + return true; +} +STATIC_ASSERT(test_strcpy()); + +KOKKOS_FUNCTION constexpr bool strncpy_helper(const char* dest, const char* src, + std::size_t count, + const char* ref) { + using Kokkos::Impl::strcmp; + using Kokkos::Impl::strlen; + using Kokkos::Impl::strncpy; + char buffer[50] = {}; + strncpy(buffer, dest, strlen(dest)); + strncpy(buffer, src, count); + return strcmp(buffer, ref) == 0; +} + +KOKKOS_FUNCTION constexpr bool test_strncpy() { + STATIC_ASSERT(strncpy_helper("abcdef", "hi", 5, "hi\0\0\0f")); + 
STATIC_ASSERT(strncpy_helper("abcdef", "hi", 0, "abcdef")); + return true; +} +STATIC_ASSERT(test_strncpy()); + +KOKKOS_FUNCTION constexpr bool strcat_helper(const char* dest, const char* src, + const char* ref) { + using Kokkos::Impl::strcat; + using Kokkos::Impl::strcmp; + char buffer[50] = {}; + strcat(buffer, dest); + strcat(buffer, src); + return strcmp(buffer, ref) == 0; +} + +KOKKOS_FUNCTION constexpr bool test_strcat() { + STATIC_ASSERT(strcat_helper("Hello ", "World!", "Hello World!")); + STATIC_ASSERT(strcat_helper("Hello World!", " Goodbye World!", + "Hello World! Goodbye World!")); + return true; +} +STATIC_ASSERT(test_strcat()); + +KOKKOS_FUNCTION constexpr bool strncat_helper(const char* dest, const char* src, + std::size_t count, + const char* ref) { + using Kokkos::Impl::strcmp; + using Kokkos::Impl::strlen; + using Kokkos::Impl::strncat; + char buffer[50] = {}; + strncat(buffer, dest, strlen(dest)); + strncat(buffer, src, count); + return strcmp(buffer, ref) == 0; +} + +KOKKOS_FUNCTION constexpr bool test_strncat() { + STATIC_ASSERT( + strncat_helper("Hello World!", " Goodbye World!", 3, "Hello World! 
Go")); + STATIC_ASSERT( + strncat_helper("Hello World!", " Goodbye World!", 0, "Hello World!")); + return true; +} +STATIC_ASSERT(test_strncat()); + +#if !defined(KOKKOS_COMPILER_GNU) || (KOKKOS_COMPILER_GNU >= 540) +template <class Integral> +KOKKOS_FUNCTION constexpr bool to_chars_helper(Integral val, char const* ref) { + using Kokkos::Impl::strcmp; + using Kokkos::Impl::strlen; + using Kokkos::Impl::to_chars_i; + constexpr int BUFFER_SIZE = 21; + char buffer[BUFFER_SIZE] = {}; + return (buffer + strlen(ref) == + to_chars_i(buffer, buffer + BUFFER_SIZE, val).ptr) && + (strcmp(buffer, ref) == 0); +} + +KOKKOS_FUNCTION constexpr bool test_to_chars() { + STATIC_ASSERT(to_chars_helper(0, "0")); + STATIC_ASSERT(to_chars_helper(123, "123")); + STATIC_ASSERT(to_chars_helper(-456, "-456")); + STATIC_ASSERT(to_chars_helper(INT_MAX, "2147483647")); + STATIC_ASSERT(to_chars_helper(INT_MIN, "-2147483648")); + + STATIC_ASSERT(to_chars_helper(0u, "0")); + STATIC_ASSERT(to_chars_helper(78u, "78")); + STATIC_ASSERT(to_chars_helper(UINT_MAX, "4294967295")); + + STATIC_ASSERT(to_chars_helper(0ll, "0")); + STATIC_ASSERT(to_chars_helper(LLONG_MAX, "9223372036854775807")); + STATIC_ASSERT(to_chars_helper(LLONG_MIN, "-9223372036854775808")); + + STATIC_ASSERT(to_chars_helper(0ull, "0")); + STATIC_ASSERT(to_chars_helper(ULLONG_MAX, "18446744073709551615")); + + return true; +} +STATIC_ASSERT(test_to_chars()); +#endif + +} // namespace diff --git a/packages/kokkos/core/unit_test/TestTaskScheduler.hpp b/packages/kokkos/core/unit_test/TestTaskScheduler.hpp index 6b9cd2c90f5d3b9a999e33bbbe7400c53b15aaf1..62fd68b6cb70c3c387fee6d1f8175ef212cfdb79 100644 --- a/packages/kokkos/core/unit_test/TestTaskScheduler.hpp +++ b/packages/kokkos/core/unit_test/TestTaskScheduler.hpp @@ -48,7 +48,6 @@ #include <Kokkos_Macros.hpp> #if defined(KOKKOS_ENABLE_TASKDAG) #include <Kokkos_Core.hpp> -#include <impl/Kokkos_FixedBufferMemoryPool.hpp> #include <cstdio> #include <iostream> #include <cmath> diff --git
a/packages/kokkos/core/unit_test/TestTeam.hpp b/packages/kokkos/core/unit_test/TestTeam.hpp index cade6b0243ff4554fd7b023b67f5d7f3b9086890..f1d0f9cb3b8a37f35f9b4962e2f183f26701072c 100644 --- a/packages/kokkos/core/unit_test/TestTeam.hpp +++ b/packages/kokkos/core/unit_test/TestTeam.hpp @@ -43,7 +43,6 @@ */ #include <cstdio> -#include <stdexcept> #include <sstream> #include <iostream> @@ -62,17 +61,20 @@ struct TestTeamPolicy { view_type m_flags; TestTeamPolicy(const size_t league_size) - : m_flags( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"), + : m_flags(Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"), // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 #ifdef KOKKOS_ENABLE_OPENMPTARGET - Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 32).team_size_max( - *this, Kokkos::ParallelReduceTag()), + Kokkos::TeamPolicy<ScheduleType, ExecSpace>( + 1, std::is_same<ExecSpace, + Kokkos::Experimental::OpenMPTarget>::value + ? 32 + : 1) + .team_size_max(*this, Kokkos::ParallelReduceTag()), #else - Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 1).team_size_max( - *this, Kokkos::ParallelReduceTag()), + Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 1).team_size_max( + *this, Kokkos::ParallelReduceTag()), #endif - league_size) { + league_size) { } struct VerifyInitTag {}; @@ -131,8 +133,12 @@ struct TestTeamPolicy { constexpr const int smallest_work = 1; // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 #ifdef KOKKOS_ENABLE_OPENMPTARGET - Kokkos::TeamPolicy<ExecSpace, NoOpTag> none_auto(smallest_work, 32, - smallest_work); + Kokkos::TeamPolicy<ExecSpace, NoOpTag> none_auto( + smallest_work, + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? 
32 + : smallest_work, + smallest_work); #else Kokkos::TeamPolicy<ExecSpace, NoOpTag> none_auto( smallest_work, smallest_work, smallest_work); @@ -143,8 +149,12 @@ struct TestTeamPolicy { (void)both_auto; // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 #ifdef KOKKOS_ENABLE_OPENMPTARGET - Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_vector(smallest_work, 32, - Kokkos::AUTO()); + Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_vector( + smallest_work, + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? 32 + : smallest_work, + Kokkos::AUTO()); #else Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_vector( smallest_work, smallest_work, Kokkos::AUTO()); @@ -166,10 +176,18 @@ struct TestTeamPolicy { // 32 #ifdef KOKKOS_ENABLE_OPENMPTARGET const int team_size = - policy_type(league_size, 32) + policy_type( + league_size, + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? 32 + : 1) .team_size_max(functor, Kokkos::ParallelForTag()); const int team_size_init = - policy_type_init(league_size, 32) + policy_type_init( + league_size, + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? 32 + : 1) .team_size_max(functor, Kokkos::ParallelForTag()); #else const int team_size = @@ -215,7 +233,11 @@ struct TestTeamPolicy { // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 #ifdef KOKKOS_ENABLE_OPENMPTARGET const int team_size = - policy_type_reduce(league_size, 32) + policy_type_reduce( + league_size, + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? 
32 + : 1) .team_size_max(functor, Kokkos::ParallelReduceTag()); #else const int team_size = @@ -272,7 +294,7 @@ class ReduceTeamFunctor { } KOKKOS_INLINE_FUNCTION - void join(volatile value_type &dst, const volatile value_type &src) const { + void join(value_type &dst, const value_type &src) const { dst.value[0] += src.value[0]; dst.value[1] += src.value[1]; dst.value[2] += src.value[2]; @@ -371,8 +393,7 @@ class ScanTeamFunctor { void init(value_type &error) const { error = 0; } KOKKOS_INLINE_FUNCTION - void join(value_type volatile &error, - value_type volatile const &input) const { + void join(value_type &error, value_type const &input) const { if (input) error = 1; } @@ -380,8 +401,7 @@ class ScanTeamFunctor { using value_type = int64_t; KOKKOS_INLINE_FUNCTION - void join(value_type volatile &dst, - value_type volatile const &input) const { + void join(value_type &dst, value_type const &input) const { if (dst < input) dst = input; } }; @@ -571,11 +591,17 @@ struct TestSharedTeam { #ifdef KOKKOS_ENABLE_OPENMPTARGET const size_t team_size = - Kokkos::TeamPolicy<ScheduleType, ExecSpace>(64, 32).team_size_max( - Functor(), Kokkos::ParallelReduceTag()); - - Kokkos::TeamPolicy<ScheduleType, ExecSpace> team_exec(32 / team_size, - team_size); + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? Kokkos::TeamPolicy<ScheduleType, ExecSpace>(64, 32).team_size_max( + Functor(), Kokkos::ParallelReduceTag()) + : Kokkos::TeamPolicy<ScheduleType, ExecSpace>(8192, 1) + .team_size_max(Functor(), Kokkos::ParallelReduceTag()); + + Kokkos::TeamPolicy<ScheduleType, ExecSpace> team_exec( + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? 
32 / team_size + : 8192 / team_size, + team_size); #else const size_t team_size = Kokkos::TeamPolicy<ScheduleType, ExecSpace>(8192, 1).team_size_max( @@ -616,7 +642,9 @@ struct TestLambdaSharedTeam { const int SHARED_COUNT = 1000; #ifdef KOKKOS_ENABLE_OPENMPTARGET - int team_size = 32; + int team_size = + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value ? 32 + : 1; #else int team_size = 1; #endif @@ -776,11 +804,18 @@ struct TestScratchTeam { Functor::SHARED_THREAD_COUNT); #ifdef KOKKOS_ENABLE_OPENMPTARGET - p_type team_exec = p_type(64, 32).set_scratch_size( - 1, - Kokkos::PerTeam(Functor::shared_int_array_type::shmem_size( - Functor::SHARED_TEAM_COUNT)), - Kokkos::PerThread(thread_scratch_size + 3 * sizeof(int))); + p_type team_exec = + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? p_type(64, 32).set_scratch_size( + 1, + Kokkos::PerTeam(Functor::shared_int_array_type::shmem_size( + Functor::SHARED_TEAM_COUNT)), + Kokkos::PerThread(thread_scratch_size + 3 * sizeof(int))) + : p_type(8192, 1).set_scratch_size( + 1, + Kokkos::PerTeam(Functor::shared_int_array_type::shmem_size( + Functor::SHARED_TEAM_COUNT)), + Kokkos::PerThread(thread_scratch_size + 3 * sizeof(int))); #else p_type team_exec = p_type(8192, 1).set_scratch_size( 1, @@ -797,7 +832,10 @@ struct TestScratchTeam { Functor::shared_int_array_type::shmem_size(3 * team_size); #ifdef KOKKOS_ENABLE_OPENMPTARGET - team_exec = p_type(64 / team_size, team_size); + team_exec = + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value + ? 
p_type(64 / team_size, team_size) + : p_type(8192 / team_size, team_size); #else team_exec = p_type(8192 / team_size, team_size); #endif @@ -825,31 +863,31 @@ namespace Test { template <class ExecSpace> KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body( const typename Kokkos::TeamPolicy<ExecSpace>::member_type &team) { - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team1(team.team_scratch(0), 128); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread1(team.thread_scratch(0), 16); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team2(team.team_scratch(0), 128); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread2(team.thread_scratch(0), 16); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team1(team.team_scratch(1), 12800); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread1(team.thread_scratch(1), 1600); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team2(team.team_scratch(1), 12800); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread2(team.thread_scratch(1), 1600); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, 
Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team3(team.team_scratch(0), 128); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread3(team.thread_scratch(0), 16); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team3(team.team_scratch(1), 12800); - Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > + Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread3(team.thread_scratch(1), 1600); // The explicit types for 0 and 128 are here to test TeamThreadRange accepting @@ -945,7 +983,7 @@ struct ClassNoShmemSizeFunction { using member_type = typename Kokkos::TeamPolicy<ExecSpace, ScheduleType>::member_type; - Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>> errors; KOKKOS_INLINE_FUNCTION void operator()(const TagFor &, const member_type &team) const { @@ -967,20 +1005,20 @@ struct ClassNoShmemSizeFunction { const int per_team0 = 3 * Kokkos::View<double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(128); + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); const int per_thread0 = 3 * Kokkos::View<double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(16); + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); const int per_team1 = 3 * Kokkos::View< double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(12800); + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(12800); const int per_thread1 = - 3 * Kokkos::View< - double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(1600); + 3 * + Kokkos::View<double *, ExecSpace, + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(1600); #ifdef KOKKOS_ENABLE_SYCL int 
team_size = 4; @@ -1031,7 +1069,7 @@ struct ClassWithShmemSizeFunction { using member_type = typename Kokkos::TeamPolicy<ExecSpace, ScheduleType>::member_type; - Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>> errors; KOKKOS_INLINE_FUNCTION void operator()(const TagFor &, const member_type &team) const { @@ -1053,11 +1091,11 @@ struct ClassWithShmemSizeFunction { const int per_team1 = 3 * Kokkos::View< double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(12800); + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(12800); const int per_thread1 = - 3 * Kokkos::View< - double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(1600); + 3 * + Kokkos::View<double *, ExecSpace, + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(1600); int team_size = 8; if (team_size > ExecSpace::concurrency()) @@ -1097,11 +1135,11 @@ struct ClassWithShmemSizeFunction { const int per_team0 = 3 * Kokkos::View<double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(128); + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); const int per_thread0 = 3 * Kokkos::View<double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(16); + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); return per_team0 + team_size * per_thread0; } }; @@ -1109,27 +1147,26 @@ struct ClassWithShmemSizeFunction { template <class ExecSpace, class ScheduleType> void test_team_mulit_level_scratch_test_lambda() { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>> errors; Kokkos::View<int, ExecSpace> d_errors("Errors"); errors = d_errors; const int per_team0 = 3 * Kokkos::View<double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(128); + 
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); const int per_thread0 = - 3 * - Kokkos::View<double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(16); + 3 * Kokkos::View<double *, ExecSpace, + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); const int per_team1 = 3 * Kokkos::View<double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(12800); + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(12800); const int per_thread1 = 3 * Kokkos::View<double *, ExecSpace, - Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(1600); + Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(1600); #ifdef KOKKOS_ENABLE_SYCL int team_size = 4; @@ -1244,9 +1281,8 @@ template <class ExecSpace, class ScheduleType, class T, class Enabled = void> struct TestTeamBroadcast; template <class ExecSpace, class ScheduleType, class T> -struct TestTeamBroadcast< - ExecSpace, ScheduleType, T, - typename std::enable_if<(sizeof(T) == sizeof(char)), void>::type> { +struct TestTeamBroadcast<ExecSpace, ScheduleType, T, + std::enable_if_t<(sizeof(T) == sizeof(char)), void>> { using team_member = typename Kokkos::TeamPolicy<ScheduleType, ExecSpace>::member_type; using memory_space = typename ExecSpace::memory_space; @@ -1358,9 +1394,8 @@ struct TestTeamBroadcast< }; template <class ExecSpace, class ScheduleType, class T> -struct TestTeamBroadcast< - ExecSpace, ScheduleType, T, - typename std::enable_if<(sizeof(T) > sizeof(char)), void>::type> { +struct TestTeamBroadcast<ExecSpace, ScheduleType, T, + std::enable_if_t<(sizeof(T) > sizeof(char)), void>> { using team_member = typename Kokkos::TeamPolicy<ScheduleType, ExecSpace>::member_type; using value_type = T; @@ -1434,9 +1469,8 @@ struct TestTeamBroadcast< } template <class ScalarType> - static inline - typename std::enable_if<!std::is_integral<ScalarType>::value, void>::type - compare_test(ScalarType A, ScalarType B, double epsilon_factor) { + static inline 
std::enable_if_t<!std::is_integral<ScalarType>::value, void> + compare_test(ScalarType A, ScalarType B, double epsilon_factor) { if (std::is_same<ScalarType, double>::value || std::is_same<ScalarType, float>::value) { ASSERT_NEAR((double)A, (double)B, @@ -1448,9 +1482,8 @@ struct TestTeamBroadcast< } template <class ScalarType> - static inline - typename std::enable_if<std::is_integral<ScalarType>::value, void>::type - compare_test(ScalarType A, ScalarType B, double) { + static inline std::enable_if_t<std::is_integral<ScalarType>::value, void> + compare_test(ScalarType A, ScalarType B, double) { ASSERT_EQ(A, B); } @@ -1528,7 +1561,9 @@ struct TestScratchAlignment { void test(bool allocate_small) { int shmem_size = ScratchView::shmem_size(11); #ifdef KOKKOS_ENABLE_OPENMPTARGET - int team_size = 32; + int team_size = + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value ? 32 + : 1; #else int team_size = 1; #endif diff --git a/packages/kokkos/core/unit_test/TestTeamBasic.hpp b/packages/kokkos/core/unit_test/TestTeamBasic.hpp index 1582783a76251508c91015cee1dfee82af8abb0d..be1848d70daa138bd38209dfe05c0b67a59a8d4e 100644 --- a/packages/kokkos/core/unit_test/TestTeamBasic.hpp +++ b/packages/kokkos/core/unit_test/TestTeamBasic.hpp @@ -114,6 +114,54 @@ TEST(TEST_CATEGORY, team_reduce_large) { } } +template <typename ExecutionSpace> +struct LargeTeamScratchFunctor { + using team_member = typename Kokkos::TeamPolicy<ExecutionSpace>::member_type; + const size_t m_per_team_bytes; + + KOKKOS_FUNCTION void operator()(const team_member& member) const { + double* team_shared = static_cast<double*>( + member.team_scratch(/*level*/ 1).get_shmem(m_per_team_bytes)); + if (team_shared == nullptr) + Kokkos::abort("Couldn't allocate required size!\n"); + double* team_shared_1 = static_cast<double*>( + member.team_scratch(/*level*/ 1).get_shmem(sizeof(double))); + if (team_shared_1 != nullptr) + Kokkos::abort("Allocated more memory than requested!\n"); + } +}; + 
+TEST(TEST_CATEGORY, large_team_scratch_size) { + const int level = 1; + const int n_teams = 1; + +#ifdef KOKKOS_ENABLE_OPENMPTARGET + // Allocate slightly more than (2^31-1) bytes. The other value resulted in + // problems allocating too much memory. + const size_t per_team_extent = 268435460; +#else + // Value originally chosen in the reproducer. + const size_t per_team_extent = 502795560; +#endif + + const size_t per_team_bytes = per_team_extent * sizeof(double); + +#ifdef KOKKOS_ENABLE_OPENMPTARGET + Kokkos::TeamPolicy<TEST_EXECSPACE> policy( + n_teams, + std::is_same<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>::value + ? 32 + : 1); +#else + Kokkos::TeamPolicy<TEST_EXECSPACE> policy(n_teams, 1); +#endif + policy.set_scratch_size(level, Kokkos::PerTeam(per_team_bytes)); + + Kokkos::parallel_for(policy, + LargeTeamScratchFunctor<TEST_EXECSPACE>{per_team_bytes}); + Kokkos::fence(); +} + TEST(TEST_CATEGORY, team_broadcast_long) { TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, long>::test_teambroadcast(0, 1); @@ -154,12 +202,6 @@ struct long_wrapper { lhs.value += rhs.value; } - KOKKOS_FUNCTION - friend void operator+=(volatile long_wrapper& lhs, - const volatile long_wrapper& rhs) { - lhs.value += rhs.value; - } - KOKKOS_FUNCTION void operator=(const long_wrapper& other) { value = other.value; } diff --git a/packages/kokkos/core/unit_test/TestTeamReductionScan.hpp b/packages/kokkos/core/unit_test/TestTeamReductionScan.hpp index 836134afe0cd4d537520b12c80dd4efaafc21f38..469bba23b73ee9bd316f7c2fbcd9389144f03e12 100644 --- a/packages/kokkos/core/unit_test/TestTeamReductionScan.hpp +++ b/packages/kokkos/core/unit_test/TestTeamReductionScan.hpp @@ -58,8 +58,7 @@ TEST(TEST_CATEGORY, team_reduction_scan) { } TEST(TEST_CATEGORY, team_long_reduce) { -#ifdef KOKKOS_ENABLE_OPENMPTARGET - // WORKAROUND OPENMPTARGET: Not implemented +#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET: Not implemented if constexpr (!std::is_same<TEST_EXECSPACE, 
Kokkos::Experimental::OpenMPTarget>::value) #endif @@ -76,8 +75,7 @@ TEST(TEST_CATEGORY, team_long_reduce) { } TEST(TEST_CATEGORY, team_double_reduce) { -#ifdef KOKKOS_ENABLE_OPENMPTARGET - // WORKAROUND OPENMPTARGET: Not implemented +#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET: Not implemented if constexpr (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>::value) #endif @@ -97,5 +95,44 @@ TEST(TEST_CATEGORY, team_double_reduce) { } } +template <typename ExecutionSpace> +struct DummyTeamReductionFunctor { + using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>; + using TeamHandleType = typename TeamPolicy::member_type; + + KOKKOS_FUNCTION void operator()(const TeamHandleType&, double&) const {} +}; + +template <typename ExecutionSpace> +void test_team_parallel_reduce(const int num_loop_size) { + using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>; + + using ReducerType = Kokkos::Sum<double>; + double result = 10.; + ReducerType reducer(result); + + const int bytes_per_team = 0; + const int bytes_per_thread = 117; + + TeamPolicy team_exec(num_loop_size, Kokkos::AUTO); + team_exec.set_scratch_size(1, Kokkos::PerTeam(bytes_per_team), + Kokkos::PerThread(bytes_per_thread)); + + Kokkos::parallel_reduce(team_exec, + DummyTeamReductionFunctor<ExecutionSpace>{}, reducer); + ASSERT_EQ(result, 0.); +} + +TEST(TEST_CATEGORY, team_parallel_dummy_with_reducer_and_scratch_space) { +#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET: Not implemented + if constexpr (!std::is_same<TEST_EXECSPACE, + Kokkos::Experimental::OpenMPTarget>::value) +#endif + { + test_team_parallel_reduce<TEST_EXECSPACE>(0); + test_team_parallel_reduce<TEST_EXECSPACE>(1); + } +} + } // namespace Test #endif diff --git a/packages/kokkos/core/unit_test/TestTeamScan.hpp b/packages/kokkos/core/unit_test/TestTeamScan.hpp index 9edba57a06c3689d4b9ece32ed79048b1ce5089b..b93285b21e2b09dcc22dd718b35626a20fb5b8ec 100644 --- a/packages/kokkos/core/unit_test/TestTeamScan.hpp +++ 
b/packages/kokkos/core/unit_test/TestTeamScan.hpp @@ -92,18 +92,15 @@ struct TestTeamScan { N = _N; a_d = view_type("a_d", M, N); a_r = view_type("a_r", M, N); - // Set team size explicitly to - // a) check whether this works in CPU backends with team_size > 1 and - // b) make sure we have a power of 2 and for GPU backends due to limitation - // of the scan algorithm implemented in CUDA etc. - int team_size = 1; - if (ExecutionSpace().concurrency() > 2) { - if (ExecutionSpace().concurrency() > 10000) - team_size = 128; - else - team_size = 3; - } - Kokkos::parallel_for(policy_type(M, team_size), *this); + + // Set team size explicitly to check whether non-power-of-two team sizes can + // be used. + if (ExecutionSpace().concurrency() > 10000) + Kokkos::parallel_for(policy_type(M, 127), *this); + else if (ExecutionSpace().concurrency() > 2) + Kokkos::parallel_for(policy_type(M, 3), *this); + else + Kokkos::parallel_for(policy_type(M, 1), *this); auto a_i = Kokkos::create_mirror_view(a_d); auto a_o = Kokkos::create_mirror_view(a_r); diff --git a/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp index c0e698d92eed7fbfcae1df33381fd5f298200d56..5e637616c9a0240fc7350d903e6add0783a4a033 100644 --- a/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp +++ b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp @@ -43,7 +43,6 @@ */ #include <cstdio> -#include <stdexcept> #include <sstream> #include <iostream> @@ -64,14 +63,6 @@ class MyArray { void operator=(const MyArray& src) { for (int i = 0; i < N; i++) values[i] = src.values[i]; } - KOKKOS_INLINE_FUNCTION - void operator+=(const volatile MyArray& src) volatile { - for (int i = 0; i < N; i++) values[i] += src.values[i]; - } - KOKKOS_INLINE_FUNCTION - void operator=(const volatile MyArray& src) volatile { - for (int i = 0; i < N; i++) values[i] = src.values[i]; - } }; template <class T, int N, class PolicyType, int S> diff --git 
a/packages/kokkos/core/unit_test/TestTeamVector.hpp b/packages/kokkos/core/unit_test/TestTeamVector.hpp index dbed67475615606915cfcc05959de312f9eacbfd..8c302f9307c7b4f6eaa5b10e8cf935c89da36293 100644 --- a/packages/kokkos/core/unit_test/TestTeamVector.hpp +++ b/packages/kokkos/core/unit_test/TestTeamVector.hpp @@ -1031,8 +1031,8 @@ TEST(TEST_CATEGORY, parallel_scan_with_reducers) { using T = double; using namespace VectorScanReducer; - static constexpr int n = 1000000; - static constexpr int n_vector_range = 100; + constexpr int n = 1000000; + constexpr int n_vector_range = 100; checkScan<TEST_EXECSPACE, ScanType::Exclusive, n, n_vector_range, Kokkos::Prod<T, TEST_EXECSPACE>>() @@ -1054,6 +1054,9 @@ TEST(TEST_CATEGORY, parallel_scan_with_reducers) { checkScan<TEST_EXECSPACE, ScanType::Inclusive, n, n_vector_range, Kokkos::Min<T, TEST_EXECSPACE>>() .run(); + + (void)n; + (void)n_vector_range; } } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestTeamVectorRange.hpp b/packages/kokkos/core/unit_test/TestTeamVectorRange.hpp index c4116b91392e2020ecf0a030f96536c3a47a6dfa..44ffc7f3be4700948e66d385d9099215a1381953 100644 --- a/packages/kokkos/core/unit_test/TestTeamVectorRange.hpp +++ b/packages/kokkos/core/unit_test/TestTeamVectorRange.hpp @@ -78,37 +78,6 @@ struct my_complex { return *this; } - KOKKOS_INLINE_FUNCTION - my_complex& operator=(const volatile my_complex& src) { - re = src.re; - im = src.im; - dummy = src.dummy; - return *this; - } - - KOKKOS_INLINE_FUNCTION - volatile my_complex& operator=(const my_complex& src) volatile { - re = src.re; - im = src.im; - dummy = src.dummy; - return *this; - } - - KOKKOS_INLINE_FUNCTION - volatile my_complex& operator=(const volatile my_complex& src) volatile { - re = src.re; - im = src.im; - dummy = src.dummy; - return *this; - } - - KOKKOS_INLINE_FUNCTION - my_complex(const volatile my_complex& src) { - re = src.re; - im = src.im; - dummy = src.dummy; - } - KOKKOS_INLINE_FUNCTION my_complex(const double& 
val) { re = val; @@ -124,13 +93,6 @@ struct my_complex { return *this; } - KOKKOS_INLINE_FUNCTION - void operator+=(const volatile my_complex& src) volatile { - re += src.re; - im += src.im; - dummy += src.dummy; - } - KOKKOS_INLINE_FUNCTION my_complex operator+(const my_complex& src) { my_complex tmp = *this; @@ -140,15 +102,6 @@ struct my_complex { return tmp; } - KOKKOS_INLINE_FUNCTION - my_complex operator+(const volatile my_complex& src) volatile { - my_complex tmp = *this; - tmp.re += src.re; - tmp.im += src.im; - tmp.dummy += src.dummy; - return tmp; - } - KOKKOS_INLINE_FUNCTION my_complex& operator*=(const my_complex& src) { double re_tmp = re * src.re - im * src.im; @@ -159,15 +112,6 @@ struct my_complex { return *this; } - KOKKOS_INLINE_FUNCTION - void operator*=(const volatile my_complex& src) volatile { - double re_tmp = re * src.re - im * src.im; - double im_tmp = re * src.im + im * src.re; - re = re_tmp; - im = im_tmp; - dummy *= src.dummy; - } - KOKKOS_INLINE_FUNCTION bool operator==(const my_complex& src) const { return (re == src.re) && (im == src.im) && (dummy == src.dummy); diff --git a/packages/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp b/packages/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp deleted file mode 100644 index a0d00ded1b1586e67eb6bc09f93cf386239c3e2d..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp +++ /dev/null @@ -1,181 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <Kokkos_Core.hpp> - -#define KOKKOS_PRAGMA_UNROLL(a) - -namespace { - -template <class Scalar, class ExecutionSpace> -struct SumPlain { - using execution_space = ExecutionSpace; - using type = typename Kokkos::View<Scalar*, execution_space>; - - type view; - - SumPlain(type view_) : view(view_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(int /*i*/, Scalar& val) { val += Scalar(); } -}; - -template <class Scalar, class ExecutionSpace> -struct SumInitJoinFinalValueType { - using execution_space = ExecutionSpace; - using type = typename Kokkos::View<Scalar*, execution_space>; - using value_type = Scalar; - - type view; - - SumInitJoinFinalValueType(type view_) : view(view_) {} - - KOKKOS_INLINE_FUNCTION - void init(value_type& val) const { val = value_type(); } - - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& val, volatile value_type& src) const { - val += src; - } - - KOKKOS_INLINE_FUNCTION - void operator()(int /*i*/, value_type& val) const { val += value_type(); } -}; - -template <class Scalar, class ExecutionSpace> -struct SumInitJoinFinalValueType2 { - using execution_space = ExecutionSpace; - using type = typename Kokkos::View<Scalar*, execution_space>; - using value_type = Scalar; - - type view; - - SumInitJoinFinalValueType2(type view_) : view(view_) {} - - KOKKOS_INLINE_FUNCTION - void init(volatile value_type& val) const { val = value_type(); } - - KOKKOS_INLINE_FUNCTION - void join(volatile value_type& val, const volatile value_type& src) const { - val += src; - } - - KOKKOS_INLINE_FUNCTION - void operator()(int /*i*/, value_type& val) const { val += value_type(); } -}; - -template <class Scalar, class ExecutionSpace> -struct SumInitJoinFinalValueTypeArray { - using execution_space = ExecutionSpace; - using type = typename Kokkos::View<Scalar*, execution_space>; - using value_type = Scalar[]; - - type view; - 
int n; - - SumInitJoinFinalValueTypeArray(type view_, int n_) : view(view_), n(n_) {} - - KOKKOS_INLINE_FUNCTION - void init(value_type val) const { - for (int k = 0; k < n; k++) { - val[k] = 0; - } - } - - KOKKOS_INLINE_FUNCTION - void join(volatile value_type val, const volatile value_type src) const { - for (int k = 0; k < n; k++) { - val[k] += src[k]; - } - } - - KOKKOS_INLINE_FUNCTION - void operator()(int i, value_type val) const { - for (int k = 0; k < n; k++) { - val[k] += k * i; - } - } -}; - -template <class Scalar, class ExecutionSpace> -void TestTemplateMetaFunctions() { - static_assert( - Kokkos::Impl::ReduceFunctorHasInit<SumPlain<Scalar, ExecutionSpace>, - Scalar&>::value == false, - ""); - static_assert( - Kokkos::Impl::ReduceFunctorHasInit< - SumInitJoinFinalValueType<Scalar, ExecutionSpace>>::value == true, - ""); - static_assert( - Kokkos::Impl::ReduceFunctorHasInit< - SumInitJoinFinalValueType2<Scalar, ExecutionSpace>>::value == true, - ""); - - static_assert( - Kokkos::Impl::ReduceFunctorHasInit< - SumInitJoinFinalValueTypeArray<Scalar, ExecutionSpace>>::value == - true, - ""); - - static_assert(Kokkos::Impl::ReduceFunctorHasJoin< - SumPlain<Scalar, ExecutionSpace>>::value == false, - ""); - static_assert( - Kokkos::Impl::ReduceFunctorHasJoin< - SumInitJoinFinalValueType<Scalar, ExecutionSpace>>::value == true, - ""); - static_assert( - Kokkos::Impl::ReduceFunctorHasJoin< - SumInitJoinFinalValueType2<Scalar, ExecutionSpace>>::value == true, - ""); -} - -} // namespace - -namespace Test { -TEST(TEST_CATEGORY, template_meta_functions) { - TestTemplateMetaFunctions<int, TEST_EXECSPACE>(); -} -} // namespace Test diff --git a/packages/kokkos/core/unit_test/TestUniqueToken.hpp b/packages/kokkos/core/unit_test/TestUniqueToken.hpp index 4ba48bf73f069c6097a079ce1bcde5fd9452155c..224a97fef00330a8812590231295574501d1887a 100644 --- a/packages/kokkos/core/unit_test/TestUniqueToken.hpp +++ b/packages/kokkos/core/unit_test/TestUniqueToken.hpp @@ -42,11 
+42,11 @@ //@HEADER */ -#include <iostream> +#include <gtest/gtest.h> #include <Kokkos_Core.hpp> -namespace Test { +namespace { template <class Space, Kokkos::Experimental::UniqueTokenScope Scope> class TestUniqueToken { @@ -152,14 +152,12 @@ class TestUniqueToken { } #endif - std::cout << "TestUniqueToken max reuse = " << max << std::endl; - typename view_type::HostMirror host_errors = Kokkos::create_mirror_view(self.errors); Kokkos::deep_copy(host_errors, self.errors); - ASSERT_EQ(host_errors(0), 0); + ASSERT_EQ(host_errors(0), 0) << "max reuse was " << max; } }; @@ -268,22 +266,24 @@ class TestAcquireTeamUniqueToken { } } - std::cout << "TestAcquireTeamUniqueToken max reuse = " << max << std::endl; - typename view_type::HostMirror host_errors = Kokkos::create_mirror_view(self.errors); Kokkos::deep_copy(host_errors, self.errors); - ASSERT_EQ(host_errors(0), 0); + ASSERT_EQ(host_errors(0), 0) << "max reuse was " << max; } }; -TEST(TEST_CATEGORY, acquire_team_unique_token) { - // FIXME_OPENMPTARGET - Not yet implemented. 
-#if !defined(KOKKOS_ENABLE_OPENMPTARGET) - TestAcquireTeamUniqueToken<TEST_EXECSPACE>::run(); +TEST(TEST_CATEGORY, unique_token_team_acquire) { +#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET + if constexpr (std::is_same<TEST_EXECSPACE, + Kokkos::Experimental::OpenMPTarget>::value) { + GTEST_SKIP() << "skipping because OpenMPTarget does not implement yet a " + "specialization of AcquireTeamUniqueToken"; + } else #endif + TestAcquireTeamUniqueToken<TEST_EXECSPACE>::run(); } -} // namespace Test +} // namespace diff --git a/packages/kokkos/core/unit_test/TestUtilities.hpp b/packages/kokkos/core/unit_test/TestUtilities.hpp index fc8e615dab5780bb1b06bc1092499a39dad5c299..cbe9b77af1bd8c2ca06a8b622221d5f4a0052956 100644 --- a/packages/kokkos/core/unit_test/TestUtilities.hpp +++ b/packages/kokkos/core/unit_test/TestUtilities.hpp @@ -44,7 +44,6 @@ #include <gtest/gtest.h> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestViewAPI.hpp b/packages/kokkos/core/unit_test/TestViewAPI.hpp index 83efae6170dfe098432f16383f600ea01412cbe0..320eb6f2ea9bed603fa22547e511f8cf5655cc08 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI.hpp @@ -45,7 +45,6 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <stdexcept> #include <sstream> #include <iostream> @@ -57,8 +56,14 @@ size_t allocation_count(const Kokkos::View<T, P...> &view) { const size_t alloc = view.span(); const int memory_span = Kokkos::View<int *>::required_allocation_size(100); - - return (card <= alloc && memory_span == 400) ? alloc : 0; + const int memory_span_layout = + Kokkos::View<int *, Kokkos::LayoutRight>::required_allocation_size( + Kokkos::LayoutRight(100)); + + return ((card <= alloc) && (memory_span == 400) && + (memory_span_layout == 400)) + ? 
alloc + : 0; } /*--------------------------------------------------------------------------*/ @@ -104,8 +109,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 8> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &input) { + static void join(value_type &update, const value_type &input) { update |= input; } @@ -200,8 +204,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 7> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &input) { + static void join(value_type &update, const value_type &input) { update |= input; } @@ -278,8 +281,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 6> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &input) { + static void join(value_type &update, const value_type &input) { update |= input; } @@ -354,8 +356,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 5> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &input) { + static void join(value_type &update, const value_type &input) { update |= input; } @@ -442,8 +443,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 4> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &input) { + static void join(value_type &update, const value_type &input) { update |= input; } @@ -512,8 +512,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 3> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &input) { + static void join(value_type &update, const value_type &input) { update |= input; } @@ -605,8 +604,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 2> { using 
value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &input) { + static void join(value_type &update, const value_type &input) { update |= input; } @@ -681,8 +679,7 @@ struct TestViewOperator_LeftAndRight<DataType, DeviceType, 1> { using value_type = int; KOKKOS_INLINE_FUNCTION - static void join(volatile value_type &update, - const volatile value_type &input) { + static void join(value_type &update, const value_type &input) { update |= input; } @@ -1023,6 +1020,91 @@ class TestViewAPI { #endif } + static void run_test_contruction_from_layout() { + using hView0 = typename dView0::HostMirror; + using hView1 = typename dView1::HostMirror; + using hView2 = typename dView2::HostMirror; + using hView3 = typename dView3::HostMirror; + using hView4 = typename dView4::HostMirror; + + hView0 hv_0("dView0::HostMirror"); + hView1 hv_1("dView1::HostMirror", N0); + hView2 hv_2("dView2::HostMirror", N0); + hView3 hv_3("dView3::HostMirror", N0); + hView4 hv_4("dView4::HostMirror", N0); + + dView0 dv_0_1(nullptr, 0); + dView0 dv_0_2(hv_0.label(), hv_0.layout()); + + dView1 dv_1_1(nullptr, 0); + dView1 dv_1_2(hv_1.label(), hv_1.layout()); + + dView2 dv_2_1(nullptr, 0); + dView2 dv_2_2(hv_2.label(), hv_2.layout()); + + dView3 dv_3_1(nullptr, 0); + dView3 dv_3_2(hv_3.label(), hv_3.layout()); + + dView4 dv_4_1(nullptr, 0); + dView4 dv_4_2(hv_4.label(), hv_4.layout()); + } + + static void run_test_contruction_from_layout_2() { + using dView3_0 = Kokkos::View<T ***, device>; + using dView3_1 = Kokkos::View<T * * [N1], device>; + using dView3_2 = Kokkos::View<T * [N1][N2], device>; + using dView3_3 = Kokkos::View<T[N0][N1][N2], device>; + + dView3_0 v_0("v_0", N0, N1, N2); + dView3_1 v_1("v_1", N0, N1); + dView3_2 v_2("v_2", N0); + dView3_3 v_3("v_2"); + + dView3_1 v_1_a("v_1", N0, N1, N2); + dView3_2 v_2_a("v_2", N0, N1, N2); + dView3_3 v_3_a("v_2", N0, N1, N2); + + { + dView3_0 dv_1(v_0.label(), v_0.layout()); + dView3_0 
dv_2(v_1.label(), v_1.layout()); + dView3_0 dv_3(v_2.label(), v_2.layout()); + dView3_0 dv_4(v_3.label(), v_3.layout()); + dView3_0 dv_5(v_1_a.label(), v_1_a.layout()); + dView3_0 dv_6(v_2_a.label(), v_2_a.layout()); + dView3_0 dv_7(v_3_a.label(), v_3_a.layout()); + } + + { + dView3_1 dv_1(v_0.label(), v_0.layout()); + dView3_1 dv_2(v_1.label(), v_1.layout()); + dView3_1 dv_3(v_2.label(), v_2.layout()); + dView3_1 dv_4(v_3.label(), v_3.layout()); + dView3_1 dv_5(v_1_a.label(), v_1_a.layout()); + dView3_1 dv_6(v_2_a.label(), v_2_a.layout()); + dView3_1 dv_7(v_3_a.label(), v_3_a.layout()); + } + + { + dView3_2 dv_1(v_0.label(), v_0.layout()); + dView3_2 dv_2(v_1.label(), v_1.layout()); + dView3_2 dv_3(v_2.label(), v_2.layout()); + dView3_2 dv_4(v_3.label(), v_3.layout()); + dView3_2 dv_5(v_1_a.label(), v_1_a.layout()); + dView3_2 dv_6(v_2_a.label(), v_2_a.layout()); + dView3_2 dv_7(v_3_a.label(), v_3_a.layout()); + } + + { + dView3_3 dv_1(v_0.label(), v_0.layout()); + dView3_3 dv_2(v_1.label(), v_1.layout()); + dView3_3 dv_3(v_2.label(), v_2.layout()); + dView3_3 dv_4(v_3.label(), v_3.layout()); + dView3_3 dv_5(v_1_a.label(), v_1_a.layout()); + dView3_3 dv_6(v_2_a.label(), v_2_a.layout()); + dView3_3 dv_7(v_3_a.label(), v_3_a.layout()); + } + } + static void run_test() { // mfh 14 Feb 2014: This test doesn't actually create instances of // these types. 
In order to avoid "unused type alias" diff --git a/packages/kokkos/core/unit_test/TestViewAPI_b.hpp b/packages/kokkos/core/unit_test/TestViewAPI_b.hpp index ad9069e397138957dd00d6326bee392876f22aec..5cd9446a1cd52ed0a57d3d9c27389d82f21c0e19 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_b.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_b.hpp @@ -50,6 +50,8 @@ TEST(TEST_CATEGORY, view_api_b) { TestViewAPI<double, TEST_EXECSPACE>::run_test_view_operator_a(); TestViewAPI<double, TEST_EXECSPACE>::run_test_mirror(); TestViewAPI<double, TEST_EXECSPACE>::run_test_scalar(); + TestViewAPI<double, TEST_EXECSPACE>::run_test_contruction_from_layout(); + TestViewAPI<double, TEST_EXECSPACE>::run_test_contruction_from_layout_2(); } } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestViewAPI_e.hpp b/packages/kokkos/core/unit_test/TestViewAPI_e.hpp index d1d38022a74e256e06103bd18d3e3cccd9db413e..2dfde085733a8077616cb56a024c5e7084f4d928 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_e.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_e.hpp @@ -45,7 +45,6 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <stdexcept> #include <sstream> #include <iostream> @@ -269,6 +268,43 @@ TEST(TEST_CATEGORY, view_allocation_large_rank) { Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, v_single); ASSERT_EQ(result(0, 0, 0, 0, 0, 0, 0, 0), 42); } + +template <typename ExecSpace, typename ViewType> +struct TestViewShmemSizeOnDevice { + using ViewTestType = Kokkos::View<size_t, ExecSpace>; + + TestViewShmemSizeOnDevice(size_t d1_, size_t d2_, size_t d3_) + : d1(d1_), d2(d2_), d3(d3_), shmemSize("shmemSize") {} + + KOKKOS_FUNCTION void operator()(const int&) const { + auto shmem = ViewType::shmem_size(d1, d2, d3); + shmemSize() = shmem; + } + + size_t d1, d2, d3; + ViewTestType shmemSize; +}; + +TEST(TEST_CATEGORY, view_shmem_size_on_device) { + using ExecSpace = typename TEST_EXECSPACE::execution_space; + using ViewType = 
Kokkos::View<int64_t***, ExecSpace>; + + constexpr size_t d1 = 5; + constexpr size_t d2 = 7; + constexpr size_t d3 = 11; + + TestViewShmemSizeOnDevice<ExecSpace, ViewType> testShmemSize(d1, d2, d3); + + Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0, 1), testShmemSize); + + auto size = ViewType::shmem_size(d1, d2, d3); + + auto shmemSizeHost = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), testShmemSize.shmemSize); + + ASSERT_EQ(size, shmemSizeHost()); +} + } // namespace Test #include <TestViewIsAssignable.hpp> diff --git a/packages/kokkos/core/unit_test/TestViewCtorDimMatch.hpp b/packages/kokkos/core/unit_test/TestViewCtorDimMatch.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e1eb88dba11fee8d03476c0272fac585b7d79135 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestViewCtorDimMatch.hpp @@ -0,0 +1,430 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> + +namespace Test { + +#define LIVE(EXPR, ARGS, DYNRANK) EXPECT_NO_THROW(EXPR) +#define DIE(EXPR, ARGS, DYNRANK) \ + ASSERT_DEATH( \ + EXPR, \ + "Constructor for Kokkos View 'v_" #ARGS \ + "' has mismatched number of arguments. 
Number of arguments = " #ARGS \ + " but dynamic rank = " #DYNRANK) + +#define PARAM_0 +#define PARAM_1 1 +#define PARAM_2 1, 1 +#define PARAM_3 1, 1, 1 +#define PARAM_4 1, 1, 1, 1 +#define PARAM_5 1, 1, 1, 1, 1 +#define PARAM_6 1, 1, 1, 1, 1, 1 +#define PARAM_7 1, 1, 1, 1, 1, 1, 1 + +#define PARAM_0_RANK 0 +#define PARAM_1_RANK 1 +#define PARAM_2_RANK 2 +#define PARAM_3_RANK 3 +#define PARAM_4_RANK 4 +#define PARAM_5_RANK 5 +#define PARAM_6_RANK 6 +#define PARAM_7_RANK 7 + +using DType = int; + +// Skip test execution when KOKKOS_ENABLE_OPENMPTARGET is enabled until +// Kokkos::abort() aborts properly on that backend +// Skip test execution when KOKKOS_COMPILER_NVHPC until fixed in GTEST +#if defined(KOKKOS_ENABLE_OPENMPTARGET) || (KOKKOS_COMPILER_NVHPC) +#else +TEST(TEST_CATEGORY_DEATH, view_construction_with_wrong_params_dyn) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using DType_0 = DType; + using DType_1 = DType *; + using DType_2 = DType **; + using DType_3 = DType ***; + using DType_4 = DType ****; + using DType_5 = DType *****; + using DType_6 = DType ******; + using DType_7 = DType *******; + { + // test View parameters for View dim = 0, dynamic = 0 + LIVE({ Kokkos::View<DType_0> v_0("v_0" PARAM_0); }, 0, 0); + DIE({ Kokkos::View<DType_0> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_0> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_0> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_0> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_0> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_0> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_0> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 1, dynamic = 1 + DIE({ Kokkos::View<DType_1> v_0("v_0" PARAM_0); }, 0, 1); + LIVE({ Kokkos::View<DType_1> v_1("v_1", PARAM_1); }, 1, 1); + DIE({ Kokkos::View<DType_1> v_2("v_2", PARAM_2); }, 2, 1); + DIE({ Kokkos::View<DType_1> v_3("v_3", PARAM_3); }, 3, 1); + 
DIE({ Kokkos::View<DType_1> v_4("v_4", PARAM_4); }, 4, 1); + DIE({ Kokkos::View<DType_1> v_5("v_5", PARAM_5); }, 5, 1); + DIE({ Kokkos::View<DType_1> v_6("v_6", PARAM_6); }, 6, 1); + DIE({ Kokkos::View<DType_1> v_7("v_7", PARAM_7); }, 7, 1); + } + + { + // test View parameters for View dim = 2, dynamic = 2 + DIE({ Kokkos::View<DType_2> v_0("v_0" PARAM_0); }, 0, 2); + DIE({ Kokkos::View<DType_2> v_1("v_1", PARAM_1); }, 1, 2); + LIVE({ Kokkos::View<DType_2> v_2("v_2", PARAM_2); }, 2, 2); + DIE({ Kokkos::View<DType_2> v_3("v_3", PARAM_3); }, 3, 2); + DIE({ Kokkos::View<DType_2> v_4("v_4", PARAM_4); }, 4, 2); + DIE({ Kokkos::View<DType_2> v_5("v_5", PARAM_5); }, 5, 2); + DIE({ Kokkos::View<DType_2> v_6("v_6", PARAM_6); }, 6, 2); + DIE({ Kokkos::View<DType_2> v_7("v_7", PARAM_7); }, 7, 2); + } + + { + // test View parameters for View dim = 3, dynamic = 3 + DIE({ Kokkos::View<DType_3> v_0("v_0" PARAM_0); }, 0, 3); + DIE({ Kokkos::View<DType_3> v_1("v_1", PARAM_1); }, 1, 3); + DIE({ Kokkos::View<DType_3> v_2("v_2", PARAM_2); }, 2, 3); + LIVE({ Kokkos::View<DType_3> v_3("v_3", PARAM_3); }, 3, 3); + DIE({ Kokkos::View<DType_3> v_4("v_4", PARAM_4); }, 4, 3); + DIE({ Kokkos::View<DType_3> v_5("v_5", PARAM_5); }, 5, 3); + DIE({ Kokkos::View<DType_3> v_6("v_6", PARAM_6); }, 6, 3); + DIE({ Kokkos::View<DType_3> v_7("v_7", PARAM_7); }, 7, 3); + } + + { + // test View parameters for View dim = 4, dynamic = 4 + DIE({ Kokkos::View<DType_4> v_0("v_0" PARAM_0); }, 0, 4); + DIE({ Kokkos::View<DType_4> v_1("v_1", PARAM_1); }, 1, 4); + DIE({ Kokkos::View<DType_4> v_2("v_2", PARAM_2); }, 2, 4); + DIE({ Kokkos::View<DType_4> v_3("v_3", PARAM_3); }, 3, 4); + LIVE({ Kokkos::View<DType_4> v_4("v_4", PARAM_4); }, 4, 4); + DIE({ Kokkos::View<DType_4> v_5("v_5", PARAM_5); }, 5, 4); + DIE({ Kokkos::View<DType_4> v_6("v_6", PARAM_6); }, 6, 4); + DIE({ Kokkos::View<DType_4> v_7("v_7", PARAM_7); }, 7, 4); + } + + { + // test View parameters for View dim = 5, dynamic = 5 + DIE({ Kokkos::View<DType_5> 
v_0("v_0" PARAM_0); }, 0, 5); + DIE({ Kokkos::View<DType_5> v_1("v_1", PARAM_1); }, 1, 5); + DIE({ Kokkos::View<DType_5> v_2("v_2", PARAM_2); }, 2, 5); + DIE({ Kokkos::View<DType_5> v_3("v_3", PARAM_3); }, 3, 5); + DIE({ Kokkos::View<DType_5> v_4("v_4", PARAM_4); }, 4, 5); + LIVE({ Kokkos::View<DType_5> v_5("v_5", PARAM_5); }, 5, 5); + DIE({ Kokkos::View<DType_5> v_6("v_6", PARAM_6); }, 6, 5); + DIE({ Kokkos::View<DType_5> v_7("v_7", PARAM_7); }, 7, 5); + } + + { + // test View parameters for View dim = 6, dynamic = 6 + DIE({ Kokkos::View<DType_6> v_0("v_0" PARAM_0); }, 0, 6); + DIE({ Kokkos::View<DType_6> v_1("v_1", PARAM_1); }, 1, 6); + DIE({ Kokkos::View<DType_6> v_2("v_2", PARAM_2); }, 2, 6); + DIE({ Kokkos::View<DType_6> v_3("v_3", PARAM_3); }, 3, 6); + DIE({ Kokkos::View<DType_6> v_4("v_4", PARAM_4); }, 4, 6); + DIE({ Kokkos::View<DType_6> v_5("v_5", PARAM_5); }, 5, 6); + LIVE({ Kokkos::View<DType_6> v_6("v_6", PARAM_6); }, 6, 6); + DIE({ Kokkos::View<DType_6> v_7("v_7", PARAM_7); }, 7, 6); + } + + { + // test View parameters for View dim = 7, dynamic = 7 + DIE({ Kokkos::View<DType_7> v_0("v_0" PARAM_0); }, 0, 7); + DIE({ Kokkos::View<DType_7> v_1("v_1", PARAM_1); }, 1, 7); + DIE({ Kokkos::View<DType_7> v_2("v_2", PARAM_2); }, 2, 7); + DIE({ Kokkos::View<DType_7> v_3("v_3", PARAM_3); }, 3, 7); + DIE({ Kokkos::View<DType_7> v_4("v_4", PARAM_4); }, 4, 7); + DIE({ Kokkos::View<DType_7> v_5("v_5", PARAM_5); }, 5, 7); + DIE({ Kokkos::View<DType_7> v_6("v_6", PARAM_6); }, 6, 7); + LIVE({ Kokkos::View<DType_7> v_7("v_7", PARAM_7); }, 7, 7); + } +} + +TEST(TEST_CATEGORY_DEATH, view_construction_with_wrong_params_stat) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using DType_0 = DType; + using DType_1 = DType[1]; + using DType_2 = DType[1][1]; + using DType_3 = DType[1][1][1]; + using DType_4 = DType[1][1][1][1]; + using DType_5 = DType[1][1][1][1][1]; + using DType_6 = DType[1][1][1][1][1][1]; + using DType_7 = DType[1][1][1][1][1][1][1]; + { + // 
test View parameters for View dim = 0, dynamic = 0 + LIVE({ Kokkos::View<DType_0> v_0("v_0" PARAM_0); }, 0, 0); + DIE({ Kokkos::View<DType_0> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_0> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_0> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_0> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_0> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_0> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_0> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 1, dynamic = 0 + LIVE({ Kokkos::View<DType_1> v_0("v_0" PARAM_0); }, 0, 0); + LIVE({ Kokkos::View<DType_1> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_1> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_1> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_1> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_1> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_1> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_1> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 2, dynamic = 0 + LIVE({ Kokkos::View<DType_2> v_0("v_0" PARAM_0); }, 0, 0); + DIE({ Kokkos::View<DType_2> v_1("v_1", PARAM_1); }, 1, 0); + LIVE({ Kokkos::View<DType_2> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_2> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_2> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_2> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_2> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_2> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 3, dynamic = 0 + LIVE({ Kokkos::View<DType_3> v_0("v_0" PARAM_0); }, 0, 0); + DIE({ Kokkos::View<DType_3> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_3> v_2("v_2", PARAM_2); }, 2, 0); + LIVE({ Kokkos::View<DType_3> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_3> 
v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_3> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_3> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_3> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 4, dynamic = 0 + LIVE({ Kokkos::View<DType_4> v_0("v_0" PARAM_0); }, 0, 0); + DIE({ Kokkos::View<DType_4> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_4> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_4> v_3("v_3", PARAM_3); }, 3, 0); + LIVE({ Kokkos::View<DType_4> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_4> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_4> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_4> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 5, dynamic = 0 + LIVE({ Kokkos::View<DType_5> v_0("v_0" PARAM_0); }, 0, 0); + DIE({ Kokkos::View<DType_5> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_5> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_5> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_5> v_4("v_4", PARAM_4); }, 4, 0); + LIVE({ Kokkos::View<DType_5> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_5> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_5> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 6, dynamic = 0 + LIVE({ Kokkos::View<DType_6> v_0("v_0" PARAM_0); }, 0, 0); + DIE({ Kokkos::View<DType_6> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_6> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_6> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_6> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_6> v_5("v_5", PARAM_5); }, 5, 0); + LIVE({ Kokkos::View<DType_6> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_6> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 7, dynamic = 0 + LIVE({ Kokkos::View<DType_7> v_0("v_0" PARAM_0); }, 
0, 0); + DIE({ Kokkos::View<DType_7> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_7> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_7> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_7> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_7> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_7> v_6("v_6", PARAM_6); }, 6, 0); + LIVE({ Kokkos::View<DType_7> v_7("v_7", PARAM_7); }, 7, 0); + } +} + +TEST(TEST_CATEGORY_DEATH, view_construction_with_wrong_params_mix) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using DType_0 = DType; + using DType_1 = DType[1]; + using DType_2 = DType * [1]; + using DType_3 = DType * * [1]; + using DType_4 = DType ** * [1]; + using DType_5 = DType *** * [1]; + using DType_6 = DType **** * [1]; + using DType_7 = DType ***** * [1]; + { + // test View parameters for View dim = 0, dynamic = 0 + LIVE({ Kokkos::View<DType_0> v_0("v_0" PARAM_0); }, 0, 0); + DIE({ Kokkos::View<DType_0> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_0> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_0> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_0> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_0> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_0> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_0> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 1, dynamic = 0 + LIVE({ Kokkos::View<DType_1> v_0("v_0" PARAM_0); }, 0, 0); + LIVE({ Kokkos::View<DType_1> v_1("v_1", PARAM_1); }, 1, 0); + DIE({ Kokkos::View<DType_1> v_2("v_2", PARAM_2); }, 2, 0); + DIE({ Kokkos::View<DType_1> v_3("v_3", PARAM_3); }, 3, 0); + DIE({ Kokkos::View<DType_1> v_4("v_4", PARAM_4); }, 4, 0); + DIE({ Kokkos::View<DType_1> v_5("v_5", PARAM_5); }, 5, 0); + DIE({ Kokkos::View<DType_1> v_6("v_6", PARAM_6); }, 6, 0); + DIE({ Kokkos::View<DType_1> v_7("v_7", PARAM_7); }, 7, 0); + } + + { + // test View parameters for View dim = 2, dynamic = 
1 + DIE({ Kokkos::View<DType_2> v_0("v_0" PARAM_0); }, 0, 1); + LIVE({ Kokkos::View<DType_2> v_1("v_1", PARAM_1); }, 1, 1); + LIVE({ Kokkos::View<DType_2> v_2("v_2", PARAM_2); }, 2, 1); + DIE({ Kokkos::View<DType_2> v_3("v_3", PARAM_3); }, 3, 1); + DIE({ Kokkos::View<DType_2> v_4("v_4", PARAM_4); }, 4, 1); + DIE({ Kokkos::View<DType_2> v_5("v_5", PARAM_5); }, 5, 1); + DIE({ Kokkos::View<DType_2> v_6("v_6", PARAM_6); }, 6, 1); + DIE({ Kokkos::View<DType_2> v_7("v_7", PARAM_7); }, 7, 1); + } + + { + // test View parameters for View dim = 3, dynamic = 2 + DIE({ Kokkos::View<DType_3> v_0("v_0" PARAM_0); }, 0, 2); + DIE({ Kokkos::View<DType_3> v_1("v_1", PARAM_1); }, 1, 2); + LIVE({ Kokkos::View<DType_3> v_2("v_2", PARAM_2); }, 2, 2); + LIVE({ Kokkos::View<DType_3> v_3("v_3", PARAM_3); }, 3, 2); + DIE({ Kokkos::View<DType_3> v_4("v_4", PARAM_4); }, 4, 2); + DIE({ Kokkos::View<DType_3> v_5("v_5", PARAM_5); }, 5, 2); + DIE({ Kokkos::View<DType_3> v_6("v_6", PARAM_6); }, 6, 2); + DIE({ Kokkos::View<DType_3> v_7("v_7", PARAM_7); }, 7, 2); + } + + { + // test View parameters for View dim = 4, dynamic = 3 + DIE({ Kokkos::View<DType_4> v_0("v_0" PARAM_0); }, 0, 3); + DIE({ Kokkos::View<DType_4> v_1("v_1", PARAM_1); }, 1, 3); + DIE({ Kokkos::View<DType_4> v_2("v_2", PARAM_2); }, 2, 3); + LIVE({ Kokkos::View<DType_4> v_3("v_3", PARAM_3); }, 3, 3); + LIVE({ Kokkos::View<DType_4> v_4("v_4", PARAM_4); }, 4, 3); + DIE({ Kokkos::View<DType_4> v_5("v_5", PARAM_5); }, 5, 3); + DIE({ Kokkos::View<DType_4> v_6("v_6", PARAM_6); }, 6, 3); + DIE({ Kokkos::View<DType_4> v_7("v_7", PARAM_7); }, 7, 3); + } + + { + // test View parameters for View dim = 5, dynamic = 4 + DIE({ Kokkos::View<DType_5> v_0("v_0" PARAM_0); }, 0, 4); + DIE({ Kokkos::View<DType_5> v_1("v_1", PARAM_1); }, 1, 4); + DIE({ Kokkos::View<DType_5> v_2("v_2", PARAM_2); }, 2, 4); + DIE({ Kokkos::View<DType_5> v_3("v_3", PARAM_3); }, 3, 4); + LIVE({ Kokkos::View<DType_5> v_4("v_4", PARAM_4); }, 4, 4); + LIVE({ 
Kokkos::View<DType_5> v_5("v_5", PARAM_5); }, 5, 4); + DIE({ Kokkos::View<DType_5> v_6("v_6", PARAM_6); }, 6, 4); + DIE({ Kokkos::View<DType_5> v_7("v_7", PARAM_7); }, 7, 4); + } + + { + // test View parameters for View dim = 6, dynamic = 5 + DIE({ Kokkos::View<DType_6> v_0("v_0" PARAM_0); }, 0, 5); + DIE({ Kokkos::View<DType_6> v_1("v_1", PARAM_1); }, 1, 5); + DIE({ Kokkos::View<DType_6> v_2("v_2", PARAM_2); }, 2, 5); + DIE({ Kokkos::View<DType_6> v_3("v_3", PARAM_3); }, 3, 5); + DIE({ Kokkos::View<DType_6> v_4("v_4", PARAM_4); }, 4, 5); + LIVE({ Kokkos::View<DType_6> v_5("v_5", PARAM_5); }, 5, 5); + LIVE({ Kokkos::View<DType_6> v_6("v_6", PARAM_6); }, 6, 5); + DIE({ Kokkos::View<DType_6> v_7("v_7", PARAM_7); }, 7, 5); + } + + { + // test View parameters for View dim = 7, dynamic = 6 + DIE({ Kokkos::View<DType_7> v_0("v_0" PARAM_0); }, 0, 6); + DIE({ Kokkos::View<DType_7> v_1("v_1", PARAM_1); }, 1, 6); + DIE({ Kokkos::View<DType_7> v_2("v_2", PARAM_2); }, 2, 6); + DIE({ Kokkos::View<DType_7> v_3("v_3", PARAM_3); }, 3, 6); + DIE({ Kokkos::View<DType_7> v_4("v_4", PARAM_4); }, 4, 6); + DIE({ Kokkos::View<DType_7> v_5("v_5", PARAM_5); }, 5, 6); + LIVE({ Kokkos::View<DType_7> v_6("v_6", PARAM_6); }, 6, 6); + LIVE({ Kokkos::View<DType_7> v_7("v_7", PARAM_7); }, 7, 6); + } +} +#endif // KOKKOS_ENABLE_OPENMPTARGET + +#undef PARAM_0 +#undef PARAM_1 +#undef PARAM_2 +#undef PARAM_3 +#undef PARAM_4 +#undef PARAM_5 +#undef PARAM_6 +#undef PARAM_7 + +#undef PARAM_0_RANK +#undef PARAM_1_RANK +#undef PARAM_2_RANK +#undef PARAM_3_RANK +#undef PARAM_4_RANK +#undef PARAM_5_RANK +#undef PARAM_6_RANK +#undef PARAM_7_RANK + +#undef DType + +#undef LIVE +#undef DIE +} // namespace Test diff --git a/packages/kokkos/core/unit_test/TestViewHooks.hpp b/packages/kokkos/core/unit_test/TestViewHooks.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ac679bedbdf6f06f1d2431242c9c3a6e4e3bf5dc --- /dev/null +++ b/packages/kokkos/core/unit_test/TestViewHooks.hpp @@ -0,0 
+1,159 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef TESTVIEWHOOKS_HPP_ +#define TESTVIEWHOOKS_HPP_ + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> + +namespace Test { +template <class DeviceType> +struct TestViewHooks { + struct TestSubscriber; + + static_assert( + Kokkos::Experimental::is_hooks_policy< + Kokkos::Experimental::SubscribableViewHooks<TestSubscriber> >::value, + "Must be a hooks policy"); + + using test_view_type = + Kokkos::View<double **, + Kokkos::Experimental::SubscribableViewHooks<TestSubscriber>, + DeviceType>; + + struct TestSubscriber { + static test_view_type *self_ptr; + static const test_view_type *other_ptr; + + template <typename View> + static void copy_constructed(View &self, const View &other) { + self_ptr = &self; + other_ptr = &other; + } + + template <typename View> + static void move_constructed(View &self, const View &other) { + self_ptr = &self; + other_ptr = &other; + } + + template <typename View> + static void copy_assigned(View &self, const View &other) { + self_ptr = &self; + other_ptr = &other; + } + + template <typename View> + static void move_assigned(View &self, const View &other) { + self_ptr = &self; + other_ptr = &other; + } + + static void reset() { + self_ptr = nullptr; + other_ptr = nullptr; + } + }; + + static void testViewHooksCopyConstruct() { + TestSubscriber::reset(); + test_view_type testa; + + test_view_type testb(testa); + EXPECT_EQ(TestSubscriber::self_ptr, &testb); + EXPECT_EQ(TestSubscriber::other_ptr, &testa); + } + + static void testViewHooksMoveConstruct() { + TestSubscriber::reset(); + test_view_type testa; + + test_view_type testb(std::move(testa)); + EXPECT_EQ(TestSubscriber::self_ptr, &testb); + + // This is valid, even if the view is moved-from + EXPECT_EQ(TestSubscriber::other_ptr, &testa); + } + + static void testViewHooksCopyAssign() { + TestSubscriber::reset(); + test_view_type testa; + + test_view_type 
testb; + testb = testa; + EXPECT_EQ(TestSubscriber::self_ptr, &testb); + EXPECT_EQ(TestSubscriber::other_ptr, &testa); + } + + static void testViewHooksMoveAssign() { + TestSubscriber::reset(); + test_view_type testa; + + test_view_type testb; + testb = std::move(testa); + EXPECT_EQ(TestSubscriber::self_ptr, &testb); + + // This is valid, even if the view is moved-from + EXPECT_EQ(TestSubscriber::other_ptr, &testa); + } +}; + +template <class DeviceType> +typename TestViewHooks<DeviceType>::test_view_type + *TestViewHooks<DeviceType>::TestSubscriber::self_ptr = nullptr; + +template <class DeviceType> +const typename TestViewHooks<DeviceType>::test_view_type + *TestViewHooks<DeviceType>::TestSubscriber::other_ptr = nullptr; + +TEST(TEST_CATEGORY, view_hooks) { + using ExecSpace = TEST_EXECSPACE; + TestViewHooks<ExecSpace>::testViewHooksCopyConstruct(); + TestViewHooks<ExecSpace>::testViewHooksMoveConstruct(); + TestViewHooks<ExecSpace>::testViewHooksCopyAssign(); + TestViewHooks<ExecSpace>::testViewHooksMoveAssign(); +} + +} // namespace Test +#endif // TESTVIEWHOOKS_HPP_ diff --git a/packages/kokkos/core/unit_test/TestViewIsAssignable.hpp b/packages/kokkos/core/unit_test/TestViewIsAssignable.hpp index fcf9f75f37a22019e5e5e0713104e2fb7fed30ee..03c3b977edeab7ec5b51c406da65b0e089f5a0de 100644 --- a/packages/kokkos/core/unit_test/TestViewIsAssignable.hpp +++ b/packages/kokkos/core/unit_test/TestViewIsAssignable.hpp @@ -12,14 +12,14 @@ struct TestAssignability { template <class MappingType> static void try_assign( ViewTypeDst& dst, ViewTypeSrc& src, - typename std::enable_if<MappingType::is_assignable>::type* = nullptr) { + std::enable_if_t<MappingType::is_assignable>* = nullptr) { dst = src; } template <class MappingType> static void try_assign( ViewTypeDst&, ViewTypeSrc&, - typename std::enable_if<!MappingType::is_assignable>::type* = nullptr) { + std::enable_if_t<!MappingType::is_assignable>* = nullptr) { Kokkos::Impl::throw_runtime_exception( 
"TestAssignability::try_assign: Unexpected call path"); } diff --git a/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp index d592fcaf20c407da8d2c639be98b9cb54b29711e..a8d28933af44b5c784bbf21d45648467354dce54 100644 --- a/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp +++ b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp @@ -44,7 +44,6 @@ #include <gtest/gtest.h> -#include <stdexcept> #include <sstream> #include <iostream> #include <time.h> @@ -56,10 +55,7 @@ namespace Test { TEST(TEST_CATEGORY, view_layoutstride_left_to_layoutleft_assignment) { using exec_space = TEST_EXECSPACE; - auto t = time(nullptr); - srand(t); // Use current time as seed for random generator - printf("view_layoutstride_left_to_layoutleft_assignment: srand(%lu)\n", - static_cast<unsigned long>(t)); + srand(123456); // arbitrary seed for random generator { // Assignment of rank-1 LayoutLeft = LayoutStride int ndims = 1; @@ -338,10 +334,7 @@ TEST(TEST_CATEGORY, view_layoutstride_left_to_layoutleft_assignment) { TEST(TEST_CATEGORY, view_layoutstride_right_to_layoutright_assignment) { using exec_space = TEST_EXECSPACE; - auto t = time(nullptr); - srand(t); // Use current time as seed for random generator - printf("view_layoutstride_right_to_layoutright_assignment: srand(%lu)\n", - static_cast<unsigned long>(t)); + srand(123456); // arbitrary seed for random generator { // Assignment of rank-1 LayoutRight = LayoutStride int ndims = 1; @@ -621,10 +614,7 @@ TEST(TEST_CATEGORY, view_layoutstride_right_to_layoutright_assignment) { TEST(TEST_CATEGORY_DEATH, view_layoutstride_right_to_layoutleft_assignment) { using exec_space = TEST_EXECSPACE; - auto t = time(nullptr); - srand(t); // Use current time as seed for random generator - printf("view_layoutstride_right_to_layoutleft_assignment: srand(%lu)\n", - static_cast<unsigned long>(t)); + srand(123456); // arbitrary seed for random 
generator { // Assignment of rank-1 LayoutLeft = LayoutStride (LayoutRight compatible) int ndims = 1; @@ -776,10 +766,7 @@ TEST(TEST_CATEGORY_DEATH, view_layoutstride_right_to_layoutleft_assignment) { TEST(TEST_CATEGORY_DEATH, view_layoutstride_left_to_layoutright_assignment) { using exec_space = TEST_EXECSPACE; - auto t = time(nullptr); - srand(t); // Use current time as seed for random generator - printf("view_layoutstride_left_to_layoutright_assignment: srand(%lu)\n", - static_cast<unsigned long>(t)); + srand(123456); // arbitrary seed for random generator { // Assignment of rank-1 LayoutRight = LayoutStride (LayoutLeft compatible) int ndims = 1; diff --git a/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp b/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp index 2510a1244664c4b902160faf5f93d022e48ed7aa..b0fabddbfc7424a7852e452a73ccaee428ab8646 100644 --- a/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp +++ b/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp @@ -42,6 +42,10 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#endif + #include <cstdio> #include <gtest/gtest.h> @@ -1776,3 +1780,5 @@ TEST(TEST_CATEGORY, view_layouttiled_subtile) { 4, 12, 16, 12); } } // namespace Test + +#undef KOKKOS_IMPL_PUBLIC_INCLUDE diff --git a/packages/kokkos/core/unit_test/TestViewMapping_a.hpp b/packages/kokkos/core/unit_test/TestViewMapping_a.hpp index edeb1abdaa5f8d1120ac141cb67283ec3d776ce0..5a54a93619a819bf2a460a4f377f23867df371bf 100644 --- a/packages/kokkos/core/unit_test/TestViewMapping_a.hpp +++ b/packages/kokkos/core/unit_test/TestViewMapping_a.hpp @@ -45,7 +45,6 @@ #include <gtest/gtest.h> #include <cstddef> -#include <stdexcept> #include <sstream> #include <iostream> @@ -249,11 +248,11 @@ void test_view_mapping() { ASSERT_EQ(layout.dimension[0], 2u); ASSERT_EQ(layout.dimension[1], 3u); ASSERT_EQ(layout.dimension[2], 4u); - ASSERT_EQ(layout.dimension[3], 1u); - ASSERT_EQ(layout.dimension[4], 1u); - 
ASSERT_EQ(layout.dimension[5], 1u); - ASSERT_EQ(layout.dimension[6], 1u); - ASSERT_EQ(layout.dimension[7], 1u); + ASSERT_EQ(layout.dimension[3], KOKKOS_INVALID_INDEX); + ASSERT_EQ(layout.dimension[4], KOKKOS_INVALID_INDEX); + ASSERT_EQ(layout.dimension[5], KOKKOS_INVALID_INDEX); + ASSERT_EQ(layout.dimension[6], KOKKOS_INVALID_INDEX); + ASSERT_EQ(layout.dimension[7], KOKKOS_INVALID_INDEX); ASSERT_EQ(stride3.m_dim.rank, 3u); ASSERT_EQ(stride3.m_dim.N0, 2u); @@ -447,8 +446,8 @@ void test_view_mapping() { Kokkos::Impl::ViewDimension<N0, N1, N2, N3> dim; SubviewExtents tmp(dim, N0 / 2, Kokkos::ALL, - std::pair<int, int>(N2 / 4, 10 + N2 / 4), - Kokkos::pair<int, int>(N3 / 4, 20 + N3 / 4)); + std::pair<size_t, size_t>(N2 / 4, 10 + N2 / 4), + Kokkos::pair<size_t, size_t>(N3 / 4, 20 + N3 / 4)); ASSERT_EQ(tmp.domain_offset(0), N0 / 2); ASSERT_EQ(tmp.domain_offset(1), 0u); @@ -632,8 +631,7 @@ void test_view_mapping() { using a_const_int_r1 = ViewDataAnalysis<const int[], void>; - static_assert( - std::is_same<typename a_const_int_r1::specialize, void>::value, ""); + static_assert(std::is_void<typename a_const_int_r1::specialize>::value, ""); static_assert(std::is_same<typename a_const_int_r1::dimension, Kokkos::Impl::ViewDimension<0> >::value, ""); @@ -664,8 +662,7 @@ void test_view_mapping() { using a_const_int_r3 = ViewDataAnalysis<const int* * [4], void>; - static_assert( - std::is_same<typename a_const_int_r3::specialize, void>::value, ""); + static_assert(std::is_void<typename a_const_int_r3::specialize>::value, ""); static_assert(std::is_same<typename a_const_int_r3::dimension, Kokkos::Impl::ViewDimension<0, 0, 4> >::value, diff --git a/packages/kokkos/core/unit_test/TestViewMapping_b.hpp b/packages/kokkos/core/unit_test/TestViewMapping_b.hpp index 3e6d91c0b5d245cb00274c4af95ef6539a52aeb7..b6f83e2f26649123fd68320346030b056a5e6bf5 100644 --- a/packages/kokkos/core/unit_test/TestViewMapping_b.hpp +++ b/packages/kokkos/core/unit_test/TestViewMapping_b.hpp @@ -44,7 +44,6 @@ 
#include <gtest/gtest.h> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestViewMapping_subview.hpp b/packages/kokkos/core/unit_test/TestViewMapping_subview.hpp index e52362a054577d5c1b0e1f261b9cf04010cf7137..0dd6a8d52134bb05b1cb392eee07f1bdf506ba6d 100644 --- a/packages/kokkos/core/unit_test/TestViewMapping_subview.hpp +++ b/packages/kokkos/core/unit_test/TestViewMapping_subview.hpp @@ -44,7 +44,6 @@ #include <gtest/gtest.h> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestViewMemoryAccessViolation.hpp b/packages/kokkos/core/unit_test/TestViewMemoryAccessViolation.hpp new file mode 100644 index 0000000000000000000000000000000000000000..aeab9dbcb9f61739755c38c50de136de3f66a119 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestViewMemoryAccessViolation.hpp @@ -0,0 +1,221 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#include <gtest/gtest.h> + +#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC +template <class View, class ExecutionSpace> +struct TestViewMemoryAccessViolation { + View v; + static constexpr auto rank = View::rank; + + template <std::size_t... 
Is> + KOKKOS_FUNCTION decltype(auto) bad_access(std::index_sequence<Is...>) const { + return v((Is * 0)...); + } + + KOKKOS_FUNCTION void operator()(int) const { + ++bad_access(std::make_index_sequence<rank>{}); + } + + TestViewMemoryAccessViolation(View w, ExecutionSpace const& s, + std::string const& matcher) + : v(std::move(w)) { + constexpr bool view_accessible_from_execution_space = + Kokkos::SpaceAccessibility< + /*AccessSpace=*/ExecutionSpace, + /*MemorySpace=*/typename View::memory_space>::accessible; + EXPECT_FALSE(view_accessible_from_execution_space); + EXPECT_DEATH( + { + Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(s, 0, 1), + *this); + Kokkos::fence(); + }, + matcher); + } +}; + +template <class View, class ExecutionSpace> +void test_view_memory_access_violation(View v, ExecutionSpace const& s, + std::string const& m) { + TestViewMemoryAccessViolation<View, ExecutionSpace>(std::move(v), s, m); +} + +template <class View, class LblOrPtr, std::size_t... Is> +auto make_view_impl(LblOrPtr x, std::index_sequence<Is...>) { + return View(x, (Is + 1)...); +} + +template <class View, class LblOrPtr> +auto make_view(LblOrPtr x) { + return make_view_impl<View>(std::move(x), + std::make_index_sequence<View::rank>()); +} + +template <class ExecutionSpace> +void test_view_memory_access_violations_from_host() { + Kokkos::DefaultHostExecutionSpace const host_exec_space{}; + // clang-format off + using V0 = Kokkos::View<int, ExecutionSpace>; + using V1 = Kokkos::View<int*, ExecutionSpace>; + using V2 = Kokkos::View<int**, ExecutionSpace>; + using V3 = Kokkos::View<int***, ExecutionSpace>; + using V4 = Kokkos::View<int****, ExecutionSpace>; + using V5 = Kokkos::View<int*****, ExecutionSpace>; + using V6 = Kokkos::View<int******, ExecutionSpace>; + using V7 = Kokkos::View<int*******, ExecutionSpace>; + using V8 = Kokkos::View<int********, ExecutionSpace>; + std::string const prefix = "Kokkos::View ERROR: attempt to access inaccessible memory space"; + 
std::string const lbl = "my_label"; + test_view_memory_access_violation(make_view<V0>(lbl), host_exec_space, prefix + ".*" + lbl); + test_view_memory_access_violation(make_view<V1>(lbl), host_exec_space, prefix + ".*" + lbl); + test_view_memory_access_violation(make_view<V2>(lbl), host_exec_space, prefix + ".*" + lbl); + test_view_memory_access_violation(make_view<V3>(lbl), host_exec_space, prefix + ".*" + lbl); + test_view_memory_access_violation(make_view<V4>(lbl), host_exec_space, prefix + ".*" + lbl); + test_view_memory_access_violation(make_view<V5>(lbl), host_exec_space, prefix + ".*" + lbl); + test_view_memory_access_violation(make_view<V6>(lbl), host_exec_space, prefix + ".*" + lbl); + test_view_memory_access_violation(make_view<V7>(lbl), host_exec_space, prefix + ".*" + lbl); + test_view_memory_access_violation(make_view<V8>(lbl), host_exec_space, prefix + ".*" + lbl); + int* const ptr = nullptr; + test_view_memory_access_violation(make_view<V0>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + test_view_memory_access_violation(make_view<V1>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + test_view_memory_access_violation(make_view<V2>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + test_view_memory_access_violation(make_view<V3>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + test_view_memory_access_violation(make_view<V4>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + test_view_memory_access_violation(make_view<V5>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + test_view_memory_access_violation(make_view<V6>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + test_view_memory_access_violation(make_view<V7>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + test_view_memory_access_violation(make_view<V8>(ptr), host_exec_space, prefix + ".*UNMANAGED"); + // clang-format on +} + +template <class ExecutionSpace> +void test_view_memory_access_violations_from_device() { + ExecutionSpace const exec_space{}; + // clang-format off + using V0 = 
Kokkos::View<int, Kokkos::HostSpace>; + using V1 = Kokkos::View<int*, Kokkos::HostSpace>; + using V2 = Kokkos::View<int**, Kokkos::HostSpace>; + using V3 = Kokkos::View<int***, Kokkos::HostSpace>; + using V4 = Kokkos::View<int****, Kokkos::HostSpace>; + using V5 = Kokkos::View<int*****, Kokkos::HostSpace>; + using V6 = Kokkos::View<int******, Kokkos::HostSpace>; + using V7 = Kokkos::View<int*******, Kokkos::HostSpace>; + using V8 = Kokkos::View<int********, Kokkos::HostSpace>; + std::string const prefix = "Kokkos::View ERROR: attempt to access inaccessible memory space"; + std::string const lbl = "my_label"; + test_view_memory_access_violation(make_view<V0>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V1>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V2>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V3>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V4>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V5>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V6>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V7>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V8>(lbl), exec_space, prefix + ".*UNAVAILABLE"); + int* const ptr = nullptr; + test_view_memory_access_violation(make_view<V0>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V1>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V2>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V3>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V4>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + 
test_view_memory_access_violation(make_view<V5>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V6>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V7>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + test_view_memory_access_violation(make_view<V8>(ptr), exec_space, prefix + ".*UNAVAILABLE"); + // clang-format on +} + +// FIXME_SYCL +#if !(defined(KOKKOS_COMPILER_INTEL) && defined(KOKKOS_ENABLE_SYCL)) +TEST(TEST_CATEGORY_DEATH, view_memory_access_violations_from_host) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using ExecutionSpace = TEST_EXECSPACE; + + if (Kokkos::SpaceAccessibility< + /*AccessSpace=*/Kokkos::HostSpace, + /*MemorySpace=*/typename ExecutionSpace::memory_space>::accessible) { + GTEST_SKIP() << "skipping since no memory access violation would occur"; + } + + test_view_memory_access_violations_from_host<ExecutionSpace>(); +} +#endif + +TEST(TEST_CATEGORY_DEATH, view_memory_access_violations_from_device) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using ExecutionSpace = TEST_EXECSPACE; + + if (Kokkos::SpaceAccessibility< + /*AccessSpace=*/ExecutionSpace, + /*MemorySpace=*/Kokkos::HostSpace>::accessible) { + GTEST_SKIP() << "skipping since no memory access violation would occur"; + } + +#if defined(KOKKOS_IMPL_HIP_ABORT_DOES_NOT_PRINT_MESSAGE) + if (std::is_same<ExecutionSpace, Kokkos::Experimental::HIP>::value) { + GTEST_SKIP() << "skipping because not yet supported with HIP toolchain"; + } +#endif +#if defined(KOKKOS_ENABLE_SYCL) && defined(NDEBUG) // FIXME_SYCL + if (std::is_same<ExecutionSpace, Kokkos::Experimental::SYCL>::value) { + GTEST_SKIP() << "skipping SYCL device-side abort does not work when NDEBUG " + "is defined"; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) // FIXME_OPENMPTARGET + if (std::is_same<ExecutionSpace, Kokkos::Experimental::OpenMPTarget>::value) { + GTEST_SKIP() << "skipping because OpenMPTarget 
backend is currently not " + "able to abort from the device"; + } +#endif + + test_view_memory_access_violations_from_device<ExecutionSpace>(); +} +#endif diff --git a/packages/kokkos/core/unit_test/TestViewOfClass.hpp b/packages/kokkos/core/unit_test/TestViewOfClass.hpp index 634f1da73008e60fc9c761de25655a928879383a..e9128debf7d3ba9c8edf841e3565ce528691ba54 100644 --- a/packages/kokkos/core/unit_test/TestViewOfClass.hpp +++ b/packages/kokkos/core/unit_test/TestViewOfClass.hpp @@ -45,7 +45,6 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestViewSpaceAssign.hpp b/packages/kokkos/core/unit_test/TestViewSpaceAssign.hpp index d1dfb7c512f03a6183fbc617486a29af0ffee59a..b7c50d63621e09e97880d7f1ab8fc67288c60eaa 100644 --- a/packages/kokkos/core/unit_test/TestViewSpaceAssign.hpp +++ b/packages/kokkos/core/unit_test/TestViewSpaceAssign.hpp @@ -45,7 +45,6 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <stdexcept> #include <sstream> #include <iostream> diff --git a/packages/kokkos/core/unit_test/TestViewSubview.hpp b/packages/kokkos/core/unit_test/TestViewSubview.hpp index 43bbb4320ab117bcd1e16129bec9d1e952fa88a5..3114d41be3c586bf96f8fffa8f94454bddc66391 100644 --- a/packages/kokkos/core/unit_test/TestViewSubview.hpp +++ b/packages/kokkos/core/unit_test/TestViewSubview.hpp @@ -46,7 +46,6 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <stdexcept> #include <sstream> #include <iostream> #include <type_traits> @@ -68,14 +67,14 @@ struct static_assert_predicate_true_impl; template <template <class...> class predicate, class... message, class... 
args> struct static_assert_predicate_true_impl< - typename std::enable_if<predicate<args...>::type::value>::type, predicate, + std::enable_if_t<predicate<args...>::type::value>, predicate, static_predicate_message<message...>, args...> { using type = int; }; template <template <class...> class predicate, class... message, class... args> struct static_assert_predicate_true_impl< - typename std::enable_if<!predicate<args...>::type::value>::type, predicate, + std::enable_if_t<!predicate<args...>::type::value>, predicate, static_predicate_message<message...>, args...> { using type = typename _kokkos____________________static_test_failure_____< message...>::type; diff --git a/packages/kokkos/core/unit_test/TestWithoutInitializing.hpp b/packages/kokkos/core/unit_test/TestWithoutInitializing.hpp index 8a58888c7c96ba726fd828fe75b3532fe4cb36b7..2bf93ab3f67d75911bb0aa7218cf1ed9f1cdeed9 100644 --- a/packages/kokkos/core/unit_test/TestWithoutInitializing.hpp +++ b/packages/kokkos/core/unit_test/TestWithoutInitializing.hpp @@ -55,6 +55,8 @@ TEST(TEST_CATEGORY, resize_realloc_no_init) { [&]() { Kokkos::resize(Kokkos::WithoutInitializing, bla, 5, 6, 7, 9); Kokkos::realloc(Kokkos::WithoutInitializing, bla, 8, 8, 8, 8); + Kokkos::realloc(Kokkos::view_alloc(Kokkos::WithoutInitializing), bla, 5, + 6, 7, 8); }, [&](BeginParallelForEvent event) { if (event.descriptor().find("initialization") != std::string::npos) @@ -96,3 +98,266 @@ TEST(TEST_CATEGORY, resize_realloc_no_alloc) { ASSERT_TRUE(success); listen_tool_events(Config::DisableAll()); } + +TEST(TEST_CATEGORY, realloc_exec_space) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<typename TEST_EXECSPACE::memory_space, + Kokkos::CudaUVMSpace>::value) + GTEST_SKIP() << "skipping since CudaUVMSpace requires additional fences"; +#endif +// FIXME_OPENMPTARGET The OpenMPTarget backend doesn't implement allocate taking +// an execution space instance properly so it needs another fence +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if 
(std::is_same<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>::value) + GTEST_SKIP() << "skipping since the OpenMPTarget backend doesn't implement " + "allocate taking an execution space instance properly"; +#endif + + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences()); + using view_type = Kokkos::View<int*, TEST_EXECSPACE>; + view_type outer_view, outer_view2; + + auto success = validate_absence( + [&]() { + view_type inner_view(Kokkos::view_alloc(TEST_EXECSPACE{}, "bla"), 8); + // Avoid testing the destructor + outer_view = inner_view; + Kokkos::realloc( + Kokkos::view_alloc(Kokkos::WithoutInitializing, TEST_EXECSPACE{}), + inner_view, 10); + outer_view2 = inner_view; + Kokkos::realloc(Kokkos::view_alloc(TEST_EXECSPACE{}), inner_view, 10); + }, + [&](BeginFenceEvent event) { + if ((event.descriptor().find("Debug Only Check for Execution Error") != + std::string::npos) || + (event.descriptor().find("HostSpace fence") != std::string::npos)) + return MatchDiagnostic{false}; + return MatchDiagnostic{true, {"Found fence event!"}}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +namespace { +struct NonTriviallyCopyable { + KOKKOS_FUNCTION NonTriviallyCopyable() {} + KOKKOS_FUNCTION NonTriviallyCopyable(const NonTriviallyCopyable&) {} +}; +} // namespace + +TEST(TEST_CATEGORY, view_alloc) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences()); + using view_type = Kokkos::View<NonTriviallyCopyable*, TEST_EXECSPACE>; + view_type outer_view; + + auto success = validate_existence( + [&]() { + view_type inner_view(Kokkos::view_alloc("bla"), 8); + // Avoid testing the destructor + outer_view = inner_view; + }, + [&](BeginFenceEvent event) { + return MatchDiagnostic{ + event.descriptor().find( + "Kokkos::Impl::ViewValueFunctor: View init/destroy fence") != + std::string::npos}; + }); + ASSERT_TRUE(success); + 
listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, view_alloc_exec_space) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences()); + using view_type = Kokkos::View<NonTriviallyCopyable*, TEST_EXECSPACE>; + view_type outer_view; + + auto success = validate_absence( + [&]() { + view_type inner_view(Kokkos::view_alloc(TEST_EXECSPACE{}, "bla"), 8); + // Avoid testing the destructor + outer_view = inner_view; + }, + [&](BeginFenceEvent event) { + return MatchDiagnostic{ + event.descriptor().find( + "Kokkos::Impl::ViewValueFunctor: View init/destroy fence") != + std::string::npos}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, view_alloc_int) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences()); + using view_type = Kokkos::View<int*, TEST_EXECSPACE>; + view_type outer_view; + + auto success = validate_existence( + [&]() { + view_type inner_view("bla", 8); + // Avoid testing the destructor + outer_view = inner_view; + }, + [&](BeginFenceEvent event) { + return MatchDiagnostic{ + event.descriptor().find( + "Kokkos::Impl::ViewValueFunctor: View init/destroy fence") != + std::string::npos}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, view_alloc_exec_space_int) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences()); + using view_type = Kokkos::View<int*, TEST_EXECSPACE>; + view_type outer_view; + + auto success = validate_absence( + [&]() { + view_type inner_view(Kokkos::view_alloc(TEST_EXECSPACE{}, "bla"), 8); + // Avoid testing the destructor + outer_view = inner_view; + }, + [&](BeginFenceEvent event) { + return MatchDiagnostic{ + event.descriptor().find( + "Kokkos::Impl::ViewValueFunctor: View init/destroy fence") != + std::string::npos}; + }); + ASSERT_TRUE(success); + 
listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, deep_copy_zero_memset) { +// FIXME_OPENMPTARGET The OpenMPTarget backend doesn't implement ZeroMemset +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>::value) + GTEST_SKIP() << "skipping since the OpenMPTarget backend doesn't implement " + "ZeroMemset"; +#endif + + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels()); + Kokkos::View<int*, TEST_EXECSPACE> bla("bla", 8); + + auto success = + validate_absence([&]() { Kokkos::deep_copy(bla, 0); }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found begin event"}}; + }, + [&](EndParallelForEvent) { + return MatchDiagnostic{true, {"Found end event"}}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, resize_exec_space) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableFences(), + Config::EnableKernels()); + Kokkos::View<int*** * [1][2][3][4], TEST_EXECSPACE> bla("bla", 8, 7, 6, 5); + + auto success = validate_absence( + [&]() { + Kokkos::resize( + Kokkos::view_alloc(TEST_EXECSPACE{}, Kokkos::WithoutInitializing), + bla, 5, 6, 7, 8); + }, + [&](BeginFenceEvent event) { + if (event.descriptor().find("Kokkos::resize(View)") != + std::string::npos) + return MatchDiagnostic{true, {"Found begin event"}}; + return MatchDiagnostic{false}; + }, + [&](EndFenceEvent event) { + if (event.descriptor().find("Kokkos::resize(View)") != + std::string::npos) + return MatchDiagnostic{true, {"Found end event"}}; + return MatchDiagnostic{false}; + }, + [&](BeginParallelForEvent event) { + if (event.descriptor().find("initialization") != std::string::npos) + return MatchDiagnostic{true, {"Found begin event"}}; + return MatchDiagnostic{false}; + }, + [&](EndParallelForEvent event) { + if (event.descriptor().find("initialization") != std::string::npos) + 
return MatchDiagnostic{true, {"Found end event"}}; + return MatchDiagnostic{false}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, view_allocation_int) { +// FIXME_OPENMPTARGET +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>::value) + GTEST_SKIP() << "skipping since the OpenMPTarget has unexpected fences"; +#endif + + using ExecutionSpace = TEST_EXECSPACE; + if (Kokkos::SpaceAccessibility< + /*AccessSpace=*/Kokkos::HostSpace, + /*MemorySpace=*/ExecutionSpace::memory_space>::accessible) { + GTEST_SKIP() << "skipping since the fence checked for isn't necessary"; + } + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::EnableAll()); + using view_type = Kokkos::View<int*, TEST_EXECSPACE>; + view_type outer_view; + + auto success = validate_existence( + [&]() { + view_type inner_view( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "bla"), 8); + // Avoid testing the destructor + outer_view = inner_view; + }, + [&](BeginFenceEvent event) { + return MatchDiagnostic{ + event.descriptor().find( + "fence after copying header from HostSpace") != + std::string::npos}; + }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} + +TEST(TEST_CATEGORY, view_allocation_exec_space_int) { +#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET + if (std::is_same<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>::value) + GTEST_SKIP() << "skipping since the OpenMPTarget has unexpected fences"; +#endif + +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<TEST_EXECSPACE::memory_space, Kokkos::CudaUVMSpace>::value) + GTEST_SKIP() + << "skipping since the CudaUVMSpace requires additiional fences"; +#endif + + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::EnableAll()); + using view_type = Kokkos::View<int*, TEST_EXECSPACE>; + view_type outer_view; + + auto success = validate_absence( + [&]() { + view_type 
inner_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, + TEST_EXECSPACE{}, "bla"), + 8); + // Avoid testing the destructor + outer_view = inner_view; + }, + [&](BeginFenceEvent) { return MatchDiagnostic{true}; }); + ASSERT_TRUE(success); + listen_tool_events(Config::DisableAll()); +} diff --git a/packages/kokkos/core/unit_test/category_files/TestHIPHostPinned_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestHIPHostPinned_Category.hpp index 12c69926c7bfc10ec7fef02d9e96c39691c557d6..dace3924a32e43427b23ffccabd80d16df731685 100644 --- a/packages/kokkos/core/unit_test/category_files/TestHIPHostPinned_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestHIPHostPinned_Category.hpp @@ -48,6 +48,7 @@ #include <gtest/gtest.h> #define TEST_CATEGORY hip_hostpinned +#define TEST_CATEGORY_DEATH hip_hostpinned_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::HIPHostPinnedSpace #endif diff --git a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Numeric.hpp b/packages/kokkos/core/unit_test/category_files/TestHIPManaged_Category.hpp similarity index 82% rename from packages/kokkos/algorithms/src/std_algorithms/Kokkos_Numeric.hpp rename to packages/kokkos/core/unit_test/category_files/TestHIPManaged_Category.hpp index 793927e99af23326d5c882e894a4287f06ee1004..1ff88a4841b2ef032c38775a23a33f3c64c918f2 100644 --- a/packages/kokkos/algorithms/src/std_algorithms/Kokkos_Numeric.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestHIPManaged_Category.hpp @@ -42,18 +42,13 @@ //@HEADER */ -#ifndef KOKKOS_STD_NUMERIC_ALL_HPP -#define KOKKOS_STD_NUMERIC_ALL_HPP +#ifndef KOKKOS_TEST_HIPUNIFIED_HPP +#define KOKKOS_TEST_HIPUNIFIED_HPP -#include "./numeric/Kokkos_AdjacentDifference.hpp" +#include <gtest/gtest.h> -// contains exclusive_scan, transform_exclusive_scan -#include "./numeric/Kokkos_ExclusiveScan.hpp" - -// contains inclusive_scan, transform_inclusive_scan -#include "./numeric/Kokkos_InclusiveScan.hpp" - -#include 
"./numeric/Kokkos_Reduce.hpp" -#include "./numeric/Kokkos_TransformReduce.hpp" +#define TEST_CATEGORY hip_managed +#define TEST_CATEGORY_DEATH hip_managed_DeathTest +#define TEST_EXECSPACE Kokkos::Experimental::HIPManagedSpace #endif diff --git a/packages/kokkos/core/unit_test/category_files/TestHIP_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestHIP_Category.hpp index 0a9fe5a08f1167a6c407e9866e8e6ad130053986..a0a34f5c954d58e24e2a85867a94d04e4eee7ca6 100644 --- a/packages/kokkos/core/unit_test/category_files/TestHIP_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestHIP_Category.hpp @@ -49,6 +49,7 @@ #define TEST_CATEGORY hip #define TEST_CATEGORY_NUMBER 6 +#define TEST_CATEGORY_DEATH hip_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::HIP #endif diff --git a/packages/kokkos/core/unit_test/category_files/TestSYCLHostUSM_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestSYCLHostUSM_Category.hpp index 0287829fd61e88d19449e7e82d0b9727a5413fb3..4c1996bfaa73eea936d8f3a6e15340ebd5408fb8 100644 --- a/packages/kokkos/core/unit_test/category_files/TestSYCLHostUSM_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestSYCLHostUSM_Category.hpp @@ -48,6 +48,7 @@ #include <gtest/gtest.h> #define TEST_CATEGORY sycl_host_usm +#define TEST_CATEGORY_DEATH sycl_host_usm_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::SYCLHostUSMSpace #endif diff --git a/packages/kokkos/core/unit_test/category_files/TestSYCLSharedUSM_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestSYCLSharedUSM_Category.hpp index 1ec89fc61a594989f58b5076af6477be051183e8..4421559180c4e83348419bea0172e25a79660766 100644 --- a/packages/kokkos/core/unit_test/category_files/TestSYCLSharedUSM_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestSYCLSharedUSM_Category.hpp @@ -48,6 +48,7 @@ #include <gtest/gtest.h> #define TEST_CATEGORY sycl_shared_usm +#define TEST_CATEGORY_DEATH sycl_shared_usm_DeathTest #define 
TEST_EXECSPACE Kokkos::Experimental::SYCLSharedUSMSpace #endif diff --git a/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp index 345f40d1c39f403dd62369c8cfa668ed1c75a951..efa84ad39f0ad5f12e96c2eb5a908fb505ba5bd5 100644 --- a/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp @@ -49,6 +49,7 @@ #define TEST_CATEGORY sycl #define TEST_CATEGORY_NUMBER 7 +#define TEST_CATEGORY_DEATH sycl_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::SYCL #endif diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Init.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Init.cpp index d09d4edfdad12e7db332c279398247bfda9ca80a..31fd63f08437520614c54714fd611d1bd87db9e3 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Init.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Init.cpp @@ -61,8 +61,7 @@ __global__ void offset(int* p) { TEST(cuda, raw_cuda_interop) { int* p; KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&p, sizeof(int) * 100)); - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); + Kokkos::initialize(); Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v(p, 100); Kokkos::deep_copy(v, 5); diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp index 13388b4c5472c5441d33e9fbfb8f99a995bdcdf0..f11f657e004d7955dc1cdcff8518ad4b58e443d6 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp @@ -50,8 +50,7 @@ namespace Test { TEST(cuda, raw_cuda_streams) { cudaStream_t stream; cudaStreamCreate(&stream); - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); + Kokkos::initialize(); int* p; cudaMalloc(&p, 
sizeof(int) * 100); using MemorySpace = typename TEST_EXECSPACE::memory_space; diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_ReducerViewSizeLimit.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_ReducerViewSizeLimit.cpp index 3d7498b11ca528684d9347ae17310694270384c2..b8ae97f5861b9d0552d1014b58a5041bd9e20c4c 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_ReducerViewSizeLimit.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_ReducerViewSizeLimit.cpp @@ -77,14 +77,6 @@ struct ArrayReduceFunctor { } } - KOKKOS_INLINE_FUNCTION void join(volatile value_type update, - const volatile value_type source) const { - const int numVecs = value_count; - for (int j = 0; j < numVecs; ++j) { - update[j] += source[j]; - } - } - KOKKOS_INLINE_FUNCTION void join(value_type update, const value_type source) const { const int numVecs = value_count; diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeViewAPI.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeViewAPI.cpp index 46556a20141739130505e96dbb48d248d4f9289f..c0d6a8afe878c44791d3133352fdde343cb02810 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeViewAPI.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeViewAPI.cpp @@ -67,9 +67,9 @@ struct TestViewAPI< Kokkos::MemoryTraits<0>; // maybe we want to add that later to the matrix using view_type = Kokkos::View<data_type, layout_type, space_type, traits_type>; - using alloc_layout_type = typename std::conditional< - std::is_same<layout_type, Kokkos::LayoutStride>::value, - Kokkos::LayoutLeft, layout_type>::type; + using alloc_layout_type = + std::conditional_t<std::is_same<layout_type, Kokkos::LayoutStride>::value, + Kokkos::LayoutLeft, layout_type>; using d_alloc_type = Kokkos::View<data_type, alloc_layout_type, space_type>; using h_alloc_type = typename Kokkos::View<data_type, alloc_layout_type, space_type>::HostMirror; diff --git 
a/packages/kokkos/core/unit_test/headers_self_contained/tstHeader.cpp b/packages/kokkos/core/unit_test/headers_self_contained/tstHeader.cpp index d488f0fa36adb2eb3b509a245dcce30b5cc90a76..9f249045be9d85001f92abb5906383d570097460 100644 --- a/packages/kokkos/core/unit_test/headers_self_contained/tstHeader.cpp +++ b/packages/kokkos/core/unit_test/headers_self_contained/tstHeader.cpp @@ -4,6 +4,8 @@ #define KOKKOS_HEADER_TO_TEST \ KOKKOS_HEADER_TEST_STRINGIZE(KOKKOS_HEADER_TEST_NAME) +#define KOKKOS_IMPL_PUBLIC_INCLUDE + // include header twice to see if the include guards are set correctly #include KOKKOS_HEADER_TO_TEST #include KOKKOS_HEADER_TO_TEST diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Init.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Init.cpp index 73d08abca9d396464e8ba538e6e228c4ad70628b..af20e753d4c99ea9941d4bbd19aa61b7894d1f3a 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Init.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Init.cpp @@ -61,8 +61,7 @@ __global__ void offset(int* p) { TEST(hip, raw_hip_interop) { int* p; KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&p, sizeof(int) * 100)); - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); + Kokkos::initialize(); Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v(p, 100); Kokkos::deep_copy(v, 5); diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Streams.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Streams.cpp index 69ca62df6a3a3e95cc77fb4354b96eb6a16e0c2d..95d102d4d1c2628ca4964d7598235bb531683912 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Streams.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Streams.cpp @@ -52,8 +52,7 @@ namespace Test { TEST(hip, raw_hip_streams) { hipStream_t stream; KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamCreate(&stream)); - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); + Kokkos::initialize(); 
int* p; KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&p, sizeof(int) * 100)); using MemorySpace = typename TEST_EXECSPACE::memory_space; diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp new file mode 100644 index 0000000000000000000000000000000000000000..24f48c6599876e9ee68949d65325e75d79590dcd --- /dev/null +++ b/packages/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp @@ -0,0 +1,86 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <TestHIP_Category.hpp> + +namespace { + +template <class HIPMemoryContainer> +bool checkMemoryCoarseGrainedness(HIPMemoryContainer const& container) { + auto size = container.size(); + auto allocationSize = HIPMemoryContainer::required_allocation_size(size); + hipMemRangeCoherencyMode memInfo; + + KOKKOS_IMPL_HIP_SAFE_CALL(hipMemRangeGetAttribute( + &memInfo, sizeof(hipMemRangeCoherencyMode), + hipMemRangeAttributeCoherencyMode, container.data(), allocationSize)); + + return (hipMemRangeCoherencyModeCoarseGrain == memInfo); +} + +#define KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(MEMORY_SPACE, DATATYPE, SIZE) \ + { \ + Kokkos::View<DATATYPE*, MEMORY_SPACE> view(#MEMORY_SPACE, SIZE); \ + ASSERT_TRUE(view.is_allocated()) \ + << "View in " << #MEMORY_SPACE << " with size " << SIZE \ + << " was not allocated. This prevents checks of the grainedness."; \ + ASSERT_TRUE(checkMemoryCoarseGrainedness(view)) \ + << "The memory in views in " << #MEMORY_SPACE \ + << " is not coarse-grained. Kokkos relies on all user facing memory " \ + "being coarse-grained."; \ + } + +TEST(hip, memory_requirements) { + // we want all user-facing memory in hip to be coarse grained. 
As of + // today(07.01.22) the documentation is not reliable/correct, we test the + // memory on the device and host + KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::Experimental::HIPSpace, int, 10); + KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::Experimental::HIPHostPinnedSpace, + int, 10); + KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::Experimental::HIPManagedSpace, + int, 10); +} +} // namespace diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_ScanUnit.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_ScanUnit.cpp index b759d6f407a791fb3b88b86f502cc956780294f3..b44bc46fb7abc3cee5be16ec2a3fd3ce910aafa1 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_ScanUnit.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_ScanUnit.cpp @@ -60,8 +60,11 @@ __global__ void start_intra_block_scan() __syncthreads(); DummyFunctor f; - Kokkos::Impl::hip_intra_block_reduce_scan<true, DummyFunctor, void>(f, - values); + typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, + Kokkos::RangePolicy<Kokkos::Experimental::HIP>, DummyFunctor>::Reducer + reducer(&f); + Kokkos::Impl::hip_intra_block_reduce_scan<true>(reducer, values); __syncthreads(); if (values[i] != ((i + 2) * (i + 1)) / 2) { diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp index d20ea877ec9e1f4aee9f0df5c1d807790cdc932e..c9b370ea7b4466e02b5b6e13da8515ac56fe45c4 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp @@ -75,6 +75,16 @@ TEST(hip, space_access) { Kokkos::HostSpace, Kokkos::Experimental::HIPSpace>::accessible, ""); + static_assert( + !Kokkos::Impl::MemorySpaceAccess< + Kokkos::HostSpace, Kokkos::Experimental::HIPManagedSpace>::assignable, + ""); + + static_assert( + Kokkos::Impl::MemorySpaceAccess< + Kokkos::HostSpace, Kokkos::Experimental::HIPManagedSpace>::accessible, + ""); + //-------------------------------------- 
static_assert(Kokkos::Impl::MemorySpaceAccess< @@ -100,6 +110,16 @@ TEST(hip, space_access) { Kokkos::HostSpace>::accessible, ""); + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPSpace, + Kokkos::Experimental::HIPManagedSpace>::assignable, + ""); + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPSpace, + Kokkos::Experimental::HIPManagedSpace>::accessible, + ""); + //-------------------------------------- static_assert(Kokkos::Impl::MemorySpaceAccess< @@ -127,6 +147,53 @@ TEST(hip, space_access) { Kokkos::Experimental::HIPSpace>::accessible, ""); + static_assert(!Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPHostPinnedSpace, + Kokkos::Experimental::HIPManagedSpace>::assignable, + ""); + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPHostPinnedSpace, + Kokkos::Experimental::HIPManagedSpace>::accessible, + ""); + + //-------------------------------------- + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIPManagedSpace>::assignable, + ""); + + static_assert( + !Kokkos::Impl::MemorySpaceAccess<Kokkos::Experimental::HIPManagedSpace, + Kokkos::HostSpace>::assignable, + ""); + + static_assert( + !Kokkos::Impl::MemorySpaceAccess<Kokkos::Experimental::HIPManagedSpace, + Kokkos::HostSpace>::accessible, + ""); + + static_assert(!Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIPSpace>::assignable, + ""); + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIPSpace>::accessible, + ""); + + static_assert(!Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIPHostPinnedSpace>::assignable, + ""); + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIPHostPinnedSpace>::accessible, + ""); + 
//-------------------------------------- static_assert(!Kokkos::SpaceAccessibility<Kokkos::Experimental::HIP, @@ -143,6 +210,11 @@ TEST(hip, space_access) { Kokkos::Experimental::HIPHostPinnedSpace>::accessible, ""); + static_assert(Kokkos::SpaceAccessibility< + Kokkos::Experimental::HIP, + Kokkos::Experimental::HIPManagedSpace>::accessible, + ""); + static_assert( !Kokkos::SpaceAccessibility<Kokkos::HostSpace, Kokkos::Experimental::HIPSpace>::accessible, @@ -153,6 +225,11 @@ TEST(hip, space_access) { Kokkos::Experimental::HIPHostPinnedSpace>::accessible, ""); + static_assert( + Kokkos::SpaceAccessibility< + Kokkos::HostSpace, Kokkos::Experimental::HIPManagedSpace>::accessible, + ""); + static_assert( std::is_same< Kokkos::Impl::HostMirror<Kokkos::Experimental::HIPSpace>::Space, @@ -165,6 +242,14 @@ TEST(hip, space_access) { Kokkos::Experimental::HIPHostPinnedSpace>::value, ""); + static_assert( + std::is_same< + Kokkos::Impl::HostMirror< + Kokkos::Experimental::HIPManagedSpace>::Space, + Kokkos::Device<Kokkos::HostSpace::execution_space, + Kokkos::Experimental::HIPManagedSpace>>::value, + ""); + static_assert(Kokkos::SpaceAccessibility< Kokkos::Impl::HostMirror<Kokkos::Experimental::HIP>::Space, Kokkos::HostSpace>::accessible, @@ -181,6 +266,12 @@ TEST(hip, space_access) { Kokkos::Experimental::HIPHostPinnedSpace>::Space, Kokkos::HostSpace>::accessible, ""); + + static_assert(Kokkos::SpaceAccessibility< + Kokkos::Impl::HostMirror< + Kokkos::Experimental::HIPManagedSpace>::Space, + Kokkos::HostSpace>::accessible, + ""); } template <class MemSpace, class ExecSpace> @@ -227,6 +318,11 @@ TEST(hip, impl_view_accessible) { Kokkos::Experimental::HIP>::run(); TestViewHIPAccessible<Kokkos::Experimental::HIPHostPinnedSpace, Kokkos::HostSpace::execution_space>::run(); + + TestViewHIPAccessible<Kokkos::Experimental::HIPManagedSpace, + Kokkos::HostSpace::execution_space>::run(); + TestViewHIPAccessible<Kokkos::Experimental::HIPManagedSpace, + Kokkos::Experimental::HIP>::run(); 
} } // namespace Test diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp index 722614464b2e05ea86d0fbaccd8c18a4acdf8645..421e3c50bf675354e2081ee889c4e05ecfef524b 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp @@ -51,9 +51,8 @@ #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH #ifndef HPX_COMPUTE_DEVICE_CODE -namespace Test { - namespace { + struct FunctorInitConstant { Kokkos::View<int *, Kokkos::Experimental::HPX> a; int c; @@ -107,82 +106,75 @@ struct FunctorReduce { KOKKOS_INLINE_FUNCTION void operator()(const int i, int &lsum) const { lsum += a(i); } }; -} // namespace TEST(hpx, independent_instances) { - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); - const int n = 100; const int c = 1; const int d = 3; - { - Kokkos::View<int *, Kokkos::Experimental::HPX> v1("v1", n); - Kokkos::View<int *, Kokkos::Experimental::HPX> v2("v2", n); - Kokkos::View<int *, Kokkos::Experimental::HPX> v3("v3", n); - Kokkos::View<int *, Kokkos::Experimental::HPX> v4("v4", n); - Kokkos::View<int, Kokkos::Experimental::HPX> sum_v("sum_v"); - - Kokkos::Experimental::HPX hpx1( - Kokkos::Experimental::HPX::instance_mode::independent); - Kokkos::parallel_for( - "Test::hpx::independent_instances::init", - Kokkos::Experimental::require( - Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx1, 0, n), - Kokkos::Experimental::WorkItemProperty::HintLightWeight), - FunctorInitConstant(v1, c)); - - Kokkos::Experimental::HPX hpx2(hpx1.impl_get_future()); - Kokkos::parallel_for( - "Test::hpx::independent_instances::add", - Kokkos::Experimental::require( - Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx2, 0, n), - Kokkos::Experimental::WorkItemProperty::HintLightWeight), - FunctorAdd(v1, v2, d)); - - Kokkos::Experimental::HPX hpx3(hpx1.impl_get_future()); - 
Kokkos::parallel_for( - "Test::hpx::independent_instances::add_index", - Kokkos::Experimental::require( - Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx3, 0, n), - Kokkos::Experimental::WorkItemProperty::HintLightWeight), - FunctorAddIndex(v1, v3)); - - // NOTE: This monstrosity is used to collapse a future<tuple<future<void>, - // future<void>>> (return type of when_all) into a future<void> which is - // ready whenever the un-collapsed future would've been ready. HPX does not - // currently have the functionality to collapse this automatically. - Kokkos::Experimental::HPX hpx4(hpx::get<0>(hpx::split_future( - hpx::when_all(hpx2.impl_get_future(), hpx3.impl_get_future())))); - Kokkos::parallel_for( - "Test::hpx::independent_instances::pointwise_sum", - Kokkos::Experimental::require( - Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx4, 0, n), - Kokkos::Experimental::WorkItemProperty::HintLightWeight), - FunctorPointwiseSum(v2, v3, v4)); - - Kokkos::parallel_reduce( - "Test::hpx::independent_instances::reduce", - Kokkos::Experimental::require( - Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx4, 0, n), - Kokkos::Experimental::WorkItemProperty::HintLightWeight), - FunctorReduce(v4), Kokkos::Sum<int>(sum_v)); - - hpx4.fence(); - - ASSERT_EQ(true, hpx1.impl_get_future().is_ready()); - ASSERT_EQ(true, hpx2.impl_get_future().is_ready()); - ASSERT_EQ(true, hpx3.impl_get_future().is_ready()); - ASSERT_EQ(true, hpx4.impl_get_future().is_ready()); - - const int expected_sum = n * (2 * c + d) + (n * (n - 1) / 2); - ASSERT_EQ(expected_sum, sum_v()); - } - - Kokkos::finalize(); + Kokkos::View<int *, Kokkos::Experimental::HPX> v1("v1", n); + Kokkos::View<int *, Kokkos::Experimental::HPX> v2("v2", n); + Kokkos::View<int *, Kokkos::Experimental::HPX> v3("v3", n); + Kokkos::View<int *, Kokkos::Experimental::HPX> v4("v4", n); + Kokkos::View<int, Kokkos::Experimental::HPX> sum_v("sum_v"); + + Kokkos::Experimental::HPX hpx1( + 
Kokkos::Experimental::HPX::instance_mode::independent); + Kokkos::parallel_for( + "Test::hpx::independent_instances::init", + Kokkos::Experimental::require( + Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx1, 0, n), + Kokkos::Experimental::WorkItemProperty::HintLightWeight), + FunctorInitConstant(v1, c)); + + Kokkos::Experimental::HPX hpx2(hpx1.impl_get_future()); + Kokkos::parallel_for( + "Test::hpx::independent_instances::add", + Kokkos::Experimental::require( + Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx2, 0, n), + Kokkos::Experimental::WorkItemProperty::HintLightWeight), + FunctorAdd(v1, v2, d)); + + Kokkos::Experimental::HPX hpx3(hpx1.impl_get_future()); + Kokkos::parallel_for( + "Test::hpx::independent_instances::add_index", + Kokkos::Experimental::require( + Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx3, 0, n), + Kokkos::Experimental::WorkItemProperty::HintLightWeight), + FunctorAddIndex(v1, v3)); + + // NOTE: This monstrosity is used to collapse a future<tuple<future<void>, + // future<void>>> (return type of when_all) into a future<void> which is + // ready whenever the un-collapsed future would've been ready. HPX does not + // currently have the functionality to collapse this automatically. 
+ Kokkos::Experimental::HPX hpx4(hpx::get<0>(hpx::split_future( + hpx::when_all(hpx2.impl_get_future(), hpx3.impl_get_future())))); + Kokkos::parallel_for( + "Test::hpx::independent_instances::pointwise_sum", + Kokkos::Experimental::require( + Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx4, 0, n), + Kokkos::Experimental::WorkItemProperty::HintLightWeight), + FunctorPointwiseSum(v2, v3, v4)); + + Kokkos::parallel_reduce( + "Test::hpx::independent_instances::reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx4, 0, n), + Kokkos::Experimental::WorkItemProperty::HintLightWeight), + FunctorReduce(v4), Kokkos::Sum<int>(sum_v)); + + hpx4.fence(); + + ASSERT_EQ(true, hpx1.impl_get_future().is_ready()); + ASSERT_EQ(true, hpx2.impl_get_future().is_ready()); + ASSERT_EQ(true, hpx3.impl_get_future().is_ready()); + ASSERT_EQ(true, hpx4.impl_get_future().is_ready()); + + const int expected_sum = n * (2 * c + d) + (n * (n - 1) / 2); + ASSERT_EQ(expected_sum, sum_v()); } -} // namespace Test + +} // namespace #endif #endif diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp index ae0d8b5ab2e6c5c9034fdef228e56fa6e39e1fa1..872bf2fd5031d51930a066268c6dff3483b7f986 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp @@ -49,36 +49,30 @@ #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH -namespace Test { +namespace { -TEST(hpx, delayed_execution) { - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); +TEST(hpx, independent_instances_delayed_execution) { + Kokkos::View<bool, Kokkos::Experimental::HPX> ran("ran"); + hpx::lcos::local::promise<void> p; + hpx::shared_future<void> f = p.get_future(); - { - Kokkos::View<bool, Kokkos::Experimental::HPX> 
ran("ran"); - hpx::lcos::local::promise<void> p; - hpx::shared_future<void> f = p.get_future(); + Kokkos::Experimental::HPX hpx(f); + Kokkos::parallel_for( + "Test::hpx::independent_instances::delay_execution", + Kokkos::Experimental::require( + Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx, 0, 1), + Kokkos::Experimental::WorkItemProperty::HintLightWeight), + KOKKOS_LAMBDA(int) { ran() = true; }); - Kokkos::Experimental::HPX hpx(f); - Kokkos::parallel_for( - "Test::hpx::independent_instances::delay_execution", - Kokkos::Experimental::require( - Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx, 0, 1), - Kokkos::Experimental::WorkItemProperty::HintLightWeight), - KOKKOS_LAMBDA(int) { ran() = true; }); + ASSERT_FALSE(ran()); + ASSERT_FALSE(hpx.impl_get_future().is_ready()); - ASSERT_EQ(false, ran()); - ASSERT_EQ(false, hpx.impl_get_future().is_ready()); + p.set_value(); - p.set_value(); - - hpx.fence(); - ASSERT_EQ(true, hpx.impl_get_future().is_ready()); - } - - Kokkos::finalize(); + hpx.fence(); + ASSERT_TRUE(hpx.impl_get_future().is_ready()); } -} // namespace Test + +} // namespace #endif diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp index 300cb111111ee242989733241d00425f9da1d0a4..899cd09d4f4d766101105e19765fc264245ea153 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp @@ -49,69 +49,63 @@ #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH -namespace Test { +namespace { -TEST(hpx, instance_ids) { - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); +TEST(hpx, independent_instances_instance_ids) { + Kokkos::Experimental::HPX hpx_default1; + Kokkos::Experimental::HPX hpx_default2 = hpx_default1; + Kokkos::Experimental::HPX hpx_default3{hpx_default1}; + Kokkos::Experimental::HPX 
hpx_default4( + Kokkos::Experimental::HPX::instance_mode::default_); + Kokkos::Experimental::HPX hpx_default5; + hpx_default5 = hpx_default1; - { - Kokkos::Experimental::HPX hpx_default1; - Kokkos::Experimental::HPX hpx_default2 = hpx_default1; - Kokkos::Experimental::HPX hpx_default3{hpx_default1}; - Kokkos::Experimental::HPX hpx_default4( - Kokkos::Experimental::HPX::instance_mode::default_); - Kokkos::Experimental::HPX hpx_default5; - hpx_default5 = hpx_default1; + ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), + hpx_default1.impl_instance_id()); + ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), + hpx_default2.impl_instance_id()); + ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), + hpx_default3.impl_instance_id()); + ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), + hpx_default4.impl_instance_id()); + ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), + hpx_default5.impl_instance_id()); - ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), - hpx_default1.impl_instance_id()); - ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), - hpx_default2.impl_instance_id()); - ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), - hpx_default3.impl_instance_id()); - ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), - hpx_default4.impl_instance_id()); - ASSERT_EQ(Kokkos::Experimental::HPX::impl_default_instance_id(), - hpx_default5.impl_instance_id()); + Kokkos::Experimental::HPX hpx_independent1( + Kokkos::Experimental::HPX::instance_mode::independent); + Kokkos::Experimental::HPX hpx_independent2 = hpx_independent1; + Kokkos::Experimental::HPX hpx_independent3{hpx_independent1}; + Kokkos::Experimental::HPX hpx_independent4; + hpx_independent4 = hpx_independent1; - Kokkos::Experimental::HPX hpx_independent1( - Kokkos::Experimental::HPX::instance_mode::independent); - Kokkos::Experimental::HPX hpx_independent2 = hpx_independent1; - 
Kokkos::Experimental::HPX hpx_independent3{hpx_independent1}; - Kokkos::Experimental::HPX hpx_independent4; - hpx_independent4 = hpx_independent1; + ASSERT_NE(hpx_default1.impl_instance_id(), + hpx_independent1.impl_instance_id()); + ASSERT_EQ(hpx_independent1.impl_instance_id(), + hpx_independent2.impl_instance_id()); + ASSERT_EQ(hpx_independent1.impl_instance_id(), + hpx_independent3.impl_instance_id()); + ASSERT_EQ(hpx_independent1.impl_instance_id(), + hpx_independent4.impl_instance_id()); - ASSERT_NE(hpx_default1.impl_instance_id(), - hpx_independent1.impl_instance_id()); - ASSERT_EQ(hpx_independent1.impl_instance_id(), - hpx_independent2.impl_instance_id()); - ASSERT_EQ(hpx_independent1.impl_instance_id(), - hpx_independent3.impl_instance_id()); - ASSERT_EQ(hpx_independent1.impl_instance_id(), - hpx_independent4.impl_instance_id()); + hpx::shared_future<void> f = hpx::make_ready_future<void>(); + Kokkos::Experimental::HPX hpx_independent_future1(f); + Kokkos::Experimental::HPX hpx_independent_future2 = hpx_independent_future1; + Kokkos::Experimental::HPX hpx_independent_future3{hpx_independent_future1}; + Kokkos::Experimental::HPX hpx_independent_future4; + hpx_independent_future4 = hpx_independent_future1; - hpx::shared_future<void> f = hpx::make_ready_future<void>(); - Kokkos::Experimental::HPX hpx_independent_future1(f); - Kokkos::Experimental::HPX hpx_independent_future2 = hpx_independent_future1; - Kokkos::Experimental::HPX hpx_independent_future3{hpx_independent_future1}; - Kokkos::Experimental::HPX hpx_independent_future4; - hpx_independent_future4 = hpx_independent_future1; - - ASSERT_NE(hpx_default1.impl_instance_id(), - hpx_independent1.impl_instance_id()); - ASSERT_NE(hpx_independent1.impl_instance_id(), - hpx_independent_future1.impl_instance_id()); - ASSERT_EQ(hpx_independent_future1.impl_instance_id(), - hpx_independent_future2.impl_instance_id()); - ASSERT_EQ(hpx_independent_future1.impl_instance_id(), - 
hpx_independent_future3.impl_instance_id()); - ASSERT_EQ(hpx_independent_future1.impl_instance_id(), - hpx_independent_future4.impl_instance_id()); - } - - Kokkos::finalize(); + ASSERT_NE(hpx_default1.impl_instance_id(), + hpx_independent1.impl_instance_id()); + ASSERT_NE(hpx_independent1.impl_instance_id(), + hpx_independent_future1.impl_instance_id()); + ASSERT_EQ(hpx_independent_future1.impl_instance_id(), + hpx_independent_future2.impl_instance_id()); + ASSERT_EQ(hpx_independent_future1.impl_instance_id(), + hpx_independent_future3.impl_instance_id()); + ASSERT_EQ(hpx_independent_future1.impl_instance_id(), + hpx_independent_future4.impl_instance_id()); } -} // namespace Test + +} // namespace #endif diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesRefCounting.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesRefCounting.cpp index a98c8b0d62339fa5c2e68124984d5b790b14f692..a69bea572f7bccc5108087f11fa6dbfca5afd4c9 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesRefCounting.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesRefCounting.cpp @@ -47,7 +47,6 @@ #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH -namespace Test { namespace { std::atomic<int> dummy_count; @@ -57,39 +56,32 @@ struct dummy { ~dummy() { --dummy_count; } void f() const {} }; -} // namespace + // This test makes sure the independent HPX instances don't hold on to captured // data after destruction. -TEST(hpx, reference_counting) { - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); - - { - dummy d; - Kokkos::Experimental::HPX hpx( - Kokkos::Experimental::HPX::instance_mode::independent); - Kokkos::parallel_for( - "Test::hpx::reference_counting::dummy", - Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx, 0, 1), - KOKKOS_LAMBDA(int) { - // Make sure dummy struct is captured. 
- d.f(); - }); +TEST(hpx, independent_instances_reference_counting) { + dummy d; + Kokkos::Experimental::HPX hpx( + Kokkos::Experimental::HPX::instance_mode::independent); + Kokkos::parallel_for( + "Test::hpx::reference_counting::dummy", + Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx, 0, 1), + KOKKOS_LAMBDA(int) { + // Make sure dummy struct is captured. + d.f(); + }); - // This attaches a continuation and releases the d captured above from the - // shared state of the internal future. - Kokkos::parallel_for( - "Test::hpx::reference_counting::dummy_clear", - Kokkos::RangePolicy<Kokkos::Experimental::HPX>(hpx, 0, 1), - KOKKOS_LAMBDA(int){}); + hpx.fence(); - hpx.fence(); + // The fence above makes sure that copies of dummy get released. However, + // all copies are not guaranteed to be released as soon as fence returns. + // Therefore we wait for a short time to make it almost guaranteed that all + // copies have been released. + std::this_thread::sleep_for(std::chrono::milliseconds(100)); - ASSERT_EQ(1, dummy_count); - } - - Kokkos::finalize(); + ASSERT_EQ(1, dummy_count); } -} // namespace Test + +} // namespace #endif diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp index 31c35ac9a7f0a3425948157cb7f2d3a4239691ad..e89f7acacc3a5b0c4dff58137dff4c067eeae02e 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp @@ -48,10 +48,10 @@ namespace Test { // Test whether allocations survive Kokkos initialize/finalize if done via Raw -// Cuda. +// HPX. 
TEST(hpx, raw_hpx_interop) { - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); + // FIXME_HPX + Kokkos::initialize(); Kokkos::finalize(); } } // namespace Test diff --git a/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp b/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp index f157af4f2ca14212b5e3f8a88ff7b676cd0d1c62..c14a90d749709d05ab7c87b6de032cc30f55cf5f 100644 --- a/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp +++ b/packages/kokkos/core/unit_test/incremental/Test01_execspace.hpp @@ -61,10 +61,10 @@ template <class ExecSpace> struct TestIncrExecSpaceTypedef { void testit() { const bool passed = - (!std::is_same<void, typename ExecSpace::memory_space>::value) && + (!std::is_void<typename ExecSpace::memory_space>::value) && std::is_same<ExecSpace, typename ExecSpace::execution_space>::value && - !std::is_same<void, typename ExecSpace::scratch_memory_space>::value && - !std::is_same<void, typename ExecSpace::array_layout>::value; + !std::is_void<typename ExecSpace::scratch_memory_space>::value && + !std::is_void<typename ExecSpace::array_layout>::value; static_assert(passed == true, "The memory and execution spaces are defined"); } diff --git a/packages/kokkos/core/unit_test/incremental/Test14_MDRangeReduce.hpp b/packages/kokkos/core/unit_test/incremental/Test14_MDRangeReduce.hpp index 7d53b9fb208ea490626634c304dd130b74392461..649cf6c3830143ad0e7c530e84d57c3484a5e9f9 100644 --- a/packages/kokkos/core/unit_test/incremental/Test14_MDRangeReduce.hpp +++ b/packages/kokkos/core/unit_test/incremental/Test14_MDRangeReduce.hpp @@ -71,12 +71,6 @@ struct MyComplex { _re += src._re; _im += src._im; } - - KOKKOS_INLINE_FUNCTION - void operator+=(const volatile MyComplex& src) volatile { - _re += src._re; - _im += src._im; - } }; template <class ExecSpace> diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP.hpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP.hpp index 
ce8ee40d454051e0edaff4ba25db390e7dd056bf..1039f13fec00e5c39149511ebfcd8c28edc24aa0 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP.hpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP.hpp @@ -74,7 +74,6 @@ #include <TestCXX11.hpp> #include <TestCXX11Deduction.hpp> #include <TestTeamVector.hpp> -#include <TestTemplateMetaFunctions.hpp> #include <TestPolicyConstruction.hpp> #include <TestMDRange.hpp> #include <TestConcurrentBitset.hpp> diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp index c3ee67673912bb8c8f022d03322d6e8b69adfd72..5f8fd2236680dc79edbd5dd62e031a9ab54c5ffb 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp @@ -62,8 +62,7 @@ TEST(openmp, raw_openmp_interop) { ASSERT_EQ(count, num_threads); - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); + Kokkos::initialize(); count = 0; #pragma omp parallel diff --git a/packages/kokkos/core/unit_test/openmptarget/TestOpenMPTarget.hpp b/packages/kokkos/core/unit_test/openmptarget/TestOpenMPTarget.hpp index edc1c24ddf298f8f00a3a451df8ca75a13cfa46c..3d8c722be4dbc46f7cc6039500b977fe6b68809b 100644 --- a/packages/kokkos/core/unit_test/openmptarget/TestOpenMPTarget.hpp +++ b/packages/kokkos/core/unit_test/openmptarget/TestOpenMPTarget.hpp @@ -76,7 +76,6 @@ //#include <TestCXX11.hpp> //#include <TestCXX11Deduction.hpp> #include <TestTeamVector.hpp> -//#include <TestTemplateMetaFunctions.hpp> //#include <TestPolicyConstruction.hpp> //#include <TestMDRange.hpp> diff --git a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init.cpp index d145d69d9e0feb4450bfff5080e9955115b5c49e..e45d990745e1ba3cc6c8d7e5e66c434745619872 100644 --- a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init.cpp +++ 
b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init.cpp @@ -52,11 +52,10 @@ namespace Test { // Test whether allocations survive Kokkos initialize/finalize if done via Raw // SYCL. TEST(sycl, raw_sycl_interop) { - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); + Kokkos::initialize(); Kokkos::Experimental::SYCL default_space; - sycl::context default_context = default_space.sycl_context(); + sycl::context default_context = default_space.sycl_queue().get_context(); sycl::default_selector device_selector; sycl::queue queue(default_context, device_selector); diff --git a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init_Context.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init_Context.cpp index c12c5c07295d73ddb0600d366f9c50faa6ba96df..114d2a4aa2bd13c1932baf3e812c0373029a5821 100644 --- a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init_Context.cpp +++ b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init_Context.cpp @@ -52,7 +52,7 @@ namespace Test { // Test whether external allocations can be accessed by the default queue. TEST(sycl, raw_sycl_interop_context_1) { Kokkos::Experimental::SYCL default_space; - sycl::context default_context = default_space.sycl_context(); + sycl::context default_context = default_space.sycl_queue().get_context(); sycl::default_selector device_selector; sycl::queue queue(default_context, device_selector); @@ -86,7 +86,7 @@ TEST(sycl, raw_sycl_interop_context_1) { // Test whether regular View allocations can be accessed by non-default queues. 
TEST(sycl, raw_sycl_interop_context_2) { Kokkos::Experimental::SYCL default_space; - sycl::context default_context = default_space.sycl_context(); + sycl::context default_context = default_space.sycl_queue().get_context(); sycl::default_selector device_selector; sycl::queue queue(default_context, device_selector); diff --git a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp index 40a88a6ca407387bfa0a3b3f23296a3eb2cad5f5..8ffada1dab1d255c3fec8486fe51cef06fb935a5 100644 --- a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp +++ b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp @@ -49,12 +49,8 @@ namespace Test { // Test Interoperability with SYCL Streams TEST(sycl, raw_sycl_queues) { sycl::default_selector device_selector; - // FIXME_SYCL using an in-order queue here should not be necessary since we - // are using submit_barrier for managing kernel dependencies but this seems to - // be required as a hot fix for now. 
- sycl::queue queue(device_selector, sycl::property::queue::in_order()); - Kokkos::InitArguments arguments{-1, -1, -1, false}; - Kokkos::initialize(arguments); + sycl::queue queue(device_selector); + Kokkos::initialize(); int* p = sycl::malloc_device<int>(100, queue); using MemorySpace = typename TEST_EXECSPACE::memory_space; diff --git a/packages/kokkos/core/unit_test/sycl/TestSYCL_TeamScratchStreams.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_TeamScratchStreams.cpp index ab0d09880f03b56d81ae693d26b5c838b2436a24..420522caf5036a087b69c928b63ec4100aca0325 100644 --- a/packages/kokkos/core/unit_test/sycl/TestSYCL_TeamScratchStreams.cpp +++ b/packages/kokkos/core/unit_test/sycl/TestSYCL_TeamScratchStreams.cpp @@ -102,7 +102,7 @@ void sycl_queue_scratch_test( Kokkos::View<int64_t, Kokkos::Experimental::SYCLDeviceUSMSpace> counter) { constexpr int K = 4; Kokkos::Experimental::SYCL default_space; - sycl::context default_context = default_space.sycl_context(); + sycl::context default_context = default_space.sycl_queue().get_context(); sycl::default_selector device_selector; sycl::queue queue(default_context, device_selector); diff --git a/packages/kokkos/core/unit_test/tools/TestLogicalSpaces.hpp b/packages/kokkos/core/unit_test/tools/TestLogicalSpaces.hpp index df250fe0d979ab282a394519d85e73a85215da73..2fd43558f674a6760c8125aeeec4381c3da503cd 100644 --- a/packages/kokkos/core/unit_test/tools/TestLogicalSpaces.hpp +++ b/packages/kokkos/core/unit_test/tools/TestLogicalSpaces.hpp @@ -189,11 +189,14 @@ TEST(defaultdevicetype, chained_logical_spaces) { test_chained_spaces(); } TEST(defaultdevicetype, access_allowed) { test_allowed_access<fake_memory_space>(); } +// FIXME_SYCL +#if !(defined(KOKKOS_COMPILER_INTEL) && defined(KOKKOS_ENABLE_SYCL)) TEST(defaultdevicetype_DeathTest, access_forbidden) { ::testing::FLAGS_gtest_death_test_style = "threadsafe"; ASSERT_DEATH( { test_allowed_access<semantically_independent_logical_space>(); }, "Kokkos::View ERROR: attempt to 
access inaccessible memory space"); } +#endif } // namespace Test diff --git a/packages/kokkos/core/unit_test/tools/TestWithoutInitializing.cpp b/packages/kokkos/core/unit_test/tools/TestWithoutInitializing.cpp index c0a695d72004abce4e4d95bd1d5e96e72bf85c15..8d81098941eaeda73a2f9ac0f87bb412d927b10e 100644 --- a/packages/kokkos/core/unit_test/tools/TestWithoutInitializing.cpp +++ b/packages/kokkos/core/unit_test/tools/TestWithoutInitializing.cpp @@ -75,3 +75,79 @@ TEST(kokkosp, create_mirror_no_init) { }); ASSERT_TRUE(success); } + +TEST(kokkosp, create_mirror_no_init_view_ctor) { + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels()); + Kokkos::View<int*, Kokkos::DefaultExecutionSpace> device_view("device view", + 10); + Kokkos::View<int*, Kokkos::HostSpace> host_view("host view", 10); + + auto success = validate_absence( + [&]() { + auto mirror_device = Kokkos::create_mirror( + Kokkos::view_alloc(Kokkos::HostSpace{}, + Kokkos::WithoutInitializing), + device_view); + auto mirror_host = Kokkos::create_mirror( + Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing, + Kokkos::DefaultExecutionSpace{}), + host_view); + auto mirror_device_view = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::HostSpace{}, + Kokkos::WithoutInitializing), + device_view); + auto mirror_host_view = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing, + Kokkos::DefaultExecutionSpace{}), + host_view); + mirror_host_view = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::WithoutInitializing), host_view); + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found begin event"}}; + }, + [&](EndParallelForEvent) { + return MatchDiagnostic{true, {"Found end event"}}; + }); + ASSERT_TRUE(success); +} + +TEST(kokkosp, create_mirror_view_and_copy) { +#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET + if (std::is_same<Kokkos::DefaultExecutionSpace, + 
Kokkos::Experimental::OpenMPTarget>::value) + GTEST_SKIP() << "skipping since the OpenMPTarget has unexpected fences"; +#endif + +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same<Kokkos::DefaultExecutionSpace::memory_space, + Kokkos::CudaUVMSpace>::value) + GTEST_SKIP() + << "skipping since the CudaUVMSpace requires additional fences"; +#endif + + using namespace Kokkos::Test::Tools; + listen_tool_events(Config::DisableAll(), Config::EnableKernels(), + Config::EnableFences()); + Kokkos::View<int*, Kokkos::DefaultExecutionSpace> device_view; + Kokkos::View<int*, Kokkos::HostSpace> host_view("host view", 10); + + auto success = validate_absence( + [&]() { + auto mirror_device = Kokkos::create_mirror_view_and_copy( + Kokkos::view_alloc( + Kokkos::DefaultExecutionSpace{}, + typename Kokkos::DefaultExecutionSpace::memory_space{}), + host_view); + // Avoid fences for deallocation when mirror_device goes out of scope. + device_view = mirror_device; + }, + [&](BeginParallelForEvent) { + return MatchDiagnostic{true, {"Found parallel_for event"}}; + }, + [&](BeginFenceEvent) { + return MatchDiagnostic{true, {"Found fence event"}}; + }); + ASSERT_TRUE(success); +} diff --git a/packages/kokkos/core/unit_test/tools/include/ToolTestingUtilities.hpp b/packages/kokkos/core/unit_test/tools/include/ToolTestingUtilities.hpp index e5a03f7fb6b93126b7b51ab6665cb1431adca845..10dbea579964a14d7ad0f3acf1b23a8b2eb5221e 100644 --- a/packages/kokkos/core/unit_test/tools/include/ToolTestingUtilities.hpp +++ b/packages/kokkos/core/unit_test/tools/include/ToolTestingUtilities.hpp @@ -1300,6 +1300,24 @@ bool validate_absence(const Lambda& lam, const Matchers... 
matchers) { return true; } +template <class Lambda, class Matcher> +bool validate_existence(const Lambda& lam, const Matcher matcher) { + // First, erase events from previous invocations + found_events.clear(); + // Invoke the lambda (this will populate found_events, via tooling) + lam(); + // compare the found events against the expected ones + for (const auto& event : found_events) { + MatchDiagnostic match = check_presence_of(event, matcher); + + if (match.success) return true; + } + std::cout << "Test failure: Didn't encounter wanted events" << std::endl; + for (const auto& p_event : found_events) + std::cout << p_event->descriptor() << std::endl; + return false; +} + } // namespace Tools } // namespace Test } // namespace Kokkos diff --git a/packages/kokkos/example/build_cmake_installed/CMakeLists.txt b/packages/kokkos/example/build_cmake_installed/CMakeLists.txt index 48d2cff512b2e076f3922c751391da58a51e7f61..780f7e6ac6d110573a8958d567063e32edad3764 100644 --- a/packages/kokkos/example/build_cmake_installed/CMakeLists.txt +++ b/packages/kokkos/example/build_cmake_installed/CMakeLists.txt @@ -6,12 +6,6 @@ cmake_minimum_required(VERSION 3.16) # Kokkos flags will only apply to C++ files project(Example CXX Fortran) -# You need this for using Kokkos_ROOT variable -if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.12.0") - message(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables") - cmake_policy(SET CMP0074 NEW) -endif() - # Look for an installed Kokkos find_package(Kokkos REQUIRED) diff --git a/packages/kokkos/example/build_cmake_installed/cmake_example.cpp b/packages/kokkos/example/build_cmake_installed/cmake_example.cpp index fd05172cb83ff3052b0a054e2a72475825555d93..5101526ab865cdcd2e9e00250530a0db605da756 100644 --- a/packages/kokkos/example/build_cmake_installed/cmake_example.cpp +++ b/packages/kokkos/example/build_cmake_installed/cmake_example.cpp @@ -55,7 +55,7 @@ struct CountFunctor { int main(int argc, char* argv[]) { Kokkos::initialize(argc, 
argv); - Kokkos::DefaultExecutionSpace::print_configuration(std::cout); + Kokkos::DefaultExecutionSpace().print_configuration(std::cout); if (argc < 2) { fprintf(stderr, "Usage: %s [<kokkos_options>] <size>\n", argv[0]); diff --git a/packages/kokkos/example/build_cmake_installed_different_compiler/CMakeLists.txt b/packages/kokkos/example/build_cmake_installed_different_compiler/CMakeLists.txt index df16774e742e9f60a116a5a8dcdf93bcc17b0606..1647c6ca06ca648ede40de1c508bd837a9beaffa 100644 --- a/packages/kokkos/example/build_cmake_installed_different_compiler/CMakeLists.txt +++ b/packages/kokkos/example/build_cmake_installed_different_compiler/CMakeLists.txt @@ -6,10 +6,6 @@ cmake_minimum_required(VERSION 3.16) # Kokkos flags will only apply to C++ files project(Example CXX Fortran) -# You need this for using Kokkos_ROOT variable -message(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables") -cmake_policy(SET CMP0074 NEW) - # Look for an installed Kokkos but force using the compiler launcher # to ensure that targets depending on Kokkos use the same compiler # as when kokkos was installed, e.g. 
if kokkos was built with diff --git a/packages/kokkos/example/build_cmake_installed_different_compiler/foo.cpp b/packages/kokkos/example/build_cmake_installed_different_compiler/foo.cpp index fc10366f71bd9b0d421b18e935c2cea86925904b..f78f07c6f6ba00fcbbf154dcfcb3088d0be1c8fd 100644 --- a/packages/kokkos/example/build_cmake_installed_different_compiler/foo.cpp +++ b/packages/kokkos/example/build_cmake_installed_different_compiler/foo.cpp @@ -53,7 +53,7 @@ struct CountFunctor { int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); - Kokkos::DefaultExecutionSpace::print_configuration(std::cout); + Kokkos::DefaultExecutionSpace().print_configuration(std::cout); if (argc < 2) { fprintf(stderr, "Usage: %s [<kokkos_options>] <size>\n", argv[0]); diff --git a/packages/kokkos/example/build_cmake_installed_kk_as_language/cmake_example.cpp b/packages/kokkos/example/build_cmake_installed_kk_as_language/cmake_example.cpp index 5a0f93e9db3b871c2c8bfeae55f2661dcd67213b..b9b1c5848d1dd8a1fd082684bfadc8aebd1acb03 100644 --- a/packages/kokkos/example/build_cmake_installed_kk_as_language/cmake_example.cpp +++ b/packages/kokkos/example/build_cmake_installed_kk_as_language/cmake_example.cpp @@ -56,7 +56,7 @@ struct CountEvenIntegers { int main(int argc, char* argv[]) { Kokkos::ScopeGuard guard(argc, argv); - Kokkos::DefaultExecutionSpace::print_configuration(std::cout); + Kokkos::DefaultExecutionSpace().print_configuration(std::cout); const long n = argc > 1 ? 
atoi(argv[1]) : 10; diff --git a/packages/kokkos/example/tutorial/06_simple_mdrangepolicy/simple_mdrangepolicy.cpp b/packages/kokkos/example/tutorial/06_simple_mdrangepolicy/simple_mdrangepolicy.cpp index 5ac7f4fbb060ae952a0685313ec357ffa05abf96..aac3b7eba869280ff09c0c5604ce4e4421d3693f 100644 --- a/packages/kokkos/example/tutorial/06_simple_mdrangepolicy/simple_mdrangepolicy.cpp +++ b/packages/kokkos/example/tutorial/06_simple_mdrangepolicy/simple_mdrangepolicy.cpp @@ -61,13 +61,13 @@ // Simple functor for computing/storing the product of indices in a View v template <class ViewType> -struct MDFunctor { +struct MDFunctor2D { using value_type = long; ViewType v; size_t size; - MDFunctor(const ViewType& v_, const size_t size_) : v(v_), size(size_) {} + MDFunctor2D(const ViewType& v_, const size_t size_) : v(v_), size(size_) {} // 2D case - used by parallel_for KOKKOS_INLINE_FUNCTION @@ -75,12 +75,6 @@ struct MDFunctor { v(i, j) = i * j; // compute the product of indices } - // 3D case - used by parallel_for - KOKKOS_INLINE_FUNCTION - void operator()(const int i, const int j, const int k) const { - v(i, j, k) = i * j * k; // compute the product of indices - } - // 2D case - reduction KOKKOS_INLINE_FUNCTION void operator()(const int i, const int j, value_type& incorrect_count) const { @@ -88,6 +82,22 @@ struct MDFunctor { incorrect_count += 1; } } +}; + +template <class ViewType> +struct MDFunctor3D { + using value_type = long; + + ViewType v; + size_t size; + + MDFunctor3D(const ViewType& v_, const size_t size_) : v(v_), size(size_) {} + + // 3D case - used by parallel_for + KOKKOS_INLINE_FUNCTION + void operator()(const int i, const int j, const int k) const { + v(i, j, k) = i * j * k; // compute the product of indices + } // 3D case - reduction KOKKOS_INLINE_FUNCTION @@ -170,11 +180,12 @@ int main(int argc, char* argv[]) { ViewType_2D v2("v2", n, n); // Execute parallel_for with rank 2 MDRangePolicy - Kokkos::parallel_for("md2d", mdpolicy_2d, 
MDFunctor<ViewType_2D>(v2, n)); + Kokkos::parallel_for("md2d", mdpolicy_2d, MDFunctor2D<ViewType_2D>(v2, n)); // Check results with a parallel_reduce using the MDRangePolicy Kokkos::parallel_reduce("md2dredux", mdpolicy_2d, - MDFunctor<ViewType_2D>(v2, n), incorrect_count_2d); + MDFunctor2D<ViewType_2D>(v2, n), + incorrect_count_2d); printf("Rank 2 MDRangePolicy incorrect count: %ld\n", incorrect_count_2d); // should be 0 @@ -194,11 +205,12 @@ int main(int argc, char* argv[]) { ViewType_3D v3("v3", n, n, n); // Execute parallel_for with rank 3 MDRangePolicy - Kokkos::parallel_for("md3d", mdpolicy_3d, MDFunctor<ViewType_3D>(v3, n)); + Kokkos::parallel_for("md3d", mdpolicy_3d, MDFunctor3D<ViewType_3D>(v3, n)); // Check results with a parallel_reduce using the MDRangePolicy Kokkos::parallel_reduce("md3dredux", mdpolicy_3d, - MDFunctor<ViewType_3D>(v3, n), incorrect_count_3d); + MDFunctor3D<ViewType_3D>(v3, n), + incorrect_count_3d); printf("Rank 3 MDRangePolicy incorrect count: %ld\n", incorrect_count_3d); // should be 0 diff --git a/packages/kokkos/generate_makefile.bash b/packages/kokkos/generate_makefile.bash index f86147bb9fc1dd8508dfc1ee8a3226e50aab16a7..3b78301fca06e79a6433f57d8eddd2ce9c47ff5e 100755 --- a/packages/kokkos/generate_makefile.bash +++ b/packages/kokkos/generate_makefile.bash @@ -179,9 +179,18 @@ display_help_text() { echo " HSW = Intel Haswell CPUs" echo " BDW = Intel Broadwell Xeon E-class CPUs" echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " ICX = Intel Ice Lake CPUs (AVX512)" echo " [Intel Xeon Phi]" echo " KNC = Intel Knights Corner Xeon Phi" echo " KNL = Intel Knights Landing Xeon Phi" + echo " [Intel: GPU]" + echo " INTEL_GEN = SPIR64-based devices, e.g. 
Intel GPUs, using JIT" + echo " INTEL_DG1 = Intel Iris XeMAX GPU" + echo " INTEL_GEN9 = Intel GPU Gen9" + echo " INTEL_GEN11 = Intel GPU Gen11" + echo " INTEL_GEN12LP = Intel GPU Gen12LP" + echo " INTEL_XEHP = Intel GPU Xe-HP" + echo " INTEL_PVC = Intel GPU Ponte Vecchio" echo " [NVIDIA]" echo " Kepler30 = NVIDIA Kepler generation CC 3.0" echo " Kepler32 = NVIDIA Kepler generation CC 3.2" @@ -194,11 +203,15 @@ display_help_text() { echo " Pascal61 = NVIDIA Pascal generation CC 6.1" echo " Volta70 = NVIDIA Volta generation CC 7.0" echo " Volta72 = NVIDIA Volta generation CC 7.2" + echo " Ampere80 = NVIDIA Ampere generation CC 8.0" + echo " Ampere86 = NVIDIA Ampere generation CC 8.6" echo "" echo "--compiler=/Path/To/Compiler Set the compiler." echo "--debug,-dbg: Enable Debugging." echo "--boundscheck: Enable Kokkos_ENABLE_DEBUG_BOUNDS_CHECK to check View accesses within bounds." echo "--disable-tests Disable compilation of unit tests (enabled by default)" + echo "--deprecated-code Enable deprecated code (disabled by default)" + echo "--deprecated-code-warnings Enable deprecated code warnings (disabled by default)" echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" echo " build. 
This will still set certain required" echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," @@ -239,6 +252,9 @@ WITH_CUDA_BACKEND=OFF WITH_HIP_BACKEND=OFF WITH_OMPT_BACKEND=OFF +KOKKOS_DEPRECATED_CODE=OFF +KOKKOS_DEPRECATED_CODE_WARNINGS=OFF + while [[ $# > 0 ]] do key="$1" @@ -358,6 +374,12 @@ do --disable-tests) KOKKOS_DO_TESTS=OFF ;; + --deprecated-code) + KOKKOS_DEPRECATED_CODE=ON + ;; + --deprecated-code-warnings) + KOKKOS_DEPRECATED_CODE_WARNINGS=ON + ;; --no-examples) KOKKOS_DO_EXAMPLES=OFF ;; @@ -483,5 +505,5 @@ if [[ ${COMPILER} == *clang* ]]; then fi fi -echo cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=${KOKKOS_DO_TESTS} -DKokkos_ENABLE_EXAMPLES=${KOKKOS_DO_EXAMPLES} ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} ${KOKKOS_HIP_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_BC_CMD} ${KOKKOS_HWLOC_CMD} ${KOKKOS_HWLOC_PATH_CMD} ${KOKKOS_MEMKIND_CMD} ${KOKKOS_MEMKIND_PATH_CMD} -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF ${KOKKOS_PATH} -cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS//\"}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS//\"}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=${KOKKOS_DO_TESTS} -DKokkos_ENABLE_EXAMPLES=${KOKKOS_DO_EXAMPLES} ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} ${KOKKOS_HIP_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_BC_CMD} ${KOKKOS_HWLOC_CMD} ${KOKKOS_HWLOC_PATH_CMD} ${KOKKOS_MEMKIND_CMD} ${KOKKOS_MEMKIND_PATH_CMD} ${PASSTHRU_CMAKE_FLAGS} -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF ${KOKKOS_PATH} +echo cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} 
-DKokkos_ENABLE_TESTS=${KOKKOS_DO_TESTS} -DKokkos_ENABLE_EXAMPLES=${KOKKOS_DO_EXAMPLES} ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} ${KOKKOS_HIP_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_BC_CMD} ${KOKKOS_HWLOC_CMD} ${KOKKOS_HWLOC_PATH_CMD} ${KOKKOS_MEMKIND_CMD} ${KOKKOS_MEMKIND_PATH_CMD} -DKokkos_ENABLE_DEPRECATION_WARNINGS=${KOKKOS_DEPRECATED_CODE_WARNINGS} -DKokkos_ENABLE_DEPRECATED_CODE_3=${KOKKOS_DEPRECATED_CODE} ${KOKKOS_PATH} +cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS//\"}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS//\"}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=${KOKKOS_DO_TESTS} -DKokkos_ENABLE_EXAMPLES=${KOKKOS_DO_EXAMPLES} ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} ${KOKKOS_HIP_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_BC_CMD} ${KOKKOS_HWLOC_CMD} ${KOKKOS_HWLOC_PATH_CMD} ${KOKKOS_MEMKIND_CMD} ${KOKKOS_MEMKIND_PATH_CMD} ${PASSTHRU_CMAKE_FLAGS} -DKokkos_ENABLE_DEPRECATION_WARNINGS=${KOKKOS_DEPRECATED_CODE_WARNINGS} -DKokkos_ENABLE_DEPRECATED_CODE_3=${KOKKOS_DEPRECATED_CODE} ${KOKKOS_PATH} diff --git a/packages/kokkos/gnu_generate_makefile.bash b/packages/kokkos/gnu_generate_makefile.bash index 15a095854e9c8ad9712e9b368a911f81c1bb9163..aab95e12e34c71ba2777f63ee30213fb9b62bab9 100755 --- a/packages/kokkos/gnu_generate_makefile.bash +++ b/packages/kokkos/gnu_generate_makefile.bash @@ -158,9 +158,18 @@ do echo " HSW = Intel Haswell CPUs" echo " BDW = Intel Broadwell Xeon E-class CPUs" echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " ICX = Intel Ice Lake CPUs (AVX512)" echo " [Intel Xeon Phi]" echo " KNC = Intel Knights Corner Xeon Phi" echo " KNL = Intel Knights Landing Xeon Phi" + echo " [Intel: GPU]" + echo " INTEL_GEN = SPIR64-based devices, e.g. 
Intel GPUs, using JIT" + echo " INTEL_DG1 = Intel Iris XeMAX GPU" + echo " INTEL_GEN9 = Intel GPU Gen9" + echo " INTEL_GEN11 = Intel GPU Gen11" + echo " INTEL_GEN12LP = Intel GPU Gen12LP" + echo " INTEL_XEHP = Intel GPU Xe-HP" + echo " INTEL_PVC = Intel GPU Ponte Vecchio" echo " [NVIDIA]" echo " Kepler30 = NVIDIA Kepler generation CC 3.0" echo " Kepler32 = NVIDIA Kepler generation CC 3.2" diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index 41c755a8a84be166c3e82b892b8b26b4d5df2bf7..a1a87ce3199d10449b92be4a8e09ecaa790a303f 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -28,3 +28,4 @@ tag: 3.4.01 date: 05:20:2021 master: 4b97a22f release: 410b15c8 tag: 3.5.00 date: 11:19:2021 master: c28a8b03 release: 21b879e4 tag: 3.6.00 date: 04:14:2022 master: 2834f94a release: 6ea708ff tag: 3.6.01 date: 06:16:2022 master: b52f8c83 release: afe9b404 +tag: 3.7.00 date: 08:25:2022 master: d19aab99 release: 0018e5fb diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvhpc b/packages/kokkos/scripts/docker/Dockerfile.nvhpc index 3e3a32e4dd017a5be6e95ce701cfa6082d5d6b0e..c0b8cc19d155eb8d9eab1cf5cfaa8cfaf61f664c 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.nvhpc +++ b/packages/kokkos/scripts/docker/Dockerfile.nvhpc @@ -1,4 +1,4 @@ -ARG BASE=nvcr.io/nvidia/nvhpc:21.9-devel-cuda11.4-ubuntu20.04 +ARG BASE=nvcr.io/nvidia/nvhpc:22.3-devel-cuda11.6-ubuntu20.04 FROM $BASE RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ @@ -9,7 +9,7 @@ RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ gpg --verify ${KEYDUMP_FILE}.sig ${KEYDUMP_FILE} && \ rm ${KEYDUMP_FILE}* -ARG CMAKE_VERSION=3.21.4 +ARG CMAKE_VERSION=3.23.1 ENV CMAKE_DIR=/opt/cmake RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \ CMAKE_SCRIPT=cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.openmptarget 
b/packages/kokkos/scripts/docker/Dockerfile.openmptarget index caeee1821a00866662cce2b6efa30b5b9d1c82a6..e10c3f2208f2bb9368f6f33a82da4b43ef61fa30 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.openmptarget +++ b/packages/kokkos/scripts/docker/Dockerfile.openmptarget @@ -38,7 +38,7 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO rm ${CMAKE_SCRIPT} ENV PATH=${CMAKE_DIR}/bin:$PATH -ARG LLVM_VERSION=llvmorg-13.0.1-rc3 +ARG LLVM_VERSION=llvmorg-14.0.0 ENV LLVM_DIR=/opt/llvm RUN LLVM_URL=https://github.com/llvm/llvm-project/archive &&\ LLVM_ARCHIVE=${LLVM_VERSION}.tar.gz &&\ diff --git a/packages/kokkos/scripts/docker/Dockerfile.sycl b/packages/kokkos/scripts/docker/Dockerfile.sycl index 1cd700648a23341eae53068e770eb144a784aad0..0970d2ac5727ce144badf496745c980639185bc4 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.sycl +++ b/packages/kokkos/scripts/docker/Dockerfile.sycl @@ -38,8 +38,8 @@ RUN CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSIO ENV PATH=${CMAKE_DIR}/bin:$PATH ENV SYCL_DIR=/opt/sycl -RUN SYCL_VERSION=2021-09 && \ - SYCL_URL=https://github.com/intel/llvm/archive/ && \ +RUN SYCL_VERSION=20220112 && \ + SYCL_URL=https://github.com/intel/llvm/archive/sycl-nightly && \ SYCL_ARCHIVE=${SYCL_VERSION}.tar.gz && \ SCRATCH_DIR=/scratch && mkdir -p ${SCRATCH_DIR} && cd ${SCRATCH_DIR} && \ wget --quiet ${SYCL_URL}/${SYCL_ARCHIVE} && \ diff --git a/packages/kokkos/scripts/testing_scripts/test_all_sandia b/packages/kokkos/scripts/testing_scripts/test_all_sandia index b2c5afe23793c16c5e40c781d2422d358252eded..72ee31707ecafed7e6e6bad506b2e16c4ff95ffc 100755 --- a/packages/kokkos/scripts/testing_scripts/test_all_sandia +++ b/packages/kokkos/scripts/testing_scripts/test_all_sandia @@ -13,6 +13,8 @@ print_help() { echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" echo " Defaults to root repo containing this script" echo "--debug: Run tests in debug. 
Defaults to False" + echo "--deprecated-code: Enable deprecated code (disabled by default)" + echo "--deprecated-code-warnings Enable deprecated code warnings (disabled by default)" echo "--boundscheck: Enable Kokkos_ENABLE_DEBUG_BOUNDS_CHECK to check View accesses within bounds." echo "--test-script: Test this script, not Kokkos" echo "--skip-hwloc: Do not do hwloc tests" @@ -167,6 +169,9 @@ CXX_STANDARD="14" CTESTTIMEOUT=2000 +KOKKOS_DEPRECATED_CODE="" +KOKKOS_DEPRECATED_CODE_WARNINGS="" + # # Handle arguments. # @@ -188,6 +193,12 @@ do --boundscheck*) KOKKOS_BOUNDS_CHECK="--boundscheck" ;; + --deprecated-code) + KOKKOS_DEPRECATED_CODE="--deprecated-code" + ;; + --deprecated-code-warnings) + KOKKOS_DEPRECATED_CODE_WARNINGS="--deprecated-code-warnings" + ;; --build-only*) BUILD_ONLY=True ;; @@ -441,6 +452,8 @@ elif [ "$MACHINE" = "weaver" ]; then IBM_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/xl/<COMPILER_VERSION>,gcc/7.2.0" CUDA_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>,ibm/xl/16.1.1,gcc/7.2.0" CUDA10_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>,ibm/xl/16.1.1,gcc/7.4.0" + # Cuda/11 modules available only on the dev queue (rhel8 OS); gcc/8.3.1 loaded by default + CUDA11_MODULE_LIST="cmake/3.21.2,<COMPILER_NAME>/<COMPILER_VERSION>" # Don't do pthread with Power GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" @@ -465,6 +478,8 @@ elif [ "$MACHINE" = "weaver" ]; then "cuda/10.1.105 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/10.1.243 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/10.2.089 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/10.2.2 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/11.2.2 $CUDA11_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi @@ -507,6 +522,8 @@ elif [ 
"$MACHINE" = "caraway" ]; then SKIP_HWLOC=True BASE_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>" + # Cuda11 usage available on the V100 queue + CUDA11_MODULE_LIST="cmake/3.22.2,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/8.2.0" HIPCLANG_BUILD_LIST="Hip_Serial,Hip_OpenMP" HIPCLANG_WARNING_FLAGS="-Werror -Wno-unused-command-line-argument -DNDEBUG" @@ -514,6 +531,12 @@ elif [ "$MACHINE" = "caraway" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("rocm/4.3.0 $BASE_MODULE_LIST $HIPCLANG_BUILD_LIST hipcc $HIPCLANG_WARNING_FLAGS" "rocm/4.5.0 $BASE_MODULE_LIST $HIPCLANG_BUILD_LIST hipcc $HIPCLANG_WARNING_FLAGS" + "cuda/11.4 $CUDA11_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/8.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/9.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/10.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/11.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" ) if [ -z "$ARCH_FLAG" ]; then @@ -636,6 +659,7 @@ fi export OMP_NUM_THREADS=8 export OMP_PROC_BIND=spread export OMP_PLACES=cores +export OMP_MAX_ACTIVE_LEVELS=1 declare -i NUM_RESULTS_TO_KEEP=7 @@ -869,12 +893,12 @@ single_build_and_test() { # KOKKOS_OPTIONS and KOKKOS_CUDA_OPTIONS are exported and detected by kokkos' generate_makefile.sh during install of kokkos; we pass them to the reproducer script instructions echo " # Use generate_makefile line below to call cmake which generates makefile for this build:" &> call_generate_makefile.sh - echo " ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} 
--no-examples ${KOKKOS_BOUNDS_CHECK} $extra_args" &>> call_generate_makefile.sh + echo " ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} --no-examples ${KOKKOS_BOUNDS_CHECK} ${KOKKOS_DEPRECATED_CODE} ${KOKKOS_DEPRECATED_CODE_WARNINGS} $extra_args" &>> call_generate_makefile.sh # store script command with generic path for faster copy/paste of reproducer into issues - echo " \$KOKKOS_PATH/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=\$KOKKOS_PATH --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} ${KOKKOS_BOUNDS_CHECK} --no-examples $extra_args" &> call_generate_makefile_genericpath.sh + echo " \$KOKKOS_PATH/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=\$KOKKOS_PATH --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} ${KOKKOS_BOUNDS_CHECK} --no-examples ${KOKKOS_DEPRECATED_CODE} ${KOKKOS_DEPRECATED_CODE_WARNINGS} $extra_args" &> call_generate_makefile_genericpath.sh - run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} ${KOKKOS_BOUNDS_CHECK} --no-examples $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + run_cmd ${KOKKOS_PATH}/generate_makefile.bash 
--with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} ${KOKKOS_BOUNDS_CHECK} --no-examples ${KOKKOS_DEPRECATED_CODE} ${KOKKOS_DEPRECATED_CODE_WARNINGS} $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } local make_par_lvl=12 if [[ "$MACHINE" = white* ]]; then make_par_lvl=48 diff --git a/packages/kokkos/simd/CMakeLists.txt b/packages/kokkos/simd/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..83557e61e627f1d8d8a4a7f99c4e8709d924cb27 --- /dev/null +++ b/packages/kokkos/simd/CMakeLists.txt @@ -0,0 +1,10 @@ + +KOKKOS_SUBPACKAGE(Simd) + +IF (NOT Kokkos_INSTALL_TESTING) + ADD_SUBDIRECTORY(src) +ENDIF() + +KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) + +KOKKOS_SUBPACKAGE_POSTPROCESS() diff --git a/packages/kokkos/simd/cmake/Dependencies.cmake b/packages/kokkos/simd/cmake/Dependencies.cmake new file mode 100644 index 0000000000000000000000000000000000000000..5e29157369c9ab8cab935a1bfc4c6dad2fdd0296 --- /dev/null +++ b/packages/kokkos/simd/cmake/Dependencies.cmake @@ -0,0 +1,5 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_REQUIRED_PACKAGES KokkosCore + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC HPX + TEST_OPTIONAL_TPLS CUSPARSE + ) diff --git a/packages/kokkos/simd/src/CMakeLists.txt b/packages/kokkos/simd/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..8779112bc3c88dcc6752b215c781d356aaa8fe40 --- /dev/null +++ b/packages/kokkos/simd/src/CMakeLists.txt @@ -0,0 +1,29 @@ +#I have to leave these here for tribits +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +FILE(GLOB SIMD_HEADERS *.hpp) +FILE(GLOB SIMD_SOURCES *.cpp) + +INSTALL ( + DIRECTORY 
"${CMAKE_CURRENT_SOURCE_DIR}/" + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" +) + +#----------------------------------------------------------------------------- + +# We have to pass the sources in here for Tribits +# These will get ignored for standalone CMake and a true interface library made +KOKKOS_ADD_LIBRARY( + kokkossimd + SOURCES ${SIMD_SOURCES} + HEADERS ${SIMD_HEADERS} +) +KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkossimd + ${KOKKOS_TOP_BUILD_DIR} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +) diff --git a/packages/kokkos/simd/src/Kokkos_SIMD.hpp b/packages/kokkos/simd/src/Kokkos_SIMD.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a15a1b8ebf41ae242b6ac0df86dd8cecdae2365c --- /dev/null +++ b/packages/kokkos/simd/src/Kokkos_SIMD.hpp @@ -0,0 +1,161 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SIMD_HPP +#define KOKKOS_SIMD_HPP + +#include <Kokkos_SIMD_Common.hpp> + +#include <Kokkos_SIMD_Scalar.hpp> + +#ifdef KOKKOS_ARCH_AVX512XEON +#include <Kokkos_SIMD_AVX512.hpp> +#endif + +namespace Kokkos { +namespace Experimental { + +namespace simd_abi { + +namespace Impl { + +#if defined(KOKKOS_ARCH_AVX512XEON) +using host_native = avx512_fixed_size<8>; +#else +using host_native = scalar; +#endif + +template <class T> +struct ForSpace; + +#ifdef KOKKOS_ENABLE_SERIAL +template <> +struct ForSpace<Kokkos::Serial> { + using type = host_native; +}; +#endif + +#ifdef KOKKOS_ENABLE_CUDA +template <> +struct ForSpace<Kokkos::Cuda> { + using type = scalar; +}; +#endif + +#ifdef KOKKOS_ENABLE_THREADS +template <> +struct ForSpace<Kokkos::Threads> { + using type = host_native; +}; +#endif + +#ifdef KOKKOS_ENABLE_HPX +template <> +struct ForSpace<Kokkos::Experimental::HPX> { + using type = scalar; +}; +#endif + +#ifdef KOKKOS_ENABLE_OPENMP +template <> +struct ForSpace<Kokkos::OpenMP> { + using type = host_native; +}; +#endif + +#ifdef 
KOKKOS_ENABLE_OPENMPTARGET +template <> +struct ForSpace<Kokkos::Experimental::OpenMPTarget> { + using type = scalar; +}; +#endif + +#ifdef KOKKOS_ENABLE_HIP +template <> +struct ForSpace<Kokkos::Experimental::HIP> { + using type = scalar; +}; +#endif + +#ifdef KOKKOS_ENABLE_SYCL +template <> +struct ForSpace<Kokkos::Experimental::SYCL> { + using type = scalar; +}; +#endif + +} // namespace Impl + +template <class Space> +using ForSpace = typename Impl::ForSpace<typename Space::execution_space>::type; + +template <class T> +using native = ForSpace<Kokkos::DefaultExecutionSpace>; + +} // namespace simd_abi + +template <class T> +using native_simd = simd<T, simd_abi::native<T>>; +template <class T> +using native_simd_mask = simd_mask<T, simd_abi::native<T>>; + +namespace Impl { + +template <class... Abis> +class abi_set {}; + +#ifdef KOKKOS_ARCH_AVX512XEON +using host_abi_set = abi_set<simd_abi::scalar, simd_abi::avx512_fixed_size<8>>; +#else +using host_abi_set = abi_set<simd_abi::scalar>; +#endif + +using device_abi_set = abi_set<simd_abi::scalar>; + +} // namespace Impl + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp b/packages/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1df0730ac48aa1fa51109942f4c210d84e426f4f --- /dev/null +++ b/packages/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp @@ -0,0 +1,1023 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SIMD_AVX512_HPP +#define KOKKOS_SIMD_AVX512_HPP + +#include <functional> +#include <type_traits> + +#include <Kokkos_SIMD_Common.hpp> + +#include <immintrin.h> + +namespace Kokkos { +namespace Experimental { + +namespace simd_abi { + +template <int N> +class avx512_fixed_size {}; + +} // namespace simd_abi + +template <class T> +class simd_mask<T, simd_abi::avx512_fixed_size<8>> { + __mmask8 m_value; + + public: + class reference { + __mmask8& m_mask; + int m_lane; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION __mmask8 bit_mask() const { + return __mmask8(std::int16_t(1 << m_lane)); + } + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(__mmask8& mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + if (value) { + m_mask |= bit_mask(); + } else { + m_mask &= ~bit_mask(); + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + return (m_mask & bit_mask()) != 0; + } + }; + using value_type = bool; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) + : m_value(-std::int16_t(value)) {} + template <class U> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask( + simd_mask<U, simd_abi::avx512_fixed_size<8>> const& other) + : m_value(static_cast<__mmask8>(other)) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + __mmask8 const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __mmask8() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return static_cast<value_type>(reference(m_value, int(i))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator||(simd_mask const& other) const { + return simd_mask(_kor_mask8(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator&&(simd_mask const& other) const { + return simd_mask(_kand_mask8(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask operator!() const { + static const __mmask8 true_value(static_cast<__mmask8>(simd_mask(true))); + return simd_mask(_kxor_mask8(true_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + simd_mask const& other) const { + return m_value == other.m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + simd_mask const& other) const { + return m_value != other.m_value; + } +}; + +template <> +class simd<std::int32_t, simd_abi::avx512_fixed_size<8>> { + __m256i m_value; + + public: + using value_type = std::int32_t; + using abi_type = simd_abi::avx512_fixed_size<8>; + using mask_type = simd_mask<value_type, abi_type>; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template <class U, std::enable_if_t<std::is_convertible_v<U, value_type>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm256_set1_epi32(value_type(value))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m256i const& value_in) + : m_value(value_in) {} + 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd<std::uint64_t, abi_type> const& other); + template <class G, + std::enable_if_t< + // basically, can you do { value_type r = + // gen(std::integral_constant<std::size_t, i>()); } + std::is_invocable_r_v<value_type, G, + std::integral_constant<std::size_t, 0>>, + bool> = false> + KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) + : m_value( + _mm256_setr_epi32(gen(std::integral_constant<std::size_t, 0>()), + gen(std::integral_constant<std::size_t, 1>()), + gen(std::integral_constant<std::size_t, 2>()), + gen(std::integral_constant<std::size_t, 3>()), + gen(std::integral_constant<std::size_t, 4>()), + gen(std::integral_constant<std::size_t, 5>()), + gen(std::integral_constant<std::size_t, 6>()), + gen(std::integral_constant<std::size_t, 7>()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast<value_type*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast<value_type const*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm256_mask_storeu_epi32(ptr, static_cast<__mmask8>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm256_mask_loadu_epi32( + _mm256_set1_epi32(0), static_cast<__mmask8>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<(simd const& other) const { + return mask_type(_mm256_cmplt_epi32_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>(simd const& other) const { + return mask_type(_mm256_cmplt_epi32_mask(other.m_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<=(simd 
const& other) const { + return mask_type(_mm256_cmple_epi32_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>=(simd const& other) const { + return mask_type(_mm256_cmple_epi32_mask(other.m_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator==(simd const& other) const { + return mask_type(_mm256_cmpeq_epi32_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator!=(simd const& other) const { + return mask_type(_mm256_cmpneq_epi32_mask(m_value, other.m_value)); + } +}; + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> + operator*(simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::int32_t, simd_abi::avx512_fixed_size<8>>( + _mm256_mullo_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> + operator+(simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::int32_t, simd_abi::avx512_fixed_size<8>>( + _mm256_add_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> + operator-(simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::int32_t, simd_abi::avx512_fixed_size<8>>( + _mm256_sub_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> + operator-(simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& a) { + return simd<std::int32_t, 
simd_abi::avx512_fixed_size<8>>(0) - a; +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<std::int32_t, simd_abi::avx512_fixed_size<8>> condition( + simd_mask<std::int32_t, simd_abi::avx512_fixed_size<8>> const& a, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& b, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& c) { + return simd<std::int32_t, simd_abi::avx512_fixed_size<8>>( + _mm256_mask_blend_epi32(static_cast<__mmask8>(a), static_cast<__m256i>(c), + static_cast<__m256i>(b))); +} + +template <> +class simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> { + __m256i m_value; + + public: + using value_type = std::uint32_t; + using abi_type = simd_abi::avx512_fixed_size<8>; + using mask_type = simd_mask<value_type, abi_type>; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template <class U, std::enable_if_t<std::is_convertible_v<U, value_type>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm256_set1_epi32(bit_cast<std::int32_t>(value_type(value)))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m256i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& other) + : m_value(static_cast<__m256i>(other)) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast<value_type*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return 
reinterpret_cast<value_type const*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<(simd const& other) const { + return mask_type(_mm256_cmplt_epu32_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>(simd const& other) const { + return mask_type(_mm256_cmplt_epu32_mask(other.m_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<=(simd const& other) const { + return mask_type(_mm256_cmple_epu32_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>=(simd const& other) const { + return mask_type(_mm256_cmple_epu32_mask(other.m_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator==(simd const& other) const { + return mask_type(_mm256_cmpeq_epu32_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator!=(simd const& other) const { + return mask_type(_mm256_cmpneq_epu32_mask(m_value, other.m_value)); + } +}; + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> + operator*(simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::uint32_t, simd_abi::avx512_fixed_size<8>>( + _mm256_mullo_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> + operator+(simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::uint32_t, simd_abi::avx512_fixed_size<8>>( + _mm256_add_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + 
simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> + operator-(simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::uint32_t, simd_abi::avx512_fixed_size<8>>( + _mm256_sub_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> condition( + simd_mask<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& a, + simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& b, + simd<std::uint32_t, simd_abi::avx512_fixed_size<8>> const& c) { + return simd<std::uint32_t, simd_abi::avx512_fixed_size<8>>( + _mm256_mask_blend_epi32(static_cast<__mmask8>(a), static_cast<__m256i>(c), + static_cast<__m256i>(b))); +} + +template <> +class simd<std::int64_t, simd_abi::avx512_fixed_size<8>> { + __m512i m_value; + + public: + using value_type = std::int64_t; + using abi_type = simd_abi::avx512_fixed_size<8>; + using mask_type = simd_mask<value_type, abi_type>; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template <class U, std::enable_if_t<std::is_convertible_v<U, value_type>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm512_set1_epi64(value_type(value))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& other) + : m_value(_mm512_cvtepi32_epi64(static_cast<__m256i>(other))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd<std::uint64_t, 
simd_abi::avx512_fixed_size<8>> const& other); + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr simd(__m512i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast<value_type*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast<value_type const*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm512_mask_storeu_epi64(ptr, static_cast<__mmask8>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator>>(int rhs) const { + return _mm512_srai_epi64(m_value, rhs); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd + operator>>(simd<int, simd_abi::avx512_fixed_size<8>> const& rhs) const { + return _mm512_srav_epi64(m_value, + _mm512_cvtepi32_epi64(static_cast<__m256i>(rhs))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator<<(int rhs) const { + return _mm512_slli_epi64(m_value, rhs); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd + operator<<(simd<int, simd_abi::avx512_fixed_size<8>> const& rhs) const { + return _mm512_sllv_epi64(m_value, + _mm512_cvtepi32_epi64(static_cast<__m256i>(rhs))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m512i() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<(simd const& other) const { + return mask_type(_mm512_cmplt_epi64_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>(simd const& other) const { + return mask_type(_mm512_cmplt_epi64_mask(other.m_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<=(simd const& other) const { + return mask_type(_mm512_cmple_epi64_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>=(simd const& other) const { + return 
mask_type(_mm512_cmple_epi64_mask(other.m_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator==(simd const& other) const { + return mask_type(_mm512_cmpeq_epi64_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator!=(simd const& other) const { + return mask_type(_mm512_cmpneq_epi64_mask(m_value, other.m_value)); + } +}; + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::int64_t, simd_abi::avx512_fixed_size<8>> + operator*(simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::int64_t, simd_abi::avx512_fixed_size<8>>( + _mm512_mullo_epi64(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::int64_t, simd_abi::avx512_fixed_size<8>> + operator+(simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::int64_t, simd_abi::avx512_fixed_size<8>>( + _mm512_add_epi64(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::int64_t, simd_abi::avx512_fixed_size<8>> + operator-(simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::int64_t, simd_abi::avx512_fixed_size<8>>( + _mm512_sub_epi64(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::int64_t, simd_abi::avx512_fixed_size<8>> + operator-(simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& a) { + return simd<std::int64_t, simd_abi::avx512_fixed_size<8>>(0) - a; +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<std::int64_t, simd_abi::avx512_fixed_size<8>> condition( + simd_mask<std::int64_t, simd_abi::avx512_fixed_size<8>> const& a, + 
simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& b, + simd<std::int64_t, simd_abi::avx512_fixed_size<8>> const& c) { + return simd<std::int64_t, simd_abi::avx512_fixed_size<8>>( + _mm512_mask_blend_epi64(static_cast<__mmask8>(a), static_cast<__m512i>(c), + static_cast<__m512i>(b))); +} + +template <> +class simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> { + __m512i m_value; + + public: + using value_type = std::uint64_t; + using abi_type = simd_abi::avx512_fixed_size<8>; + using mask_type = simd_mask<value_type, abi_type>; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template <class U, std::enable_if_t<std::is_convertible_v<U, value_type>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm512_set1_epi64(bit_cast<std::int64_t>(value_type(value)))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr simd(__m512i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd<std::int32_t, abi_type> const& other) + : m_value(_mm512_cvtepi32_epi64(static_cast<__m256i>(other))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd<std::int64_t, abi_type> const& other) + : m_value(static_cast<__m512i>(other)) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast<value_type*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast<value_type const*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd + 
operator>>(unsigned int rhs) const { + return _mm512_srli_epi64(m_value, rhs); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator>>( + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& rhs) const { + return _mm512_srlv_epi64(m_value, + _mm512_cvtepi32_epi64(static_cast<__m256i>(rhs))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd + operator<<(unsigned int rhs) const { + return _mm512_slli_epi64(m_value, rhs); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator<<( + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& rhs) const { + return _mm512_sllv_epi64(m_value, + _mm512_cvtepi32_epi64(static_cast<__m256i>(rhs))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd + operator&(simd const& other) const { + return _mm512_and_epi64(m_value, other.m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd + operator|(simd const& other) const { + return _mm512_or_epi64(m_value, other.m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m512i() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<(simd const& other) const { + return mask_type(_mm512_cmplt_epu64_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>(simd const& other) const { + return mask_type(_mm512_cmplt_epu64_mask(other.m_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<=(simd const& other) const { + return mask_type(_mm512_cmple_epu64_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>=(simd const& other) const { + return mask_type(_mm512_cmple_epu64_mask(other.m_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator==(simd const& other) const { + return mask_type(_mm512_cmpeq_epu64_mask(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator!=(simd const& other) const { + return 
mask_type(_mm512_cmpneq_epu64_mask(m_value, other.m_value)); + } +}; + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> + operator*(simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::uint64_t, simd_abi::avx512_fixed_size<8>>( + _mm512_mullo_epi64(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> + operator+(simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::uint64_t, simd_abi::avx512_fixed_size<8>>( + _mm512_add_epi64(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> + operator-(simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<std::uint64_t, simd_abi::avx512_fixed_size<8>>( + _mm512_sub_epi64(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> condition( + simd_mask<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& a, + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& b, + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& c) { + return simd<std::uint64_t, simd_abi::avx512_fixed_size<8>>( + _mm512_mask_blend_epi64(static_cast<__mmask8>(a), static_cast<__m512i>(c), + static_cast<__m512i>(b))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<std::int32_t, simd_abi::avx512_fixed_size<8>>::simd( + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& other) + : m_value(_mm512_cvtepi64_epi32(static_cast<__m512i>(other))) {} + 
+KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<std::int64_t, simd_abi::avx512_fixed_size<8>>::simd( + simd<std::uint64_t, simd_abi::avx512_fixed_size<8>> const& other) + : m_value(static_cast<__m512i>(other)) {} + +template <> +class simd<double, simd_abi::avx512_fixed_size<8>> { + __m512d m_value; + + public: + using value_type = double; + using abi_type = simd_abi::avx512_fixed_size<8>; + using mask_type = simd_mask<value_type, abi_type>; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template <class U, std::enable_if_t<std::is_convertible_v<U, value_type>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm512_set1_pd(value_type(value))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(double a, double b, double c, + double d, double e, double f, + double g, double h) + : m_value(_mm512_setr_pd(a, b, c, d, e, f, g, h)) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m512d const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast<value_type*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast<value_type const*>(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm512_loadu_pd(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm512_storeu_pd(ptr, m_value); + } + 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m512d() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<(simd const& other) const { + return mask_type(_mm512_cmp_pd_mask(m_value, other.m_value, _CMP_LT_OS)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>(simd const& other) const { + return mask_type(_mm512_cmp_pd_mask(m_value, other.m_value, _CMP_GT_OS)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator<=(simd const& other) const { + return mask_type(_mm512_cmp_pd_mask(m_value, other.m_value, _CMP_LE_OS)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator>=(simd const& other) const { + return mask_type(_mm512_cmp_pd_mask(m_value, other.m_value, _CMP_GE_OS)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator==(simd const& other) const { + return mask_type(_mm512_cmp_pd_mask(m_value, other.m_value, _CMP_EQ_OS)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type + operator!=(simd const& other) const { + return mask_type(_mm512_cmp_pd_mask(m_value, other.m_value, _CMP_NEQ_OS)); + } +}; + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<double, simd_abi::avx512_fixed_size<8>> + operator*(simd<double, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<double, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_mul_pd(static_cast<__m512d>(lhs), static_cast<__m512d>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<double, simd_abi::avx512_fixed_size<8>> + operator/(simd<double, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<double, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_div_pd(static_cast<__m512d>(lhs), static_cast<__m512d>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<double, simd_abi::avx512_fixed_size<8>> + operator+(simd<double, 
simd_abi::avx512_fixed_size<8>> const& lhs, + simd<double, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_add_pd(static_cast<__m512d>(lhs), static_cast<__m512d>(rhs))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<double, simd_abi::avx512_fixed_size<8>> + operator-(simd<double, simd_abi::avx512_fixed_size<8>> const& lhs, + simd<double, simd_abi::avx512_fixed_size<8>> const& rhs) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_sub_pd(static_cast<__m512d>(lhs), static_cast<__m512d>(rhs))); +} + +// Unary negation: flips the sign bit of every lane. Computing 0.0 - a +// instead would turn +0.0 into +0.0 rather than -0.0. +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd<double, simd_abi::avx512_fixed_size<8>> + operator-(simd<double, simd_abi::avx512_fixed_size<8>> const& a) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + reinterpret_cast<__m512d>(_mm512_xor_epi64( + reinterpret_cast<__m512i>(static_cast<__m512d>(a)), + reinterpret_cast<__m512i>(_mm512_set1_pd(-0.0))))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> copysign( + simd<double, simd_abi::avx512_fixed_size<8>> const& a, + simd<double, simd_abi::avx512_fixed_size<8>> const& b) { + static const __m512i sign_mask = reinterpret_cast<__m512i>( + static_cast<__m512d>(simd<double, simd_abi::avx512_fixed_size<8>>(-0.0))); + return simd<double, simd_abi::avx512_fixed_size<8>>( + reinterpret_cast<__m512d>(_mm512_xor_epi64( + _mm512_andnot_epi64( + sign_mask, reinterpret_cast<__m512i>(static_cast<__m512d>(a))), + _mm512_and_epi64( + sign_mask, reinterpret_cast<__m512i>(static_cast<__m512d>(b)))))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> abs( + simd<double, simd_abi::avx512_fixed_size<8>> const& a) { + __m512d const rhs = static_cast<__m512d>(a); + return simd<double, simd_abi::avx512_fixed_size<8>>(reinterpret_cast<__m512d>( + _mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), + reinterpret_cast<__m512i>(rhs)))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> sqrt( + 
simd<double, simd_abi::avx512_fixed_size<8>> const& a) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_sqrt_pd(static_cast<__m512d>(a))); +} + +#ifdef __INTEL_COMPILER + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> cbrt( + simd<double, simd_abi::avx512_fixed_size<8>> const& a) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_cbrt_pd(static_cast<__m512d>(a))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> exp( + simd<double, simd_abi::avx512_fixed_size<8>> const& a) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_exp_pd(static_cast<__m512d>(a))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> log( + simd<double, simd_abi::avx512_fixed_size<8>> const& a) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_log_pd(static_cast<__m512d>(a))); +} + +#endif + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> fma( + simd<double, simd_abi::avx512_fixed_size<8>> const& a, + simd<double, simd_abi::avx512_fixed_size<8>> const& b, + simd<double, simd_abi::avx512_fixed_size<8>> const& c) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_fmadd_pd(static_cast<__m512d>(a), static_cast<__m512d>(b), + static_cast<__m512d>(c))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> max( + simd<double, simd_abi::avx512_fixed_size<8>> const& a, + simd<double, simd_abi::avx512_fixed_size<8>> const& b) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_max_pd(static_cast<__m512d>(a), static_cast<__m512d>(b))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> min( + simd<double, simd_abi::avx512_fixed_size<8>> const& a, + simd<double, simd_abi::avx512_fixed_size<8>> const& b) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + 
_mm512_min_pd(static_cast<__m512d>(a), static_cast<__m512d>(b))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd<double, simd_abi::avx512_fixed_size<8>> condition( + simd_mask<double, simd_abi::avx512_fixed_size<8>> const& a, + simd<double, simd_abi::avx512_fixed_size<8>> const& b, + simd<double, simd_abi::avx512_fixed_size<8>> const& c) { + return simd<double, simd_abi::avx512_fixed_size<8>>( + _mm512_mask_blend_pd(static_cast<__mmask8>(a), static_cast<__m512d>(c), + static_cast<__m512d>(b))); +} + +template <> +class const_where_expression<simd_mask<double, simd_abi::avx512_fixed_size<8>>, + simd<double, simd_abi::avx512_fixed_size<8>>> { + public: + using abi_type = simd_abi::avx512_fixed_size<8>; + using value_type = simd<double, abi_type>; + using mask_type = simd_mask<double, abi_type>; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast<value_type&>(value_arg)), m_mask(mask_arg) {} + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr mask_type const& + mask() const { + return m_mask; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr value_type const& + value() const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(double* mem, element_aligned_tag) const { + _mm512_mask_storeu_pd(mem, static_cast<__mmask8>(m_mask), + static_cast<__m512d>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + double* mem, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& index) const { + _mm512_mask_i32scatter_pd(mem, static_cast<__mmask8>(m_mask), + static_cast<__m256i>(index), + static_cast<__m512d>(m_value), 8); + } +}; + +template <> +class where_expression<simd_mask<double, simd_abi::avx512_fixed_size<8>>, + simd<double, simd_abi::avx512_fixed_size<8>>> + : public const_where_expression< + simd_mask<double, simd_abi::avx512_fixed_size<8>>, + simd<double, 
simd_abi::avx512_fixed_size<8>>> { + public: + where_expression( + simd_mask<double, simd_abi::avx512_fixed_size<8>> const& mask_arg, + simd<double, simd_abi::avx512_fixed_size<8>>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + // Loads from mem into the lanes selected by the mask. Unselected lanes + // keep their current value (the current value is the pass-through + // operand), matching the blend performed by operator= below. + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(double const* mem, element_aligned_tag) { + m_value = value_type(_mm512_mask_loadu_pd( + static_cast<__m512d>(m_value), static_cast<__mmask8>(m_mask), mem)); + } + // Gathers mem[index[i]] into the lanes selected by the mask; unselected + // lanes keep their current value. The scale of 8 is sizeof(double). + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + double const* mem, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>> const& index) { + m_value = value_type(_mm512_mask_i32gather_pd( + static_cast<__m512d>(m_value), static_cast<__mmask8>(m_mask), + static_cast<__m256i>(index), mem, 8)); + } + template <class U, std::enable_if_t< + std::is_convertible_v< + U, simd<double, simd_abi::avx512_fixed_size<8>>>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast<simd<double, simd_abi::avx512_fixed_size<8>>>( + std::forward<U>(x)); + m_value = simd<double, simd_abi::avx512_fixed_size<8>>(_mm512_mask_blend_pd( + static_cast<__mmask8>(m_mask), static_cast<__m512d>(m_value), + static_cast<__m512d>(x_as_value_type))); + } +}; + +template <> +class const_where_expression< + simd_mask<std::int32_t, simd_abi::avx512_fixed_size<8>>, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>>> { + public: + using abi_type = simd_abi::avx512_fixed_size<8>; + using value_type = simd<std::int32_t, abi_type>; + using mask_type = simd_mask<std::int32_t, abi_type>; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast<value_type&>(value_arg)), m_mask(mask_arg) {} + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr mask_type const& + mask() const { + return m_mask; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr 
value_type const& + value() const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, element_aligned_tag) const { + _mm256_mask_storeu_epi32(mem, static_cast<__mmask8>(m_mask), + static_cast<__m256i>(m_value)); + } +}; + +template <> +class where_expression<simd_mask<std::int32_t, simd_abi::avx512_fixed_size<8>>, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>>> + : public const_where_expression< + simd_mask<std::int32_t, simd_abi::avx512_fixed_size<8>>, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>>> { + public: + where_expression( + simd_mask<std::int32_t, simd_abi::avx512_fixed_size<8>> const& mask_arg, + simd<std::int32_t, simd_abi::avx512_fixed_size<8>>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + // Loads from mem into the lanes selected by the mask. Unselected lanes + // keep their current value, which is passed as the pass-through operand + // of the masked load. + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, element_aligned_tag) { + m_value = value_type(_mm256_mask_loadu_epi32( + static_cast<__m256i>(m_value), static_cast<__mmask8>(m_mask), mem)); + } +}; + +template <> +class const_where_expression< + simd_mask<std::int64_t, simd_abi::avx512_fixed_size<8>>, + simd<std::int64_t, simd_abi::avx512_fixed_size<8>>> { + public: + using abi_type = simd_abi::avx512_fixed_size<8>; + using value_type = simd<std::int64_t, abi_type>; + using mask_type = simd_mask<std::int64_t, abi_type>; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast<value_type&>(value_arg)), m_mask(mask_arg) {} + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr mask_type const& + mask() const { + return m_mask; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr value_type const& + value() const { + return m_value; + } +}; + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int32_t hmax( + const_where_expression< + simd_mask<std::int32_t, simd_abi::avx512_fixed_size<8>>, + simd<std::int32_t, 
simd_abi::avx512_fixed_size<8>>> const& x) { + return _mm512_mask_reduce_max_epi32( + static_cast<__mmask8>(x.mask()), + _mm512_castsi256_si512(static_cast<__m256i>(x.value()))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION double hmin( + const_where_expression<simd_mask<double, simd_abi::avx512_fixed_size<8>>, + simd<double, simd_abi::avx512_fixed_size<8>>> const& + x) { + return _mm512_mask_reduce_min_pd(static_cast<__mmask8>(x.mask()), + static_cast<__m512d>(x.value())); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION std::int64_t reduce( + const_where_expression< + simd_mask<std::int64_t, simd_abi::avx512_fixed_size<8>>, + simd<std::int64_t, simd_abi::avx512_fixed_size<8>>> const& x, + std::int64_t, std::plus<>) { + return _mm512_mask_reduce_add_epi64(static_cast<__mmask8>(x.mask()), + static_cast<__m512i>(x.value())); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION double reduce( + const_where_expression<simd_mask<double, simd_abi::avx512_fixed_size<8>>, + simd<double, simd_abi::avx512_fixed_size<8>>> const& + x, + double, std::plus<>) { + return _mm512_mask_reduce_add_pd(static_cast<__mmask8>(x.mask()), + static_cast<__m512d>(x.value())); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/simd/src/Kokkos_SIMD_Common.hpp b/packages/kokkos/simd/src/Kokkos_SIMD_Common.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ae2843b3055a6d602460dfb31031807cb7abe7d4 --- /dev/null +++ b/packages/kokkos/simd/src/Kokkos_SIMD_Common.hpp @@ -0,0 +1,428 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SIMD_COMMON_HPP +#define KOKKOS_SIMD_COMMON_HPP + +#include <cmath> +#include <cstring> +#include <type_traits> + +#include <Kokkos_Core.hpp> + +namespace Kokkos { + +namespace Experimental { + +// Reinterprets the object representation of src as a value of type To. +// To and From must have the same size and be trivially copyable; both +// conditions are enforced at compile time so that a mismatched cast can +// neither read past the end of src nor silently drop bytes. +// The copy goes through std::memcpy, which is not usable in a constant +// expression, so calls are evaluated at run time despite the constexpr +// specifier (kept for interface compatibility). +template <class To, class From> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr To bit_cast( + From const& src) { + static_assert(sizeof(To) == sizeof(From), + "bit_cast requires source and destination of equal size"); + static_assert(std::is_trivially_copyable_v<To> && + std::is_trivially_copyable_v<From>, + "bit_cast requires trivially copyable types"); + To dst; + std::memcpy(&dst, &src, sizeof(To)); + return dst; +} + +template <class T, class Abi> +class simd; + +template <class T, class Abi> +class simd_mask; + +struct element_aligned_tag {}; + +// class template declarations for const_where_expression and where_expression + +template <class M, class T> +class const_where_expression { + protected: + T& m_value; + M const& m_mask; + + public: + const_where_expression(M const& mask_arg, T const& value_arg) + : m_value(const_cast<T&>(value_arg)), m_mask(mask_arg) {} + KOKKOS_FORCEINLINE_FUNCTION T const& value() const { return this->m_value; } +}; + +template <class M, class T> +class where_expression : public const_where_expression<M, T> { + using base_type = const_where_expression<M, T>; + + public: + where_expression(M const& mask_arg, T& value_arg) + : base_type(mask_arg, value_arg) {} + KOKKOS_FORCEINLINE_FUNCTION T& value() { return this->m_value; } +}; + +// specializations of where expression templates for the case when the +// mask type is bool, to allow generic code to use where() on both +// SIMD types and non-SIMD builtin arithmetic types + +template <class T> +class const_where_expression<bool, T> { + protected: + T& m_value; + bool m_mask; + + public: + KOKKOS_FORCEINLINE_FUNCTION + const_where_expression(bool mask_arg, T const& value_arg) + : m_value(const_cast<T&>(value_arg)), m_mask(mask_arg) {} + KOKKOS_FORCEINLINE_FUNCTION T const& value() const { return this->m_value; } +}; + +template <class T> +class where_expression<bool, T> : public const_where_expression<bool, T> { 
+ using base_type = const_where_expression<bool, T>; + + public: + KOKKOS_FORCEINLINE_FUNCTION + where_expression(bool mask_arg, T& value_arg) + : base_type(mask_arg, value_arg) {} + KOKKOS_FORCEINLINE_FUNCTION T& value() { return this->m_value; } + template <class U, + std::enable_if_t<std::is_convertible_v<U, T>, bool> = false> + KOKKOS_FORCEINLINE_FUNCTION void operator=(U const& x) { + if (this->m_mask) this->m_value = x; + } +}; + +template <class T, class Abi> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION + where_expression<simd_mask<T, Abi>, simd<T, Abi>> + where(typename simd<T, Abi>::mask_type const& mask, simd<T, Abi>& value) { + return where_expression(mask, value); +} + +template <class T, class Abi> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION + const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> + where(typename simd<T, Abi>::mask_type const& mask, + simd<T, Abi> const& value) { + return const_where_expression(mask, value); +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION where_expression<bool, T> where( + bool mask, T& value) { + return where_expression(mask, value); +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION const_where_expression<bool, T> where( + bool mask, T const& value) { + return const_where_expression(mask, value); +} + +// The code below provides: +// operator@(simd<T, Abi>, Arithmetic) +// operator@(Arithmetic, simd<T, Abi>) +// operator@=(simd<T, Abi>&, U&&) +// operator@=(where_expression<M, T>&, U&&) + +template <class T, class U, class Abi, + std::enable_if_t<std::is_arithmetic_v<U>, bool> = false> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator+( + Experimental::simd<T, Abi> const& lhs, U rhs) { + using result_member = decltype(lhs[0] + rhs); + return Experimental::simd<result_member, Abi>(lhs) + + Experimental::simd<result_member, Abi>(rhs); +} + +template <class T, class U, class Abi, + std::enable_if_t<std::is_arithmetic_v<U>, bool> = false> +[[nodiscard]] 
KOKKOS_FORCEINLINE_FUNCTION auto operator+( + U lhs, Experimental::simd<T, Abi> const& rhs) { + using result_member = decltype(lhs + rhs[0]); + return Experimental::simd<result_member, Abi>(lhs) + + Experimental::simd<result_member, Abi>(rhs); +} + +template <class T, class U, class Abi> +KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator+=(simd<T, Abi>& lhs, + U&& rhs) { + lhs = lhs + std::forward<U>(rhs); + return lhs; +} + +template <class M, class T, class U> +KOKKOS_FORCEINLINE_FUNCTION where_expression<M, T>& operator+=( + where_expression<M, T>& lhs, U&& rhs) { + lhs = lhs.value() + std::forward<U>(rhs); + return lhs; +} + +template <class T, class U, class Abi, + std::enable_if_t<std::is_arithmetic_v<U>, bool> = false> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator-( + Experimental::simd<T, Abi> const& lhs, U rhs) { + using result_member = decltype(lhs[0] - rhs); + return Experimental::simd<result_member, Abi>(lhs) - + Experimental::simd<result_member, Abi>(rhs); +} + +template <class T, class U, class Abi, + std::enable_if_t<std::is_arithmetic_v<U>, bool> = false> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator-( + U lhs, Experimental::simd<T, Abi> const& rhs) { + using result_member = decltype(lhs - rhs[0]); + return Experimental::simd<result_member, Abi>(lhs) - + Experimental::simd<result_member, Abi>(rhs); +} + +template <class T, class U, class Abi> +KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator-=(simd<T, Abi>& lhs, + U&& rhs) { + lhs = lhs - std::forward<U>(rhs); + return lhs; +} + +template <class M, class T, class U> +KOKKOS_FORCEINLINE_FUNCTION where_expression<M, T>& operator-=( + where_expression<M, T>& lhs, U&& rhs) { + lhs = lhs.value() - std::forward<U>(rhs); + return lhs; +} + +template <class T, class U, class Abi, + std::enable_if_t<std::is_arithmetic_v<U>, bool> = false> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator*( + Experimental::simd<T, Abi> const& lhs, U rhs) { + using result_member = 
decltype(lhs[0] * rhs); + return Experimental::simd<result_member, Abi>(lhs) * + Experimental::simd<result_member, Abi>(rhs); +} + +template <class T, class U, class Abi, + std::enable_if_t<std::is_arithmetic_v<U>, bool> = false> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator*( + U lhs, Experimental::simd<T, Abi> const& rhs) { + using result_member = decltype(lhs * rhs[0]); + return Experimental::simd<result_member, Abi>(lhs) * + Experimental::simd<result_member, Abi>(rhs); +} + +template <class T, class U, class Abi> +KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator*=(simd<T, Abi>& lhs, + U&& rhs) { + lhs = lhs * std::forward<U>(rhs); + return lhs; +} + +template <class M, class T, class U> +KOKKOS_FORCEINLINE_FUNCTION where_expression<M, T>& operator*=( + where_expression<M, T>& lhs, U&& rhs) { + lhs = lhs.value() * std::forward<U>(rhs); + return lhs; +} + +template <class T, class U, class Abi, + std::enable_if_t<std::is_arithmetic_v<U>, bool> = false> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator/( + Experimental::simd<T, Abi> const& lhs, U rhs) { + using result_member = decltype(lhs[0] / rhs); + return Experimental::simd<result_member, Abi>(lhs) / + Experimental::simd<result_member, Abi>(rhs); +} + +template <class T, class U, class Abi, + std::enable_if_t<std::is_arithmetic_v<U>, bool> = false> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto operator/( + U lhs, Experimental::simd<T, Abi> const& rhs) { + using result_member = decltype(lhs / rhs[0]); + return Experimental::simd<result_member, Abi>(lhs) / + Experimental::simd<result_member, Abi>(rhs); +} + +template <class T, class U, class Abi> +KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi>& operator/=(simd<T, Abi>& lhs, + U&& rhs) { + lhs = lhs / std::forward<U>(rhs); + return lhs; +} + +template <class M, class T, class U> +KOKKOS_FORCEINLINE_FUNCTION where_expression<M, T>& operator/=( + where_expression<M, T>& lhs, U&& rhs) { + lhs = lhs.value() / std::forward<U>(rhs); + return lhs; +} + 
+// implement mask reductions for type bool to allow generic code to accept +// both simd<double, Abi> and just double + +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr bool all_of(bool a) { + return a; +} + +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr bool any_of(bool a) { + return a; +} + +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION constexpr bool none_of(bool a) { + return !a; +} + +// fallback implementations of reductions across simd_mask: + +template <class T, class Abi> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool all_of( + simd_mask<T, Abi> const& a) { + return a == simd_mask<T, Abi>(true); +} + +template <class T, class Abi> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool any_of( + simd_mask<T, Abi> const& a) { + return a != simd_mask<T, Abi>(false); +} + +template <class T, class Abi> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION bool none_of( + simd_mask<T, Abi> const& a) { + return a == simd_mask<T, Abi>(false); +} + +} // namespace Experimental + +template <class T, class Abi> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION Experimental::simd<T, Abi> min( + Experimental::simd<T, Abi> const& a, Experimental::simd<T, Abi> const& b) { + Experimental::simd<T, Abi> result; + for (std::size_t i = 0; i < Experimental::simd<T, Abi>::size(); ++i) { + result[i] = Kokkos::min(a[i], b[i]); + } + return result; +} + +template <class T, class Abi> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION Experimental::simd<T, Abi> max( + Experimental::simd<T, Abi> const& a, Experimental::simd<T, Abi> const& b) { + Experimental::simd<T, Abi> result; + for (std::size_t i = 0; i < Experimental::simd<T, Abi>::size(); ++i) { + result[i] = Kokkos::max(a[i], b[i]); + } + return result; +} + +// fallback implementations of <cmath> functions. +// individual Abi types may provide overloads with more efficient +// implementations. 
+// These are not in the Experimental namespace because their double
+// overloads are not either
+
+// Generates the fallback overload FUNC(simd<double, Abi>): for every index
+// below size(), the matching element of the result is Kokkos::FUNC applied to
+// that element of the argument.
+#define KOKKOS_IMPL_SIMD_UNARY_FUNCTION(FUNC)                               \
+  template <class Abi>                                                      \
+  [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION Experimental::simd<double, Abi> \
+  FUNC(Experimental::simd<double, Abi> const& a) {                          \
+    using simd_type = Experimental::simd<double, Abi>;                      \
+    simd_type out;                                                          \
+    for (std::size_t lane = 0; lane < simd_type::size(); ++lane) {          \
+      out[lane] = Kokkos::FUNC(a[lane]);                                    \
+    }                                                                       \
+    return out;                                                             \
+  }
+
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(abs)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(exp)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(exp2)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(log)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(log10)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(log2)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(sqrt)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(cbrt)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(sin)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(cos)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(tan)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(asin)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(acos)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(atan)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(sinh)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(cosh)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(tanh)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(asinh)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(acosh)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(atanh)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(erf)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(erfc)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(tgamma)
+KOKKOS_IMPL_SIMD_UNARY_FUNCTION(lgamma)
+
+// Two-argument counterpart of the generator above: for every index below
+// size(), the matching element of the result is Kokkos::FUNC applied to that
+// element of `a` and of `b`.
+#define KOKKOS_IMPL_SIMD_BINARY_FUNCTION(FUNC)                              \
+  template <class Abi>                                                      \
+  [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION Experimental::simd<double, Abi> \
+  FUNC(Experimental::simd<double, Abi> const& a,                            \
+       Experimental::simd<double, Abi> const& b) {                          \
+    using simd_type = Experimental::simd<double, Abi>;                      \
+    simd_type out;                                                          \
+    for (std::size_t lane = 0; lane < simd_type::size(); ++lane) {          \
+      out[lane] = Kokkos::FUNC(a[lane], b[lane]);                           \
+    }                                                                       \
+    return out;                                                             \
+  }
+
+KOKKOS_IMPL_SIMD_BINARY_FUNCTION(pow)
+KOKKOS_IMPL_SIMD_BINARY_FUNCTION(hypot)
+KOKKOS_IMPL_SIMD_BINARY_FUNCTION(atan2) +KOKKOS_IMPL_SIMD_BINARY_FUNCTION(copysign) + +#define KOKKOS_IMPL_SIMD_TERNARY_FUNCTION(FUNC) \ + template <class Abi> \ + [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION Experimental::simd<double, Abi> \ + FUNC(Experimental::simd<double, Abi> const& a, \ + Experimental::simd<double, Abi> const& b, \ + Experimental::simd<double, Abi> const& c) { \ + Experimental::simd<double, Abi> result; \ + for (std::size_t i = 0; i < Experimental::simd<double, Abi>::size(); \ + ++i) { \ + result[i] = Kokkos::FUNC(a[i], b[i], c[i]); \ + } \ + return result; \ + } + +KOKKOS_IMPL_SIMD_TERNARY_FUNCTION(fma) +KOKKOS_IMPL_SIMD_TERNARY_FUNCTION(hypot) + +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp b/packages/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f0d06695e50750874a68aa68af0f06e61c688feb --- /dev/null +++ b/packages/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp @@ -0,0 +1,353 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SIMD_SCALAR_HPP +#define KOKKOS_SIMD_SCALAR_HPP + +#include <type_traits> +#include <climits> +#include <cfloat> + +#include <Kokkos_SIMD_Common.hpp> + +namespace Kokkos { +namespace Experimental { + +namespace simd_abi { + +class scalar {}; + +} // namespace simd_abi + +template <class T> +class simd_mask<T, simd_abi::scalar> { + bool m_value; + + public: + using value_type = bool; + using simd_type = simd<T, simd_abi::scalar>; + using abi_type = simd_abi::scalar; + using reference = value_type&; + KOKKOS_DEFAULTED_FUNCTION simd_mask() = default; + KOKKOS_FORCEINLINE_FUNCTION static constexpr std::size_t size() { return 1; } + KOKKOS_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) + : m_value(value) {} + template <class U> + KOKKOS_FORCEINLINE_FUNCTION simd_mask( + simd_mask<U, simd_abi::scalar> const& other) + : 
m_value(static_cast<bool>(other)) {} + KOKKOS_FORCEINLINE_FUNCTION constexpr explicit operator bool() const { + return m_value; + } + KOKKOS_FORCEINLINE_FUNCTION reference operator[](std::size_t) { + return m_value; + } + KOKKOS_FORCEINLINE_FUNCTION value_type operator[](std::size_t) const { + return m_value; + } + KOKKOS_FORCEINLINE_FUNCTION simd_mask + operator||(simd_mask const& other) const { + return simd_mask(m_value || other.m_value); + } + KOKKOS_FORCEINLINE_FUNCTION simd_mask + operator&&(simd_mask const& other) const { + return simd_mask(m_value && other.m_value); + } + KOKKOS_FORCEINLINE_FUNCTION simd_mask operator!() const { + return simd_mask(!m_value); + } + KOKKOS_FORCEINLINE_FUNCTION bool operator==(simd_mask const& other) const { + return m_value == other.m_value; + } + KOKKOS_FORCEINLINE_FUNCTION bool operator!=(simd_mask const& other) const { + return m_value != other.m_value; + } +}; + +template <class T> +class simd<T, simd_abi::scalar> { + T m_value; + + public: + using value_type = T; + using abi_type = simd_abi::scalar; + using mask_type = simd_mask<T, abi_type>; + using reference = value_type&; + KOKKOS_DEFAULTED_FUNCTION simd() = default; + KOKKOS_DEFAULTED_FUNCTION simd(simd const&) = default; + KOKKOS_DEFAULTED_FUNCTION simd(simd&&) = default; + KOKKOS_DEFAULTED_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_DEFAULTED_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_FORCEINLINE_FUNCTION static constexpr std::size_t size() { return 1; } + template <class U, std::enable_if_t<std::is_convertible_v<U, value_type>, + bool> = false> + KOKKOS_FORCEINLINE_FUNCTION simd(U&& value) : m_value(value) {} + template <class U, std::enable_if_t<std::is_convertible_v<U, value_type>, + bool> = false> + KOKKOS_FORCEINLINE_FUNCTION explicit simd(simd<U, abi_type> const& other) + : m_value(static_cast<U>(other)) {} + template <class G, + std::enable_if_t< + // basically, can you do { value_type r = + // gen(std::integral_constant<std::size_t, 
i>()); } + std::is_invocable_r_v<value_type, G, + std::integral_constant<std::size_t, 0>>, + bool> = false> + KOKKOS_FORCEINLINE_FUNCTION simd(G&& gen) + : m_value(gen(std::integral_constant<std::size_t, 0>())) {} + KOKKOS_FORCEINLINE_FUNCTION simd operator-() const { return simd(-m_value); } + KOKKOS_FORCEINLINE_FUNCTION simd operator>>(int rhs) const { + return simd(m_value >> rhs); + } + KOKKOS_FORCEINLINE_FUNCTION simd + operator>>(simd<int, abi_type> const& rhs) const { + return simd(m_value >> static_cast<int>(rhs)); + } + KOKKOS_FORCEINLINE_FUNCTION simd operator<<(int rhs) const { + return simd(m_value << rhs); + } + KOKKOS_FORCEINLINE_FUNCTION simd + operator<<(simd<int, abi_type> const& rhs) const { + return simd(m_value << static_cast<int>(rhs)); + } + KOKKOS_FORCEINLINE_FUNCTION simd operator&(simd const& other) const { + return m_value & other.m_value; + } + KOKKOS_FORCEINLINE_FUNCTION simd operator|(simd const& other) const { + return m_value | other.m_value; + } + KOKKOS_FORCEINLINE_FUNCTION constexpr explicit operator T() const { + return m_value; + } + KOKKOS_FORCEINLINE_FUNCTION mask_type operator<(simd const& other) const { + return mask_type(m_value < other.m_value); + } + KOKKOS_FORCEINLINE_FUNCTION mask_type operator>(simd const& other) const { + return mask_type(m_value > other.m_value); + } + KOKKOS_FORCEINLINE_FUNCTION mask_type operator<=(simd const& other) const { + return mask_type(m_value <= other.m_value); + } + KOKKOS_FORCEINLINE_FUNCTION mask_type operator>=(simd const& other) const { + return mask_type(m_value >= other.m_value); + } + KOKKOS_FORCEINLINE_FUNCTION mask_type operator==(simd const& other) const { + return mask_type(m_value == other.m_value); + } + KOKKOS_FORCEINLINE_FUNCTION mask_type operator!=(simd const& other) const { + return mask_type(m_value != other.m_value); + } + KOKKOS_FORCEINLINE_FUNCTION void copy_from(T const* ptr, + element_aligned_tag) { + m_value = *ptr; + } + KOKKOS_FORCEINLINE_FUNCTION void copy_to(T* 
ptr, element_aligned_tag) const { + *ptr = m_value; + } + KOKKOS_FORCEINLINE_FUNCTION reference operator[](std::size_t) { + return m_value; + } + KOKKOS_FORCEINLINE_FUNCTION value_type operator[](std::size_t) const { + return m_value; + } +}; + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION simd<T, simd_abi::scalar> operator*( + simd<T, simd_abi::scalar> const& lhs, + simd<T, simd_abi::scalar> const& rhs) { + return simd<T, simd_abi::scalar>(static_cast<T>(lhs) * static_cast<T>(rhs)); +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION simd<T, simd_abi::scalar> operator/( + simd<T, simd_abi::scalar> const& lhs, + simd<T, simd_abi::scalar> const& rhs) { + return simd<T, simd_abi::scalar>(static_cast<T>(lhs) / static_cast<T>(rhs)); +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION simd<T, simd_abi::scalar> operator+( + simd<T, simd_abi::scalar> const& lhs, + simd<T, simd_abi::scalar> const& rhs) { + return simd<T, simd_abi::scalar>(static_cast<T>(lhs) + static_cast<T>(rhs)); +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION simd<T, simd_abi::scalar> operator-( + simd<T, simd_abi::scalar> const& lhs, + simd<T, simd_abi::scalar> const& rhs) { + return simd<T, simd_abi::scalar>(static_cast<T>(lhs) - static_cast<T>(rhs)); +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION simd<T, simd_abi::scalar> abs( + simd<T, simd_abi::scalar> const& a) { + return simd<T, simd_abi::scalar>(std::abs(static_cast<T>(a))); +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION simd<T, simd_abi::scalar> sqrt( + simd<T, simd_abi::scalar> const& a) { + return simd<T, simd_abi::scalar>(std::sqrt(static_cast<T>(a))); +} + +template <class T> +KOKKOS_FORCEINLINE_FUNCTION simd<T, simd_abi::scalar> fma( + simd<T, simd_abi::scalar> const& x, simd<T, simd_abi::scalar> const& y, + simd<T, simd_abi::scalar> const& z) { + return simd<T, simd_abi::scalar>((static_cast<T>(x) * static_cast<T>(y)) + + 
static_cast<T>(z)); +} + +template <class T> +KOKKOS_FORCEINLINE_FUNCTION simd<T, simd_abi::scalar> condition( + desul::Impl::dont_deduce_this_parameter_t< + simd_mask<T, simd_abi::scalar>> const& a, + simd<T, simd_abi::scalar> const& b, simd<T, simd_abi::scalar> const& c) { + return simd<T, simd_abi::scalar>(static_cast<bool>(a) ? static_cast<T>(b) + : static_cast<T>(c)); +} + +template <class T, class Abi> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION simd<T, Abi> copysign( + simd<T, Abi> const& a, simd<T, Abi> const& b) { + return std::copysign(static_cast<T>(a), static_cast<T>(b)); +} + +template <class T> +class const_where_expression<simd_mask<T, simd_abi::scalar>, + simd<T, simd_abi::scalar>> { + public: + using abi_type = simd_abi::scalar; + using value_type = simd<T, abi_type>; + using mask_type = simd_mask<T, abi_type>; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + KOKKOS_FORCEINLINE_FUNCTION + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast<value_type&>(value_arg)), m_mask(mask_arg) {} + KOKKOS_FORCEINLINE_FUNCTION + mask_type const& mask() const { return m_mask; } + KOKKOS_FORCEINLINE_FUNCTION + value_type const& value() const { return m_value; } + KOKKOS_FORCEINLINE_FUNCTION + void copy_to(T* mem, element_aligned_tag) const { + if (static_cast<bool>(m_mask)) *mem = static_cast<T>(m_value); + } + template <class Integral> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<std::is_integral_v<Integral>> + scatter_to(T* mem, simd<Integral, simd_abi::scalar> const& index) const { + if (static_cast<bool>(m_mask)) + mem[static_cast<Integral>(index)] = static_cast<T>(m_value); + } +}; + +template <class T> +class where_expression<simd_mask<T, simd_abi::scalar>, + simd<T, simd_abi::scalar>> + : public const_where_expression<simd_mask<T, simd_abi::scalar>, + simd<T, simd_abi::scalar>> { + using base_type = const_where_expression<simd_mask<T, simd_abi::scalar>, + simd<T, 
simd_abi::scalar>>; + + public: + using typename base_type::value_type; + KOKKOS_FORCEINLINE_FUNCTION + where_expression(simd_mask<T, simd_abi::scalar> const& mask_arg, + simd<T, simd_abi::scalar>& value_arg) + : base_type(mask_arg, value_arg) {} + KOKKOS_FORCEINLINE_FUNCTION + void copy_from(T const* mem, element_aligned_tag) { + if (static_cast<bool>(this->m_mask)) this->m_value = *mem; + } + template <class Integral> + KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<std::is_integral_v<Integral>> + gather_from(T const* mem, simd<Integral, simd_abi::scalar> const& index) { + if (static_cast<bool>(this->m_mask)) + this->m_value = mem[static_cast<Integral>(index)]; + } + template <class U, std::enable_if_t< + std::is_convertible_v<U, simd<T, simd_abi::scalar>>, + bool> = false> + KOKKOS_FORCEINLINE_FUNCTION void operator=(U&& x) { + if (static_cast<bool>(this->m_mask)) + this->m_value = + static_cast<simd<T, simd_abi::scalar>>(std::forward<U>(x)); + } +}; + +template <class T, class BinaryOp> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T +reduce(const_where_expression<simd_mask<T, simd_abi::scalar>, + simd<T, simd_abi::scalar>> const& x, + T identity_element, BinaryOp) { + return static_cast<bool>(x.mask()) ? static_cast<T>(x.value()) + : identity_element; +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T +hmax(const_where_expression<simd_mask<T, simd_abi::scalar>, + simd<T, simd_abi::scalar>> const& x) { + return static_cast<bool>(x.mask()) ? static_cast<T>(x.value()) + : Kokkos::reduction_identity<T>::max(); +} + +template <class T> +[[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION T +hmin(const_where_expression<simd_mask<T, simd_abi::scalar>, + simd<T, simd_abi::scalar>> const& x) { + return static_cast<bool>(x.mask()) ? 
static_cast<T>(x.value()) + : Kokkos::reduction_identity<T>::min(); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/simd/src/Kokkos_SIMD_dummy.cpp b/packages/kokkos/simd/src/Kokkos_SIMD_dummy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d273cb6d54d70f2e816c341525eee366744ae6b8 --- /dev/null +++ b/packages/kokkos/simd/src/Kokkos_SIMD_dummy.cpp @@ -0,0 +1,7 @@ +// This file is needed in order to get the linker language +// for the header only submodule. +// While we set the language properties in our normal cmake +// path it does not get set in the Trilinos environment. +// Furthermore, setting LINKER_LANGUAGE is only supported +// in CMAKE 3.19 and up. +void KOKKOS_SIMD_SRC_DUMMY_PREVENT_LINK_ERROR() {} diff --git a/packages/kokkos/simd/unit_tests/CMakeLists.txt b/packages/kokkos/simd/unit_tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4de8624187ddf9878259d80a7b413ac8453a881 --- /dev/null +++ b/packages/kokkos/simd/unit_tests/CMakeLists.txt @@ -0,0 +1,5 @@ +KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SIMD + SOURCES + UnitTestMain.cpp + TestSIMD.cpp) diff --git a/packages/kokkos/simd/unit_tests/TestSIMD.cpp b/packages/kokkos/simd/unit_tests/TestSIMD.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fdf72e91c84af4c8ba757703aa2fe7353b3d9e62 --- /dev/null +++ b/packages/kokkos/simd/unit_tests/TestSIMD.cpp @@ -0,0 +1,376 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_SIMD.hpp> + +class gtest_checker { + public: + void truth(bool x) const { EXPECT_TRUE(x); } + template <class T> + void equality(T const& a, T const& b) const { + EXPECT_EQ(a, b); + } +}; + +class kokkos_checker { + public: + KOKKOS_INLINE_FUNCTION void truth(bool x) const { + if (!x) Kokkos::abort("SIMD unit test truth condition failed on device"); + } + template <class T> + KOKKOS_INLINE_FUNCTION void equality(T const& a, T const& b) const { + if (a != b) + Kokkos::abort("SIMD unit test equality condition failed on device"); + } +}; + +template <class T, class Abi> +inline void host_check_equality( + Kokkos::Experimental::simd<T, Abi> const& expected_result, + Kokkos::Experimental::simd<T, Abi> const& computed_result, + std::size_t nlanes) { + gtest_checker checker; + for (std::size_t i = 0; i < nlanes; ++i) { + checker.equality(expected_result[i], computed_result[i]); + } + using mask_type = typename Kokkos::Experimental::simd<T, Abi>::mask_type; + mask_type mask(false); + for (std::size_t i = 0; i < nlanes; ++i) { + mask[i] = true; + } + checker.equality((expected_result == computed_result) && mask, mask); +} + +template <class T, class Abi> +KOKKOS_INLINE_FUNCTION void device_check_equality( + Kokkos::Experimental::simd<T, Abi> const& expected_result, + Kokkos::Experimental::simd<T, Abi> const& computed_result, + std::size_t nlanes) { + kokkos_checker checker; + for (std::size_t i = 0; i < nlanes; ++i) { + checker.equality(expected_result[i], computed_result[i]); + } + using mask_type = typename Kokkos::Experimental::simd<T, Abi>::mask_type; + mask_type mask(false); + for (std::size_t i = 0; i < nlanes; ++i) { + mask[i] = true; + } + checker.equality((expected_result == computed_result) && mask, mask); +} + +class load_element_aligned { + public: + template <class T, class Abi> + bool 
host_load(T const* mem, std::size_t n, + Kokkos::Experimental::simd<T, Abi>& result) const { + if (n < result.size()) return false; + result.copy_from(mem, Kokkos::Experimental::element_aligned_tag()); + return true; + } + template <class T, class Abi> + KOKKOS_INLINE_FUNCTION bool device_load( + T const* mem, std::size_t n, + Kokkos::Experimental::simd<T, Abi>& result) const { + if (n < result.size()) return false; + result.copy_from(mem, Kokkos::Experimental::element_aligned_tag()); + return true; + } +}; + +class load_masked { + public: + template <class T, class Abi> + bool host_load(T const* mem, std::size_t n, + Kokkos::Experimental::simd<T, Abi>& result) const { + using mask_type = typename Kokkos::Experimental::simd<T, Abi>::mask_type; + mask_type mask(false); + for (std::size_t i = 0; i < n; ++i) { + mask[i] = true; + } + where(mask, result) + .copy_from(mem, Kokkos::Experimental::element_aligned_tag()); + where(!mask, result) = 0; + return true; + } + template <class T, class Abi> + KOKKOS_INLINE_FUNCTION bool device_load( + T const* mem, std::size_t n, + Kokkos::Experimental::simd<T, Abi>& result) const { + using mask_type = typename Kokkos::Experimental::simd<T, Abi>::mask_type; + mask_type mask(false); + for (std::size_t i = 0; i < n; ++i) { + mask[i] = true; + } + where(mask, result) + .copy_from(mem, Kokkos::Experimental::element_aligned_tag()); + where(!mask, result) = T(0); + return true; + } +}; + +class load_as_scalars { + public: + template <class T, class Abi> + bool host_load(T const* mem, std::size_t n, + Kokkos::Experimental::simd<T, Abi>& result) const { + for (std::size_t i = 0; i < n; ++i) { + result[i] = mem[i]; + } + for (std::size_t i = n; i < result.size(); ++i) { + result[i] = T(0); + } + return true; + } + template <class T, class Abi> + KOKKOS_INLINE_FUNCTION bool device_load( + T const* mem, std::size_t n, + Kokkos::Experimental::simd<T, Abi>& result) const { + for (std::size_t i = 0; i < n; ++i) { + result[i] = mem[i]; + } + for 
(std::size_t i = n; i < result.size(); ++i) { + result[i] = T(0); + } + return true; + } +}; + +template <class Abi, class Loader, class BinaryOp, class T> +void host_check_binary_op_one_loader(BinaryOp binary_op, std::size_t n, + T const* first_args, + T const* second_args) { + Loader loader; + using simd_type = Kokkos::Experimental::simd<T, Abi>; + std::size_t constexpr width = simd_type::size(); + for (std::size_t i = 0; i < n; i += width) { + std::size_t const nremaining = n - i; + std::size_t const nlanes = Kokkos::min(nremaining, width); + simd_type first_arg; + bool const loaded_first_arg = + loader.host_load(first_args + i, nlanes, first_arg); + simd_type second_arg; + bool const loaded_second_arg = + loader.host_load(second_args + i, nlanes, second_arg); + if (!(loaded_first_arg && loaded_second_arg)) continue; + simd_type expected_result; + for (std::size_t lane = 0; lane < nlanes; ++lane) { + expected_result[lane] = + binary_op.on_host(first_arg[lane], second_arg[lane]); + } + simd_type const computed_result = binary_op.on_host(first_arg, second_arg); + host_check_equality(expected_result, computed_result, nlanes); + } +} + +template <class Abi, class Loader, class BinaryOp, class T> +KOKKOS_INLINE_FUNCTION void device_check_binary_op_one_loader( + BinaryOp binary_op, std::size_t n, T const* first_args, + T const* second_args) { + Loader loader; + using simd_type = Kokkos::Experimental::simd<T, Abi>; + std::size_t constexpr width = simd_type::size(); + for (std::size_t i = 0; i < n; i += width) { + std::size_t const nremaining = n - i; + std::size_t const nlanes = Kokkos::min(nremaining, width); + simd_type first_arg; + bool const loaded_first_arg = + loader.device_load(first_args + i, nlanes, first_arg); + simd_type second_arg; + bool const loaded_second_arg = + loader.device_load(second_args + i, nlanes, second_arg); + if (!(loaded_first_arg && loaded_second_arg)) continue; + simd_type expected_result; + for (std::size_t lane = 0; lane < nlanes; 
++lane) { + expected_result[lane] = + binary_op.on_device(first_arg[lane], second_arg[lane]); + } + simd_type const computed_result = + binary_op.on_device(first_arg, second_arg); + device_check_equality(expected_result, computed_result, nlanes); + } +} + +template <class Abi, class BinaryOp, class T> +inline void host_check_binary_op_all_loaders(BinaryOp binary_op, std::size_t n, + T const* first_args, + T const* second_args) { + host_check_binary_op_one_loader<Abi, load_element_aligned>( + binary_op, n, first_args, second_args); + host_check_binary_op_one_loader<Abi, load_masked>(binary_op, n, first_args, + second_args); + host_check_binary_op_one_loader<Abi, load_as_scalars>( + binary_op, n, first_args, second_args); +} + +template <class Abi, class BinaryOp, class T> +KOKKOS_INLINE_FUNCTION void device_check_binary_op_all_loaders( + BinaryOp binary_op, std::size_t n, T const* first_args, + T const* second_args) { + device_check_binary_op_one_loader<Abi, load_element_aligned>( + binary_op, n, first_args, second_args); + device_check_binary_op_one_loader<Abi, load_masked>(binary_op, n, first_args, + second_args); + device_check_binary_op_one_loader<Abi, load_as_scalars>( + binary_op, n, first_args, second_args); +} + +class plus { + public: + template <class T> + auto on_host(T const& a, T const& b) const { + return a + b; + } + template <class T> + KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { + return a + b; + } +}; + +class minus { + public: + template <class T> + auto on_host(T const& a, T const& b) const { + return a - b; + } + template <class T> + KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { + return a - b; + } +}; + +class multiplies { + public: + template <class T> + auto on_host(T const& a, T const& b) const { + return a * b; + } + template <class T> + KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { + return a * b; + } +}; + +class divides { + public: + template <class T> + auto 
on_host(T const& a, T const& b) const { + return a / b; + } + template <class T> + KOKKOS_INLINE_FUNCTION auto on_device(T const& a, T const& b) const { + return a / b; + } +}; + +template <class Abi> +inline void host_check_math_ops() { + std::size_t constexpr n = 11; + double const first_args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; + double const second_args[n] = {1, 2, 1, 1, 1, -3, -2, 1, 13, -3, -2}; + host_check_binary_op_all_loaders<Abi>(plus(), n, first_args, second_args); + host_check_binary_op_all_loaders<Abi>(minus(), n, first_args, second_args); + host_check_binary_op_all_loaders<Abi>(multiplies(), n, first_args, + second_args); + host_check_binary_op_all_loaders<Abi>(divides(), n, first_args, second_args); +} + +template <class Abi> +KOKKOS_INLINE_FUNCTION void device_check_math_ops() { + std::size_t constexpr n = 11; + double const first_args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; + double const second_args[n] = {1, 2, 1, 1, 1, -3, -2, 1, 13, -3, -2}; + device_check_binary_op_all_loaders<Abi>(plus(), n, first_args, second_args); + device_check_binary_op_all_loaders<Abi>(minus(), n, first_args, second_args); + device_check_binary_op_all_loaders<Abi>(multiplies(), n, first_args, + second_args); + device_check_binary_op_all_loaders<Abi>(divides(), n, first_args, + second_args); +} + +template <class Abi> +inline void host_check_abi() { + host_check_math_ops<Abi>(); +} + +template <class Abi> +KOKKOS_INLINE_FUNCTION void device_check_abi() { + device_check_math_ops<Abi>(); +} + +inline void host_check_abis(Kokkos::Experimental::Impl::abi_set<>) {} + +KOKKOS_INLINE_FUNCTION void device_check_abis( + Kokkos::Experimental::Impl::abi_set<>) {} + +template <class FirstAbi, class... RestAbis> +inline void host_check_abis( + Kokkos::Experimental::Impl::abi_set<FirstAbi, RestAbis...>) { + host_check_abi<FirstAbi>(); + host_check_abis(Kokkos::Experimental::Impl::abi_set<RestAbis...>()); +} + +template <class FirstAbi, class... 
RestAbis> +KOKKOS_INLINE_FUNCTION void device_check_abis( + Kokkos::Experimental::Impl::abi_set<FirstAbi, RestAbis...>) { + device_check_abi<FirstAbi>(); + device_check_abis(Kokkos::Experimental::Impl::abi_set<RestAbis...>()); +} + +TEST(simd, host) { + host_check_abis(Kokkos::Experimental::Impl::host_abi_set()); +} + +class simd_device_functor { + public: + KOKKOS_INLINE_FUNCTION void operator()(int) const { + device_check_abis(Kokkos::Experimental::Impl::device_abi_set()); + } +}; + +TEST(simd, device) { + Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::IndexType<int>>(0, 1), + simd_device_functor()); +} diff --git a/packages/kokkos/simd/unit_tests/UnitTestMain.cpp b/packages/kokkos/simd/unit_tests/UnitTestMain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e245aad35fc33a595a16f711dbd4a63a0c7f8948 --- /dev/null +++ b/packages/kokkos/simd/unit_tests/UnitTestMain.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> + +int main(int argc, char *argv[]) { + Kokkos::initialize(argc, argv); + ::testing::InitGoogleTest(&argc, argv); + int result = RUN_ALL_TESTS(); + Kokkos::finalize(); + return result; +} diff --git a/packages/kokkos/core/src/desul/.clang-format b/packages/kokkos/tpls/desul/include/desul/.clang-format similarity index 100% rename from packages/kokkos/core/src/desul/.clang-format rename to packages/kokkos/tpls/desul/include/desul/.clang-format diff --git a/packages/kokkos/core/src/desul/atomics.hpp b/packages/kokkos/tpls/desul/include/desul/atomics.hpp similarity index 98% rename from packages/kokkos/core/src/desul/atomics.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics.hpp index ab3fe25392faa70027cb19c2a02c18c570c5768b..8ba5b0f3a09697942752467e0937a1409f66a687 100644 --- a/packages/kokkos/core/src/desul/atomics.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. 
See the COPYRIGHT file for details. Source: https://github.com/desul/desul @@ -9,11 +9,10 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifndef DESUL_ATOMICS_HPP_ #define DESUL_ATOMICS_HPP_ -#include "desul/atomics/Macros.hpp" - #include "desul/atomics/Atomic_Ref.hpp" #include "desul/atomics/Compare_Exchange.hpp" #include "desul/atomics/Generic.hpp" #include "desul/atomics/Lock_Array.hpp" +#include "desul/atomics/Macros.hpp" #endif diff --git a/packages/kokkos/core/src/desul/atomics/Atomic_Ref.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp similarity index 91% rename from packages/kokkos/core/src/desul/atomics/Atomic_Ref.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp index 73cd01a7e6ff9c54b8b851193bf256124f399cfe..fbf2dcf6b7cb2972f2d43b81f0b55b45951ee03c 100644 --- a/packages/kokkos/core/src/desul/atomics/Atomic_Ref.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp @@ -103,10 +103,10 @@ struct basic_atomic_ref<T, MemoryOrder, MemoryScope, false, false> { DESUL_FUNCTION bool compare_exchange_weak( T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); + desired, + order, + cmpexch_failure_memory_order<_MemoryOrder>(), + MemoryScope()); } template <typename SuccessMemoryOrder, typename FailureMemoryOrder> @@ -123,10 +123,10 @@ struct basic_atomic_ref<T, MemoryOrder, MemoryScope, false, false> { DESUL_FUNCTION bool compare_exchange_strong( T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); + desired, + order, + cmpexch_failure_memory_order<_MemoryOrder>(), + MemoryScope()); } }; @@ -195,10 +195,10 @@ struct basic_atomic_ref<T, MemoryOrder, MemoryScope, true, false> { DESUL_FUNCTION bool 
compare_exchange_weak( T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); + desired, + order, + cmpexch_failure_memory_order<_MemoryOrder>(), + MemoryScope()); } template <typename SuccessMemoryOrder, typename FailureMemoryOrder> @@ -215,10 +215,10 @@ struct basic_atomic_ref<T, MemoryOrder, MemoryScope, true, false> { DESUL_FUNCTION bool compare_exchange_strong( T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); + desired, + order, + cmpexch_failure_memory_order<_MemoryOrder>(), + MemoryScope()); } template <typename _MemoryOrder = MemoryOrder> @@ -348,10 +348,10 @@ struct basic_atomic_ref<T, MemoryOrder, MemoryScope, false, true> { DESUL_FUNCTION bool compare_exchange_weak( T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); + desired, + order, + cmpexch_failure_memory_order<_MemoryOrder>(), + MemoryScope()); } template <typename SuccessMemoryOrder, typename FailureMemoryOrder> @@ -368,10 +368,10 @@ struct basic_atomic_ref<T, MemoryOrder, MemoryScope, false, true> { DESUL_FUNCTION bool compare_exchange_strong( T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); + desired, + order, + cmpexch_failure_memory_order<_MemoryOrder>(), + MemoryScope()); } template <typename _MemoryOrder = MemoryOrder> @@ -457,10 +457,10 @@ struct basic_atomic_ref<T*, MemoryOrder, MemoryScope, false, false> { DESUL_FUNCTION bool compare_exchange_weak( T*& expected, T* desired, _MemoryOrder 
order = _MemoryOrder()) const noexcept { return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); + desired, + order, + cmpexch_failure_memory_order<_MemoryOrder>(), + MemoryScope()); } template <typename SuccessMemoryOrder, typename FailureMemoryOrder> @@ -477,10 +477,10 @@ struct basic_atomic_ref<T*, MemoryOrder, MemoryScope, false, false> { DESUL_FUNCTION bool compare_exchange_strong( T*& expected, T* desired, _MemoryOrder order = _MemoryOrder()) const noexcept { return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); + desired, + order, + cmpexch_failure_memory_order<_MemoryOrder>(), + MemoryScope()); } template <typename _MemoryOrder = MemoryOrder> diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/CUDA.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/CUDA.hpp new file mode 100644 index 0000000000000000000000000000000000000000..87c0df4af91827793e0d95c43b1af08df88263fe --- /dev/null +++ b/packages/kokkos/tpls/desul/include/desul/atomics/CUDA.hpp @@ -0,0 +1,664 @@ +/* +Copyright (c) 2019, Lawrence Livermore National Security, LLC +and DESUL project contributors. See the COPYRIGHT file for details. +Source: https://github.com/desul/desul + +SPDX-License-Identifier: (BSD-3-Clause) +*/ +#ifndef DESUL_ATOMICS_CUDA_HPP_ +#define DESUL_ATOMICS_CUDA_HPP_ + +#ifdef DESUL_HAVE_CUDA_ATOMICS +// When building with Clang we need to include the device functions always since Clang +// must see a consistent overload set in both device and host compilation, but that +// means we need to know on the host what to make visible, i.e. we need a host side +// compile knowledge of architecture. 
+#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700)) || \ + (!defined(__NVCC__) && !defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA)) +#define DESUL_HAVE_CUDA_ATOMICS_ASM +#include <desul/atomics/cuda/CUDA_asm.hpp> +#endif + +#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700)) || \ + (!defined(__NVCC__) && !defined(DESUL_HAVE_CUDA_ATOMICS_ASM)) +namespace desul { +namespace Impl { +template <class T> +struct is_cuda_atomic_integer_type { + static constexpr bool value = std::is_same<T, int>::value || + std::is_same<T, unsigned int>::value || + std::is_same<T, unsigned long long int>::value; +}; + +template <class T> +struct is_cuda_atomic_add_type { + static constexpr bool value = is_cuda_atomic_integer_type<T>::value || +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 600) + std::is_same<T, double>::value || +#endif + std::is_same<T, float>::value; +}; + +template <class T> +struct is_cuda_atomic_sub_type { + static constexpr bool value = + std::is_same<T, int>::value || std::is_same<T, unsigned int>::value; +}; +} // namespace Impl + +// Atomic Add +template <class T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_add_type<T>::value, T> + atomic_fetch_add(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicAdd(dest, val); +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_add_type<T>::value, T> + atomic_fetch_add(T* dest, T val, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicAdd(dest, val); + __threadfence(); + return return_val; +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_add_type<T>::value, T> + atomic_fetch_add(T* dest, T val, MemoryOrder, MemoryScopeCore) { + return atomic_fetch_add(dest, val, MemoryOrder(), MemoryScopeDevice()); +} + +// Atomic Sub +template <class T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_sub_type<T>::value, T> + atomic_fetch_sub(T* dest, T val, 
MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicSub(dest, val); +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_sub_type<T>::value, T> + atomic_fetch_sub(T* dest, T val, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicSub(dest, val); + __threadfence(); + return return_val; +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_sub_type<T>::value, T> + atomic_fetch_sub(T* dest, T val, MemoryOrder, MemoryScopeCore) { + return atomic_fetch_sub(dest, val, MemoryOrder(), MemoryScopeDevice()); +} + +// Wrap around atomic add +__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest, + unsigned int val, + MemoryOrderRelaxed, + MemoryScopeDevice) { + return atomicInc(dest, val); +} + +template <typename MemoryOrder> +__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest, + unsigned int val, + MemoryOrder, + MemoryScopeDevice) { + __threadfence(); + unsigned int return_val = atomicInc(dest, val); + __threadfence(); + return return_val; +} + +template <typename MemoryOrder> +__device__ inline unsigned int atomic_fetch_inc_mod(unsigned int* dest, + unsigned int val, + MemoryOrder, + MemoryScopeCore) { + return atomic_fetch_inc_mod(dest, val, MemoryOrder(), MemoryScopeDevice()); +} + +// Wrap around atomic sub +__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest, + unsigned int val, + MemoryOrderRelaxed, + MemoryScopeDevice) { + return atomicDec(dest, val); +} + +template <typename MemoryOrder> +__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest, + unsigned int val, + MemoryOrder, + MemoryScopeDevice) { + __threadfence(); + unsigned int return_val = atomicDec(dest, val); + __threadfence(); + return return_val; +} + +template <typename MemoryOrder> +__device__ inline unsigned int atomic_fetch_dec_mod(unsigned int* dest, + unsigned int val, + MemoryOrder, + MemoryScopeCore) 
{ + return atomic_fetch_dec_mod(dest, val, MemoryOrder(), MemoryScopeDevice()); +} + +// Atomic Inc +template <typename T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_add_type<T>::value, T> + atomic_fetch_inc(T* dest, MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicAdd(dest, T(1)); +} + +template <typename T, typename MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_add_type<T>::value, T> + atomic_fetch_inc(T* dest, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicAdd(dest, T(1)); + __threadfence(); + + return return_val; +} + +template <typename T, typename MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_add_type<T>::value, T> + atomic_fetch_inc(T* dest, MemoryOrder, MemoryScopeCore) { + return atomic_fetch_add(dest, T(1), MemoryOrder(), MemoryScopeDevice()); +} + +// Atomic Dec +template <typename T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_sub_type<T>::value, T> + atomic_fetch_dec(T* dest, MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicSub(dest, T(1)); +} + +template <typename T, typename MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_sub_type<T>::value, T> + atomic_fetch_dec(T* dest, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicSub(dest, T(1)); + __threadfence(); + return return_val; +} + +template <typename T, typename MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_sub_type<T>::value, T> + atomic_fetch_dec(T* dest, MemoryOrder, MemoryScopeCore) { + return atomic_fetch_sub(dest, T(1), MemoryOrder(), MemoryScopeDevice()); +} + +// Atomic Max +template <class T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_max(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicMax(dest, val); +} + +template <class T, class MemoryOrder> +__device__ inline + 
std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_max(T* dest, T val, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicMax(dest, val); + __threadfence(); + return return_val; +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_max(T* dest, T val, MemoryOrder, MemoryScopeCore) { + return atomic_fetch_max(dest, val, MemoryOrder(), MemoryScopeDevice()); +} + +// Atomic Min +template <class T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_min(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicMin(dest, val); +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_min(T* dest, T val, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicMin(dest, val); + __threadfence(); + return return_val; +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_min(T* dest, T val, MemoryOrder, MemoryScopeCore) { + return atomic_fetch_min(dest, val, MemoryOrder(), MemoryScopeDevice()); +} + +// Atomic And +template <class T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_and(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicAnd(dest, val); +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_and(T* dest, T val, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicAnd(dest, val); + __threadfence(); + return return_val; +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_and(T* dest, T val, 
MemoryOrder, MemoryScopeCore) { + return atomic_fetch_and(dest, val, MemoryOrder(), MemoryScopeDevice()); +} + +// Atomic XOR +template <class T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_xor(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicXor(dest, val); +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_xor(T* dest, T val, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicXor(dest, val); + __threadfence(); + return return_val; +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_xor(T* dest, T val, MemoryOrder, MemoryScopeCore) { + return atomic_fetch_xor(dest, val, MemoryOrder(), MemoryScopeDevice()); +} + +// Atomic OR +template <class T> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_or(T* dest, T val, MemoryOrderRelaxed, MemoryScopeDevice) { + return atomicOr(dest, val); +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_or(T* dest, T val, MemoryOrder, MemoryScopeDevice) { + __threadfence(); + T return_val = atomicOr(dest, val); + __threadfence(); + return return_val; +} + +template <class T, class MemoryOrder> +__device__ inline + std::enable_if_t<Impl::is_cuda_atomic_integer_type<T>::value, T> + atomic_fetch_or(T* dest, T val, MemoryOrder, MemoryScopeCore) { + return atomic_fetch_or(dest, val, MemoryOrder(), MemoryScopeDevice()); +} +} // namespace desul +#endif + +#if !defined(__NVCC__) +// Functions defined as device functions in CUDA which don't exist in the GCC overload +// set +namespace desul { + +#if defined(DESUL_HAVE_CUDA_ATOMICS_ASM) +#define DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(TYPE, ORDER, SCOPE) \ + inline void 
atomic_add(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ + (void)atomic_fetch_add(dest, val, order, scope); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(int32_t, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(long, + MemoryOrderRelaxed, + MemoryScopeDevice); // only for ASM? +DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(unsigned int, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(unsigned long long, + MemoryOrderRelaxed, + MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(float, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_ADD(double, MemoryOrderRelaxed, MemoryScopeDevice); + +#define DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(TYPE, ORDER, SCOPE) \ + inline void atomic_sub(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ + (void)atomic_fetch_sub(dest, val, order, scope); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(int32_t, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(long, + MemoryOrderRelaxed, + MemoryScopeDevice); // only for ASM? +DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(unsigned int, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(float, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_SUB(double, MemoryOrderRelaxed, MemoryScopeDevice); + +#define DESUL_IMPL_CUDA_HOST_ATOMIC_INC(TYPE, ORDER, SCOPE) \ + inline void atomic_inc(TYPE* const dest, ORDER order, SCOPE scope) { \ + (void)atomic_fetch_inc(dest, order, scope); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_INC(unsigned int, + MemoryOrderRelaxed, + MemoryScopeDevice); // only for ASM? + +#define DESUL_IMPL_CUDA_HOST_ATOMIC_DEC(TYPE, ORDER, SCOPE) \ + inline void atomic_dec(TYPE* const dest, ORDER order, SCOPE scope) { \ + (void)atomic_fetch_dec(dest, order, scope); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_DEC(unsigned, + MemoryOrderRelaxed, + MemoryScopeDevice); // only for ASM? 
+ +#endif // DESUL_HAVE_CUDA_ATOMICS_ASM + +#define DESUL_IMPL_CUDA_HOST_ATOMIC_INC_MOD(TYPE, ORDER, SCOPE) \ + inline TYPE atomic_fetch_inc_mod(TYPE* dest, TYPE val, ORDER order, SCOPE scope) { \ + using cas_t = typename Impl::atomic_compare_exchange_type<sizeof(TYPE)>::type; \ + cas_t oldval = reinterpret_cast<cas_t&>(*dest); \ + cas_t assume = oldval; \ + do { \ + assume = oldval; \ + TYPE newval = (reinterpret_cast<TYPE&>(assume) >= val) \ + ? static_cast<TYPE>(0) \ + : reinterpret_cast<TYPE&>(assume) + static_cast<TYPE>(1); \ + oldval = desul::atomic_compare_exchange(reinterpret_cast<cas_t*>(dest), \ + assume, \ + reinterpret_cast<cas_t&>(newval), \ + order, \ + scope); \ + } while (assume != oldval); \ + return reinterpret_cast<TYPE&>(oldval); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_INC_MOD(unsigned int, + MemoryOrderRelaxed, + MemoryScopeDevice); +#define DESUL_IMPL_CUDA_HOST_ATOMIC_DEC_MOD(TYPE, ORDER, SCOPE) \ + inline TYPE atomic_fetch_dec_mod(TYPE* dest, TYPE val, ORDER order, SCOPE scope) { \ + using cas_t = typename Impl::atomic_compare_exchange_type<sizeof(TYPE)>::type; \ + cas_t oldval = reinterpret_cast<cas_t&>(*dest); \ + cas_t assume = oldval; \ + do { \ + assume = oldval; \ + TYPE newval = ((reinterpret_cast<TYPE&>(assume) == static_cast<TYPE>(0)) | \ + (reinterpret_cast<TYPE&>(assume) > val)) \ + ? 
val \ + : reinterpret_cast<TYPE&>(assume) - static_cast<TYPE>(1); \ + oldval = desul::atomic_compare_exchange(reinterpret_cast<cas_t*>(dest), \ + assume, \ + reinterpret_cast<cas_t&>(newval), \ + order, \ + scope); \ + } while (assume != oldval); \ + return reinterpret_cast<TYPE&>(oldval); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_DEC_MOD(unsigned int, + MemoryOrderRelaxed, + MemoryScopeDevice); + +#define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_ADD(TYPE, ORDER, SCOPE) \ + inline TYPE atomic_fetch_add(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ + return Impl::atomic_fetch_oper( \ + Impl::AddOper<TYPE, const TYPE>(), dest, val, order, scope); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_ADD(float, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_ADD(double, MemoryOrderRelaxed, MemoryScopeDevice); + +#define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_SUB(TYPE, ORDER, SCOPE) \ + inline TYPE atomic_fetch_sub(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ + return Impl::atomic_fetch_oper( \ + Impl::SubOper<TYPE, const TYPE>(), dest, val, order, scope); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_SUB(float, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_SUB(double, MemoryOrderRelaxed, MemoryScopeDevice); + +#define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(TYPE, ORDER, SCOPE) \ + inline TYPE atomic_fetch_max(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ + return Impl::atomic_fetch_oper( \ + Impl::MaxOper<TYPE, const TYPE>(), dest, val, order, scope); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(int, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(long, + MemoryOrderRelaxed, + MemoryScopeDevice); // only for ASM? 
+DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(unsigned int, + MemoryOrderRelaxed, + MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(unsigned long, + MemoryOrderRelaxed, + MemoryScopeDevice); +// DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MAX(unsigned long +// long,MemoryOrderRelaxed,MemoryScopeDevice); + +#define DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(TYPE, ORDER, SCOPE) \ + inline TYPE atomic_fetch_min(TYPE* const dest, TYPE val, ORDER order, SCOPE scope) { \ + return Impl::atomic_fetch_oper( \ + Impl::MinOper<TYPE, const TYPE>(), dest, val, order, scope); \ + } +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(int, MemoryOrderRelaxed, MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(long, + MemoryOrderRelaxed, + MemoryScopeDevice); // only for ASM? +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(unsigned int, + MemoryOrderRelaxed, + MemoryScopeDevice); +DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(unsigned long, + MemoryOrderRelaxed, + MemoryScopeDevice); +// DESUL_IMPL_CUDA_HOST_ATOMIC_FETCH_MIN(unsigned long +// long,MemoryOrderRelaxed,MemoryScopeDevice); inline void atomic_fetch_max(int32_t* +// const dest, int32_t val, MemoryOrderRelaxed order, MemoryScopeDevice scope) { + +} // namespace desul + +// Functions defined int the GCC overload set but not in the device overload set +namespace desul { +__device__ inline unsigned long long atomic_fetch_add(unsigned long long* const dest, + unsigned long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::AddOper<unsigned long long, const unsigned long long>(), + dest, + val, + order, + scope); +} +__device__ inline long long atomic_fetch_add(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::AddOper<long long, const long long>(), dest, val, order, scope); +} +__device__ inline long atomic_fetch_add(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return 
Impl::atomic_fetch_oper( + Impl::AddOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long long atomic_fetch_sub(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::SubOper<long long, const long long>(), dest, val, order, scope); +} +__device__ inline long atomic_fetch_sub(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::SubOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long atomic_fetch_max(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::MaxOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long atomic_fetch_min(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::MinOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long atomic_fetch_or(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::OrOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long long atomic_fetch_or(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::OrOper<long long, const long long>(), dest, val, order, scope); +} +__device__ inline long atomic_fetch_xor(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::XorOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long long atomic_fetch_xor(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::XorOper<long long, const long long>(), dest, val, order, scope); +} +__device__ 
inline long atomic_fetch_and(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::AndOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long long atomic_fetch_and(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_fetch_oper( + Impl::AndOper<long long, const long long>(), dest, val, order, scope); +} + +__device__ inline unsigned long long atomic_add_fetch(unsigned long long* const dest, + unsigned long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::AddOper<unsigned long long, const unsigned long long>(), + dest, + val, + order, + scope); +} +__device__ inline long long atomic_add_fetch(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::AddOper<long long, const long long>(), dest, val, order, scope); +} +__device__ inline long atomic_add_fetch(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::AddOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long long atomic_sub_fetch(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::SubOper<long long, const long long>(), dest, val, order, scope); +} +__device__ inline long atomic_sub_fetch(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::SubOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long long atomic_or_fetch(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::OrOper<long long, const long long>(), dest, val, order, scope); +} 
+__device__ inline long atomic_or_fetch(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::OrOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long long atomic_xor_fetch(long long* const dest, + long long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::XorOper<long long, const long long>(), dest, val, order, scope); +} +__device__ inline long atomic_xor_fetch(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::XorOper<long, const long>(), dest, val, order, scope); +} +__device__ inline long long atomic_and_fetch(long long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::AndOper<long long, const long long>(), dest, val, order, scope); +} +__device__ inline long atomic_and_fetch(long* const dest, + long val, + MemoryOrderRelaxed order, + MemoryScopeDevice scope) { + return Impl::atomic_oper_fetch( + Impl::AndOper<long, const long>(), dest, val, order, scope); +} +} // namespace desul +#endif + +#endif // DESUL_HAVE_CUDA_ATOMICS +#endif diff --git a/packages/kokkos/core/src/desul/atomics/Common.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Common.hpp similarity index 91% rename from packages/kokkos/core/src/desul/atomics/Common.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Common.hpp index 1b8dc9f58e947042c5a20c2a3d5154d780bece78..aef098e4d8caf6b0d7e9a418cdeff0a9349acce0 100644 --- a/packages/kokkos/core/src/desul/atomics/Common.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Common.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. 
Source: https://github.com/desul/desul @@ -8,11 +8,12 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifndef DESUL_ATOMICS_COMMON_HPP_ #define DESUL_ATOMICS_COMMON_HPP_ -#include "desul/atomics/Macros.hpp" -#include <cstdint> #include <atomic> +#include <cstdint> #include <type_traits> +#include "desul/atomics/Macros.hpp" + namespace desul { struct alignas(16) Dummy16ByteValue { int64_t value1; @@ -137,20 +138,21 @@ using cmpexch_failure_memory_order = typename CmpExchFailureOrder<MemoryOrder>::memory_order; } // namespace Impl -} +} // namespace desul -// We should in principle use std::numeric_limits, but that requires constexpr function support on device -// Currently that is still considered experimetal on CUDA and sometimes not reliable. +// We should in principle use std::numeric_limits, but that requires constexpr function +// support on device Currently that is still considered experimetal on CUDA and +// sometimes not reliable. namespace desul { namespace Impl { -template<class T> +template <class T> struct numeric_limits_max; -template<> +template <> struct numeric_limits_max<uint32_t> { static constexpr uint32_t value = 0xffffffffu; }; -template<> +template <> struct numeric_limits_max<uint64_t> { static constexpr uint64_t value = 0xfffffffflu; }; @@ -172,30 +174,32 @@ DESUL_INLINE_FUNCTION bool atomic_is_lock_free() noexcept { ; } -template<std::size_t N> +template <std::size_t N> struct atomic_compare_exchange_type; -template<> +template <> struct atomic_compare_exchange_type<4> { using type = int32_t; }; -template<> +template <> struct atomic_compare_exchange_type<8> { using type = int64_t; }; -template<> +template <> struct atomic_compare_exchange_type<16> { using type = Dummy16ByteValue; }; -template<class T> -struct dont_deduce_this_parameter { using type = T; }; +template <class T> +struct dont_deduce_this_parameter { + using type = T; +}; -template<class T> +template <class T> using dont_deduce_this_parameter_t = typename 
dont_deduce_this_parameter<T>::type; -} -} +} // namespace Impl +} // namespace desul #endif diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange.hpp similarity index 99% rename from packages/kokkos/core/src/desul/atomics/Compare_Exchange.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange.hpp index 81c90db79e8b42159fab8e7e632d9315f2920475..d947dac27500fb1ebc63b89465de4b8f3bb8d77c 100644 --- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange.hpp @@ -9,9 +9,8 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_HPP_ #define DESUL_ATOMICS_COMPARE_EXCHANGE_HPP_ -#include "desul/atomics/Macros.hpp" - #include "desul/atomics/Compare_Exchange_ScopeCaller.hpp" +#include "desul/atomics/Macros.hpp" #ifdef DESUL_HAVE_GCC_ATOMICS #include "desul/atomics/Compare_Exchange_GCC.hpp" diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_CUDA.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_CUDA.hpp similarity index 69% rename from packages/kokkos/core/src/desul/atomics/Compare_Exchange_CUDA.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_CUDA.hpp index abe566c7ba91bd2ce7ffb2e8db96bce43090f414..310c59f55ef4ca1342a7e1a113535f185b38692a 100644 --- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_CUDA.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_CUDA.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. 
Source: https://github.com/desul/desul @@ -40,7 +40,7 @@ __device__ inline void atomic_thread_fence(MemoryOrderAcqRel, MemoryScopeCore) { __device__ inline void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeCore) { __threadfence_block(); } -#if (__CUDA_ARCH__>=600) || !defined(__NVCC__) +#if (__CUDA_ARCH__ >= 600) || !defined(__NVCC__) __device__ inline void atomic_thread_fence(MemoryOrderRelease, MemoryScopeNode) { __threadfence_system(); } @@ -55,19 +55,21 @@ __device__ inline void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeNode) { } #endif #endif -} +} // namespace desul -// Compare Exchange for PRE Volta, not supported with CLANG as CUDA compiler, since we do NOT have a way -// of having the code included for clang only when the CC is smaller than 700 -// But on Clang the device side symbol list must be independent of __CUDA_ARCH__ +// Compare Exchange for PRE Volta, not supported with CLANG as CUDA compiler, since we +// do NOT have a way of having the code included for clang only when the CC is smaller +// than 700 But on Clang the device side symbol list must be independent of +// __CUDA_ARCH__ // FIXME temporary fix for https://github.com/kokkos/kokkos/issues/4390 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700) || \ -(!defined(__NVCC__) && defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA) && 0) + (!defined(__NVCC__) && defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA) && 0) namespace desul { template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 4, T>::type atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned int) == 4, "this function assumes an unsigned int is 32-bit"); + static_assert(sizeof(unsigned int) == 4, + "this function assumes an unsigned int is 32-bit"); unsigned int return_val = atomicCAS(reinterpret_cast<unsigned int*>(dest), reinterpret_cast<unsigned int&>(compare), reinterpret_cast<unsigned int&>(value)); @@ -76,7 +78,8 @@ __device__ typename 
std::enable_if<sizeof(T) == 4, T>::type atomic_compare_excha template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, "this function assumes an unsigned long long is 64-bit"); + static_assert(sizeof(unsigned long long int) == 8, + "this function assumes an unsigned long long is 64-bit"); unsigned long long int return_val = atomicCAS(reinterpret_cast<unsigned long long int*>(dest), reinterpret_cast<unsigned long long int&>(compare), @@ -85,34 +88,41 @@ __device__ typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_excha } template <typename T, class MemoryScope> -__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type atomic_compare_exchange( +__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type +atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrderRelease, MemoryScope) { - T return_val = atomic_compare_exchange(dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); + T return_val = atomic_compare_exchange( + dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return return_val; } template <typename T, class MemoryScope> -__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type atomic_compare_exchange( +__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type +atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrderAcquire, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); - T return_val = atomic_compare_exchange(dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); + T return_val = atomic_compare_exchange( + dest, compare, 
value, MemoryOrderRelaxed(), MemoryScope()); return return_val; } template <typename T, class MemoryScope> -__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type atomic_compare_exchange( +__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type +atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrderAcqRel, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); - T return_val = atomic_compare_exchange(dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); + T return_val = atomic_compare_exchange( + dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return return_val; } template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange( T* const dest, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned int) == 4, "this function assumes an unsigned int is 32-bit"); + static_assert(sizeof(unsigned int) == 4, + "this function assumes an unsigned int is 32-bit"); unsigned int return_val = atomicExch(reinterpret_cast<unsigned int*>(dest), reinterpret_cast<unsigned int&>(value)); return reinterpret_cast<T&>(return_val); @@ -120,7 +130,8 @@ __device__ typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange( template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange( T* const dest, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, "this function assumes an unsigned long long is 64-bit"); + static_assert(sizeof(unsigned long long int) == 8, + "this function assumes an unsigned long long is 64-bit"); unsigned long long int return_val = atomicExch(reinterpret_cast<unsigned long long int*>(dest), reinterpret_cast<unsigned 
long long int&>(value)); @@ -128,27 +139,27 @@ __device__ typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange( } template <typename T, class MemoryScope> -__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type atomic_exchange( - T* const dest, T value, MemoryOrderRelease, MemoryScope) { +__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type +atomic_exchange(T* const dest, T value, MemoryOrderRelease, MemoryScope) { T return_val = atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return reinterpret_cast<T&>(return_val); } template <typename T, class MemoryScope> -__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type atomic_exchange( - T* const dest, T value, MemoryOrderAcquire, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); +__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type +atomic_exchange(T* const dest, T value, MemoryOrderAcquire, MemoryScope) { + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); T return_val = atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); return reinterpret_cast<T&>(return_val); } template <typename T, class MemoryScope> -__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type atomic_exchange( - T* const dest, T value, MemoryOrderAcqRel, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); +__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type +atomic_exchange(T* const dest, T value, MemoryOrderAcqRel, MemoryScope) { + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); T return_val = atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return 
reinterpret_cast<T&>(return_val); } } // namespace desul @@ -162,8 +173,8 @@ __device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type at // We simply can say DESUL proper doesn't support clang CUDA build pre Volta, // Kokkos has that knowledge and so I use it here, allowing in Kokkos to use // clang with pre Volta as CUDA compiler -#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__>=700)) || \ - (!defined(__NVCC__) && !defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA)) +#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700)) || \ + (!defined(__NVCC__) && !defined(DESUL_CUDA_ARCH_IS_PRE_VOLTA)) #include <desul/atomics/cuda/CUDA_asm_exchange.hpp> #endif @@ -174,42 +185,45 @@ namespace desul { template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange( T* const dest, T value, MemoryOrderSeqCst, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); - T return_val = atomic_exchange(dest,value,MemoryOrderRelaxed(),MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); + T return_val = atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return return_val; } template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange( T* const dest, T value, MemoryOrderSeqCst, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); - T return_val = atomic_exchange(dest,value,MemoryOrderRelaxed(),MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); + T return_val = atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return return_val; } template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 4, 
T>::type atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrderSeqCst, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); - T return_val = atomic_compare_exchange(dest,compare,value,MemoryOrderRelaxed(),MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); + T return_val = atomic_compare_exchange( + dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return return_val; } template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrderSeqCst, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); - T return_val = atomic_compare_exchange(dest,compare,value,MemoryOrderRelaxed(),MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); + T return_val = atomic_compare_exchange( + dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return return_val; } -} +} // namespace desul #endif #if defined(__CUDA_ARCH__) || !defined(__NVCC__) namespace desul { template <typename T, class MemoryOrder, class MemoryScope> -__device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type atomic_compare_exchange( +__device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type +atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrder, MemoryScope scope) { // This is a way to avoid dead lock in a warp or wave front T return_val; @@ -220,12 +234,13 @@ __device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::typ while (active != done_active) { if (!done) { if (Impl::lock_address_cuda((void*)dest, scope)) { - if(std::is_same<MemoryOrder,MemoryOrderSeqCst>::value) 
atomic_thread_fence(MemoryOrderRelease(),scope); - atomic_thread_fence(MemoryOrderAcquire(),scope); + if (std::is_same<MemoryOrder, MemoryOrderSeqCst>::value) + atomic_thread_fence(MemoryOrderRelease(), scope); + atomic_thread_fence(MemoryOrderAcquire(), scope); return_val = *dest; - if(return_val == compare) { + if (return_val == compare) { *dest = value; - atomic_thread_fence(MemoryOrderRelease(),scope); + atomic_thread_fence(MemoryOrderRelease(), scope); } Impl::unlock_address_cuda((void*)dest, scope); done = 1; @@ -236,8 +251,8 @@ __device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::typ return return_val; } template <typename T, class MemoryOrder, class MemoryScope> -__device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type atomic_exchange( - T* const dest, T value, MemoryOrder, MemoryScope scope) { +__device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type +atomic_exchange(T* const dest, T value, MemoryOrder, MemoryScope scope) { // This is a way to avoid dead lock in a warp or wave front T return_val; int done = 0; @@ -247,11 +262,12 @@ __device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::typ while (active != done_active) { if (!done) { if (Impl::lock_address_cuda((void*)dest, scope)) { - if(std::is_same<MemoryOrder,MemoryOrderSeqCst>::value) atomic_thread_fence(MemoryOrderRelease(),scope); - atomic_thread_fence(MemoryOrderAcquire(),scope); + if (std::is_same<MemoryOrder, MemoryOrderSeqCst>::value) + atomic_thread_fence(MemoryOrderRelease(), scope); + atomic_thread_fence(MemoryOrderAcquire(), scope); return_val = *dest; *dest = value; - atomic_thread_fence(MemoryOrderRelease(),scope); + atomic_thread_fence(MemoryOrderRelease(), scope); Impl::unlock_address_cuda((void*)dest, scope); done = 1; } @@ -260,9 +276,8 @@ __device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::typ } return return_val; } -} +} // namespace desul #endif - #endif #endif 
diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_GCC.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_GCC.hpp similarity index 65% rename from packages/kokkos/core/src/desul/atomics/Compare_Exchange_GCC.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_GCC.hpp index 418bea0b8b72f42883cc582bd58a5a170f738fea..fad3c43b046feb67ee05a3a86fe9986e349d5686 100644 --- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_GCC.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_GCC.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. Source: https://github.com/desul/desul @@ -18,39 +18,39 @@ SPDX-License-Identifier: (BSD-3-Clause) namespace desul { namespace Impl { -template<class T> +template <class T> struct atomic_exchange_available_gcc { constexpr static bool value = #ifndef DESUL_HAVE_LIBATOMIC - ((sizeof(T)==4 && alignof(T)==4) || + ((sizeof(T) == 4 && alignof(T) == 4) || #ifdef DESUL_HAVE_16BYTE_COMPARE_AND_SWAP - (sizeof(T)==16 && alignof(T)==16) || + (sizeof(T) == 16 && alignof(T) == 16) || #endif - (sizeof(T)==8 && alignof(T)==8)) && + (sizeof(T) == 8 && alignof(T) == 8)) && #endif - std::is_trivially_copyable<T>::value; + std::is_trivially_copyable<T>::value; }; -} //namespace Impl +} // namespace Impl -#if defined(__clang__) && (__clang_major__>=7) && !defined(__APPLE__) +#if defined(__clang__) && (__clang_major__ >= 7) && !defined(__APPLE__) +// clang-format off // Disable warning for large atomics on clang 7 and up (checked with godbolt) // error: large atomic operation may incur significant performance penalty [-Werror,-Watomic-alignment] // https://godbolt.org/z/G7YhqhbG6 +// clang-format on #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Watomic-alignment" #endif -template<class MemoryOrder, class MemoryScope> +template <class MemoryOrder, class 
MemoryScope> void atomic_thread_fence(MemoryOrder, MemoryScope) { __atomic_thread_fence(GCCMemoryOrder<MemoryOrder>::value); } template <typename T, class MemoryOrder, class MemoryScope> -std::enable_if_t<Impl::atomic_exchange_available_gcc<T>::value, T> -atomic_exchange( +std::enable_if_t<Impl::atomic_exchange_available_gcc<T>::value, T> atomic_exchange( T* dest, T value, MemoryOrder, MemoryScope) { T return_val; - __atomic_exchange( - dest, &value, &return_val, GCCMemoryOrder<MemoryOrder>::value); + __atomic_exchange(dest, &value, &return_val, GCCMemoryOrder<MemoryOrder>::value); return return_val; } @@ -58,17 +58,19 @@ atomic_exchange( // Those two get handled separatly. template <typename T, class MemoryOrder, class MemoryScope> std::enable_if_t<Impl::atomic_exchange_available_gcc<T>::value, T> -atomic_compare_exchange( - T* dest, T compare, T value, MemoryOrder, MemoryScope) { - (void)__atomic_compare_exchange( - dest, &compare, &value, false, GCCMemoryOrder<MemoryOrder>::value, GCCMemoryOrder<MemoryOrder>::value); +atomic_compare_exchange(T* dest, T compare, T value, MemoryOrder, MemoryScope) { + (void)__atomic_compare_exchange(dest, + &compare, + &value, + false, + GCCMemoryOrder<MemoryOrder>::value, + GCCMemoryOrder<MemoryOrder>::value); return compare; } template <typename T, class MemoryScope> std::enable_if_t<Impl::atomic_exchange_available_gcc<T>::value, T> -atomic_compare_exchange( - T* dest, T compare, T value, MemoryOrderRelease, MemoryScope) { +atomic_compare_exchange(T* dest, T compare, T value, MemoryOrderRelease, MemoryScope) { (void)__atomic_compare_exchange( dest, &compare, &value, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return compare; @@ -76,14 +78,13 @@ atomic_compare_exchange( template <typename T, class MemoryScope> std::enable_if_t<Impl::atomic_exchange_available_gcc<T>::value, T> -atomic_compare_exchange( - T* dest, T compare, T value, MemoryOrderAcqRel, MemoryScope) { +atomic_compare_exchange(T* dest, T compare, T value, 
MemoryOrderAcqRel, MemoryScope) { (void)__atomic_compare_exchange( dest, &compare, &value, false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); return compare; } -#if defined(__clang__) && (__clang_major__>=7) && !defined(__APPLE__) +#if defined(__clang__) && (__clang_major__ >= 7) && !defined(__APPLE__) #pragma GCC diagnostic pop #endif } // namespace desul diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_HIP.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp similarity index 88% rename from packages/kokkos/core/src/desul/atomics/Compare_Exchange_HIP.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp index d6bf04a7e6d25449934f4813c936bf37ce9bb07b..96739bc1fc705d04d7df05a5ddd91b72069de508 100644 --- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_HIP.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp @@ -13,51 +13,39 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifdef DESUL_HAVE_HIP_ATOMICS namespace desul { -#if defined(__HIP_DEVICE_COMPILE__) inline __device__ void atomic_thread_fence(MemoryOrderRelease, MemoryScopeDevice) { __threadfence(); } - inline __device__ void atomic_thread_fence(MemoryOrderAcquire, MemoryScopeDevice) { __threadfence(); } - inline __device__ void atomic_thread_fence(MemoryOrderAcqRel, MemoryScopeDevice) { __threadfence(); } - inline __device__ void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeDevice) { __threadfence(); } - inline __device__ void atomic_thread_fence(MemoryOrderRelease, MemoryScopeCore) { __threadfence_block(); } - inline __device__ void atomic_thread_fence(MemoryOrderAcquire, MemoryScopeCore) { __threadfence_block(); } - inline __device__ void atomic_thread_fence(MemoryOrderAcqRel, MemoryScopeCore) { __threadfence_block(); } - inline __device__ void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeCore) { __threadfence_block(); } - inline __device__ void atomic_thread_fence(MemoryOrderRelease, 
MemoryScopeNode) { __threadfence_system(); } - inline __device__ void atomic_thread_fence(MemoryOrderAcquire, MemoryScopeNode) { __threadfence_system(); } - inline __device__ void atomic_thread_fence(MemoryOrderAcqRel, MemoryScopeNode) { __threadfence_system(); } - inline __device__ void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeNode) { __threadfence_system(); } @@ -165,24 +153,15 @@ atomic_exchange(T* const dest, T value, MemoryOrderAcqRel, MemoryScope) { template <typename T, class MemoryScope> __device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type atomic_exchange(T* const dest, T value, MemoryOrderSeqCst, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return reinterpret_cast<T&>(return_val); -} - -template <typename T, class MemoryScope> -__device__ typename std::enable_if<sizeof(T) == 4, T>::type atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderSeqCst, MemoryScope) { atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); + T return_val = atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return return_val; + return reinterpret_cast<T&>(return_val); } template <typename T, class MemoryScope> -__device__ typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange( +__device__ typename std::enable_if<sizeof(T) == 4 || sizeof(T) == 8, T>::type +atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrderSeqCst, MemoryScope) { atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); T return_val = atomic_compare_exchange( @@ -192,10 +171,9 @@ __device__ typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_excha } template 
<typename T, class MemoryOrder, class MemoryScope> -DESUL_INLINE_FUNCTION __device__ - typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type - atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrder, MemoryScope scope) { +__device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type +atomic_compare_exchange( + T* const dest, T compare, T value, MemoryOrder, MemoryScope scope) { // This is a way to avoid dead lock in a warp or wave front T return_val; int done = 0; @@ -222,9 +200,8 @@ DESUL_INLINE_FUNCTION __device__ } template <typename T, class MemoryOrder, class MemoryScope> -DESUL_INLINE_FUNCTION __device__ - typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type - atomic_exchange(T* const dest, T value, MemoryOrder, MemoryScope scope) { +__device__ typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type +atomic_exchange(T* const dest, T value, MemoryOrder, MemoryScope scope) { // This is a way to avoid dead lock in a warp or wave front T return_val; int done = 0; @@ -247,7 +224,6 @@ DESUL_INLINE_FUNCTION __device__ } return return_val; } -#endif } // namespace desul #endif #endif diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_MSVC.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_MSVC.hpp similarity index 100% rename from packages/kokkos/core/src/desul/atomics/Compare_Exchange_MSVC.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_MSVC.hpp diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_OpenMP.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_OpenMP.hpp similarity index 71% rename from packages/kokkos/core/src/desul/atomics/Compare_Exchange_OpenMP.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_OpenMP.hpp index ded401f52d265db6f712e81bad67095b901079f8..dfea81a4d6cb507f0d13d8972ab757b9853ddeea 100644 --- 
a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_OpenMP.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_OpenMP.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. Source: https://github.com/desul/desul @@ -7,101 +7,104 @@ SPDX-License-Identifier: (BSD-3-Clause) */ #ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_OPENMP_HPP_ #define DESUL_ATOMICS_COMPARE_EXCHANGE_OPENMP_HPP_ -#include "desul/atomics/Common.hpp" -#include <cstdio> #include <omp.h> +#include "desul/atomics/Common.hpp" + #ifdef DESUL_HAVE_OPENMP_ATOMICS namespace desul { #if _OPENMP > 201800 // atomic_thread_fence for Core Scope inline void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeCore) { - // There is no seq_cst flush in OpenMP, isn't it the same anyway for fence? - #pragma omp flush acq_rel +// There is no seq_cst flush in OpenMP, isn't it the same anyway for fence? +#pragma omp flush acq_rel } inline void atomic_thread_fence(MemoryOrderAcqRel, MemoryScopeCore) { - #pragma omp flush acq_rel +#pragma omp flush acq_rel } inline void atomic_thread_fence(MemoryOrderRelease, MemoryScopeCore) { - #pragma omp flush release +#pragma omp flush release } inline void atomic_thread_fence(MemoryOrderAcquire, MemoryScopeCore) { - #pragma omp flush acquire +#pragma omp flush acquire } // atomic_thread_fence for Device Scope inline void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeDevice) { - // There is no seq_cst flush in OpenMP, isn't it the same anyway for fence? - #pragma omp flush acq_rel +// There is no seq_cst flush in OpenMP, isn't it the same anyway for fence? 
+#pragma omp flush acq_rel } inline void atomic_thread_fence(MemoryOrderAcqRel, MemoryScopeDevice) { - #pragma omp flush acq_rel +#pragma omp flush acq_rel } inline void atomic_thread_fence(MemoryOrderRelease, MemoryScopeDevice) { - #pragma omp flush release +#pragma omp flush release } inline void atomic_thread_fence(MemoryOrderAcquire, MemoryScopeDevice) { - #pragma omp flush acquire +#pragma omp flush acquire } #else // atomic_thread_fence for Core Scope inline void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeCore) { - #pragma omp flush +#pragma omp flush } inline void atomic_thread_fence(MemoryOrderAcqRel, MemoryScopeCore) { - #pragma omp flush +#pragma omp flush } inline void atomic_thread_fence(MemoryOrderRelease, MemoryScopeCore) { - #pragma omp flush +#pragma omp flush } inline void atomic_thread_fence(MemoryOrderAcquire, MemoryScopeCore) { - #pragma omp flush +#pragma omp flush } // atomic_thread_fence for Device Scope inline void atomic_thread_fence(MemoryOrderSeqCst, MemoryScopeDevice) { - #pragma omp flush +#pragma omp flush } inline void atomic_thread_fence(MemoryOrderAcqRel, MemoryScopeDevice) { - #pragma omp flush +#pragma omp flush } inline void atomic_thread_fence(MemoryOrderRelease, MemoryScopeDevice) { - #pragma omp flush +#pragma omp flush } inline void atomic_thread_fence(MemoryOrderAcquire, MemoryScopeDevice) { - #pragma omp flush +#pragma omp flush } #endif template <typename T, class MemoryOrder, class MemoryScope> -T atomic_exchange( - T* dest, T value, MemoryOrder, MemoryScope) { +T atomic_exchange(T* dest, T value, MemoryOrder, MemoryScope) { T return_val; - if(!std::is_same<MemoryOrder,MemoryOrderRelaxed>::value) - atomic_thread_fence(MemoryOrderAcquire(),MemoryScope()); + if (!std::is_same<MemoryOrder, MemoryOrderRelaxed>::value) + atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); T& x = *dest; - #pragma omp atomic capture - { return_val = x; x = value; } - if(!std::is_same<MemoryOrder,MemoryOrderRelaxed>::value) - 
atomic_thread_fence(MemoryOrderRelease(),MemoryScope()); +#pragma omp atomic capture + { + return_val = x; + x = value; + } + if (!std::is_same<MemoryOrder, MemoryOrderRelaxed>::value) + atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); return return_val; } -// OpenMP doesn't have compare exchange, so we use build-ins and rely on testing that this works -// Note that means we test this in OpenMPTarget offload regions! +// OpenMP doesn't have compare exchange, so we use build-ins and rely on testing that +// this works Note that means we test this in OpenMPTarget offload regions! template <typename T, class MemoryOrder, class MemoryScope> -std::enable_if_t<Impl::atomic_always_lock_free(sizeof(T)),T> atomic_compare_exchange( +std::enable_if_t<Impl::atomic_always_lock_free(sizeof(T)), T> atomic_compare_exchange( T* dest, T compare, T value, MemoryOrder, MemoryScope) { using cas_t = typename Impl::atomic_compare_exchange_type<sizeof(T)>::type; - cas_t retval = __sync_val_compare_and_swap( - reinterpret_cast<volatile cas_t*>(dest), - reinterpret_cast<cas_t&>(compare), - reinterpret_cast<cas_t&>(value)); + cas_t retval = __sync_val_compare_and_swap(reinterpret_cast<volatile cas_t*>(dest), + reinterpret_cast<cas_t&>(compare), + reinterpret_cast<cas_t&>(value)); return reinterpret_cast<T&>(retval); } -#if defined(__clang__) && (__clang_major__>=7) +#if defined(__clang__) && (__clang_major__ >= 7) // Disable warning for large atomics on clang 7 and up (checked with godbolt) +// clang-format off // error: large atomic operation may incur significant performance penalty [-Werror,-Watomic-alignment] +// clang-format on #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Watomic-alignment" #endif @@ -130,7 +133,7 @@ atomic_compare_exchange(T* /*dest*/, T /*compare*/, T value, MemoryOrder, Memory } #pragma omp end declare variant -#if defined(__clang__) && (__clang_major__>=7) +#if defined(__clang__) && (__clang_major__ >= 7) #pragma GCC diagnostic pop #endif 
diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_SYCL.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_SYCL.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6c8c68511001962e4d3a56ab425ee982560971a5 --- /dev/null +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_SYCL.hpp @@ -0,0 +1,99 @@ +/* +Copyright (c) 2019, Lawrence Livermore National Security, LLC +and DESUL project contributors. See the COPYRIGHT file for details. +Source: https://github.com/desul/desul + +SPDX-License-Identifier: (BSD-3-Clause) +*/ + +#ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_SYCL_HPP_ +#define DESUL_ATOMICS_COMPARE_EXCHANGE_SYCL_HPP_ + +// clang-format off +#include "desul/atomics/SYCLConversions.hpp" +#include "desul/atomics/Common.hpp" + +#include <CL/sycl.hpp> +// clang-format on + +#ifdef DESUL_HAVE_SYCL_ATOMICS + +namespace desul { + +template <class MemoryOrder, class MemoryScope> +inline void atomic_thread_fence(MemoryOrder, MemoryScope) { + sycl::atomic_fence( + Impl::DesulToSYCLMemoryOrder<MemoryOrder, /*extended namespace*/ false>::value, + Impl::DesulToSYCLMemoryScope<MemoryScope, /*extended namespace*/ false>::value); +} + +template <typename T, class MemoryOrder, class MemoryScope> +typename std::enable_if<sizeof(T) == 4, T>::type atomic_compare_exchange( + T* const dest, T compare, T value, MemoryOrder, MemoryScope) { + static_assert(sizeof(unsigned int) == 4, + "this function assumes an unsigned int is 32-bit"); + Impl::sycl_atomic_ref<unsigned int, MemoryOrder, MemoryScope> dest_ref( + *reinterpret_cast<unsigned int*>(dest)); + dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned int*>(&compare), + *reinterpret_cast<unsigned int*>(&value)); + return compare; +} +template <typename T, class MemoryOrder, class MemoryScope> +typename std::enable_if<sizeof(T) == 8, T>::type atomic_compare_exchange( + T* const dest, T compare, T value, MemoryOrder, MemoryScope) { + 
static_assert(sizeof(unsigned long long int) == 8, + "this function assumes an unsigned long long is 64-bit"); + Impl::sycl_atomic_ref<unsigned long long int, MemoryOrder, MemoryScope> dest_ref( + *reinterpret_cast<unsigned long long int*>(dest)); + dest_ref.compare_exchange_strong(*reinterpret_cast<unsigned long long int*>(&compare), + *reinterpret_cast<unsigned long long int*>(&value)); + return compare; +} + +template <typename T, class MemoryOrder, class MemoryScope> +typename std::enable_if<sizeof(T) == 4, T>::type atomic_exchange(T* const dest, + T value, + MemoryOrder, + MemoryScope) { + static_assert(sizeof(unsigned int) == 4, + "this function assumes an unsigned int is 32-bit"); + Impl::sycl_atomic_ref<unsigned int, MemoryOrder, MemoryScope> dest_ref( + *reinterpret_cast<unsigned int*>(dest)); + unsigned int return_val = dest_ref.exchange(*reinterpret_cast<unsigned int*>(&value)); + return reinterpret_cast<T&>(return_val); +} +template <typename T, class MemoryOrder, class MemoryScope> +typename std::enable_if<sizeof(T) == 8, T>::type atomic_exchange(T* const dest, + T value, + MemoryOrder, + MemoryScope) { + static_assert(sizeof(unsigned long long int) == 8, + "this function assumes an unsigned long long is 64-bit"); + Impl::sycl_atomic_ref<unsigned long long int, MemoryOrder, MemoryScope> dest_ref( + *reinterpret_cast<unsigned long long int*>(dest)); + unsigned long long int return_val = + dest_ref.exchange(reinterpret_cast<unsigned long long int&>(value)); + return reinterpret_cast<T&>(return_val); +} + +template <typename T, class MemoryOrder, class MemoryScope> +typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type +atomic_compare_exchange( + T* const /*dest*/, T compare, T /*value*/, MemoryOrder, MemoryScope) { + // FIXME_SYCL not implemented + assert(false); + return compare; +} + +template <typename T, class MemoryOrder, class MemoryScope> +typename std::enable_if<(sizeof(T) != 8) && (sizeof(T) != 4), T>::type atomic_exchange( + T* 
const /*dest*/, T value, MemoryOrder, MemoryScope) { + // FIXME_SYCL not implemented + assert(false); + return value; +} + +} // namespace desul + +#endif +#endif diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_ScopeCaller.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_ScopeCaller.hpp similarity index 100% rename from packages/kokkos/core/src/desul/atomics/Compare_Exchange_ScopeCaller.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_ScopeCaller.hpp diff --git a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_Serial.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_Serial.hpp similarity index 83% rename from packages/kokkos/core/src/desul/atomics/Compare_Exchange_Serial.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_Serial.hpp index be7b46d5fa0540f20abfb8903f20a3e2f7d80e5a..9d0db5c9e1318e8350e621dbf17f5e6c2c2de83d 100644 --- a/packages/kokkos/core/src/desul/atomics/Compare_Exchange_Serial.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_Serial.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. 
Source: https://github.com/desul/desul @@ -10,13 +10,11 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifdef DESUL_HAVE_SERIAL_ATOMICS namespace desul { -template<class MemoryScope> -void atomic_thread_fence(MemoryOrderAcquire, MemoryScope) { -} +template <class MemoryScope> +void atomic_thread_fence(MemoryOrderAcquire, MemoryScope) {} -template<class MemoryScope> -void atomic_thread_fence(MemoryOrderRelease, MemoryScope) { -} +template <class MemoryScope> +void atomic_thread_fence(MemoryOrderRelease, MemoryScope) {} template <typename T, class MemoryScope> T atomic_compare_exchange( diff --git a/packages/kokkos/core/src/desul/atomics/GCC.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/GCC.hpp similarity index 90% rename from packages/kokkos/core/src/desul/atomics/GCC.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/GCC.hpp index cd0c2bea1180662969e0af8abee5d23a1b7334ca..239c84fd30dc899c85109d82a3de7d9e66b3b73d 100644 --- a/packages/kokkos/core/src/desul/atomics/GCC.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/GCC.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. 
Source: https://github.com/desul/desul @@ -10,7 +10,7 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifdef DESUL_HAVE_GCC_ATOMICS -#include<type_traits> +#include <type_traits> /* Built - in Function : type __atomic_add_fetch(type * ptr, type val, int memorder) Built - in Function : type __atomic_sub_fetch(type * ptr, type val, int memorder) @@ -91,18 +91,20 @@ DESUL_GCC_INTEGRAL_OP_ATOMICS(MemoryOrderSeqCst, MemoryScopeDevice) DESUL_GCC_INTEGRAL_OP_ATOMICS(MemoryOrderSeqCst, MemoryScopeCore) template <typename T, class MemoryOrder, class MemoryScope> -std::enable_if_t<!Impl::atomic_exchange_available_gcc<T>::value, T> -atomic_exchange(T* const dest, - Impl::dont_deduce_this_parameter_t<const T> val, - MemoryOrder /*order*/, - MemoryScope scope) { +std::enable_if_t<!Impl::atomic_exchange_available_gcc<T>::value, T> atomic_exchange( + T* const dest, + Impl::dont_deduce_this_parameter_t<const T> val, + MemoryOrder /*order*/, + MemoryScope scope) { // Acquire a lock for the address + // clang-format off while (!Impl::lock_address((void*)dest, scope)) {} + // clang-format on - atomic_thread_fence(MemoryOrderAcquire(),scope); + atomic_thread_fence(MemoryOrderAcquire(), scope); T return_val = *dest; *dest = val; - atomic_thread_fence(MemoryOrderRelease(),scope); + atomic_thread_fence(MemoryOrderRelease(), scope); Impl::unlock_address((void*)dest, scope); return return_val; } @@ -110,18 +112,20 @@ atomic_exchange(T* const dest, template <typename T, class MemoryOrder, class MemoryScope> std::enable_if_t<!Impl::atomic_exchange_available_gcc<T>::value, T> atomic_compare_exchange(T* const dest, - Impl::dont_deduce_this_parameter_t<const T> compare, - Impl::dont_deduce_this_parameter_t<const T> val, - MemoryOrder /*order*/, - MemoryScope scope) { + Impl::dont_deduce_this_parameter_t<const T> compare, + Impl::dont_deduce_this_parameter_t<const T> val, + MemoryOrder /*order*/, + MemoryScope scope) { // Acquire a lock for the address + // clang-format off while 
(!Impl::lock_address((void*)dest, scope)) {} + // clang-format on - atomic_thread_fence(MemoryOrderAcquire(),scope); + atomic_thread_fence(MemoryOrderAcquire(), scope); T return_val = *dest; - if(return_val == compare) { + if (return_val == compare) { *dest = val; - atomic_thread_fence(MemoryOrderRelease(),scope); + atomic_thread_fence(MemoryOrderRelease(), scope); } Impl::unlock_address((void*)dest, scope); return return_val; diff --git a/packages/kokkos/core/src/desul/atomics/Generic.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Generic.hpp similarity index 100% rename from packages/kokkos/core/src/desul/atomics/Generic.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Generic.hpp diff --git a/packages/kokkos/core/src/desul/atomics/HIP.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/HIP.hpp similarity index 99% rename from packages/kokkos/core/src/desul/atomics/HIP.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/HIP.hpp index 5365ab91316e4bab9381b13a7135e61a3b2ca9b2..e51406e54dfd8661e9b8ec9cdfed5e3e293f00d7 100644 --- a/packages/kokkos/core/src/desul/atomics/HIP.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/HIP.hpp @@ -109,7 +109,6 @@ DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(dec_mod, unsigned int) #undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL #undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP - // 2/ host-side fallback implementation for atomic functions not provided by GCC #define DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_FUN(OP_LOWERCASE, OP_PASCAL_CASE, TYPE) \ @@ -177,7 +176,6 @@ DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_INCREMENT_DECREMENT(unsigned long long) #undef DESUL_IMPL_HIP_HOST_FALLBACK_ATOMIC_INCREMENT_DECREMENT - // 3/ device-side fallback implementation for atomic functions defined in GCC overload // set @@ -222,4 +220,3 @@ DESUL_IMPL_HIP_DEVICE_FALLBACK_ATOMIC_FUN(nand, Nand) #endif #endif - diff --git a/packages/kokkos/core/src/desul/atomics/Lock_Array.hpp 
b/packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array.hpp similarity index 81% rename from packages/kokkos/core/src/desul/atomics/Lock_Array.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array.hpp index 8fd0e8bbd7718a1097d898b01b20aa71ff515f2f..6b2d4e74bd3cf28c9d5c8389e2153758f097e3b1 100644 --- a/packages/kokkos/core/src/desul/atomics/Lock_Array.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array.hpp @@ -57,17 +57,14 @@ inline void finalize_lock_arrays() { } template <typename MemoryScope> inline bool lock_address(void* ptr, MemoryScope ms) { - return 0 == atomic_exchange(host_locks__::get_host_lock_(ptr), - int32_t(1), - MemoryOrderSeqCst(), - ms); + return 0 == + atomic_exchange( + host_locks__::get_host_lock_(ptr), int32_t(1), MemoryOrderSeqCst(), ms); } template <typename MemoryScope> void unlock_address(void* ptr, MemoryScope ms) { - (void)atomic_exchange(host_locks__::get_host_lock_(ptr), - int32_t(0), - MemoryOrderSeqCst(), - ms); + (void)atomic_exchange( + host_locks__::get_host_lock_(ptr), int32_t(0), MemoryOrderSeqCst(), ms); } } // namespace Impl } // namespace desul diff --git a/packages/kokkos/core/src/desul/atomics/Lock_Array_Cuda.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array_Cuda.hpp similarity index 75% rename from packages/kokkos/core/src/desul/atomics/Lock_Array_Cuda.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array_Cuda.hpp index de99185349043dc6e0f13c7e57c14dbc080deb9e..2166fa3cb78e70af887ff7f74e2cac9f141bf1de 100644 --- a/packages/kokkos/core/src/desul/atomics/Lock_Array_Cuda.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array_Cuda.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. 
Source: https://github.com/desul/desul @@ -9,8 +9,8 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifndef DESUL_ATOMICS_LOCK_ARRAY_CUDA_HPP_ #define DESUL_ATOMICS_LOCK_ARRAY_CUDA_HPP_ -#include "desul/atomics/Macros.hpp" #include "desul/atomics/Common.hpp" +#include "desul/atomics/Macros.hpp" #ifdef DESUL_HAVE_CUDA_ATOMICS @@ -23,7 +23,7 @@ namespace Impl { #define DESUL_IMPL_BALLOT_MASK(m, x) __ballot_sync(m, x) #define DESUL_IMPL_ACTIVEMASK __activemask() #else -#define DESUL_IMPL_BALLOT_MASK(m, x) m==0?0:1 +#define DESUL_IMPL_BALLOT_MASK(m, x) m == 0 ? 0 : 1 #define DESUL_IMPL_ACTIVEMASK 0 #endif @@ -32,14 +32,13 @@ namespace Impl { extern int32_t* CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h; extern int32_t* CUDA_SPACE_ATOMIC_LOCKS_NODE_h; - /// \brief After this call, the g_host_cuda_lock_arrays variable has /// valid, initialized arrays. /// /// This call is idempotent. /// The function is templated to make it a weak symbol to deal with Kokkos/RAJA /// snapshotted version while also linking against pure Desul -template<typename /*AlwaysInt*/ = int> +template <typename /*AlwaysInt*/ = int> void init_lock_arrays_cuda(); /// \brief After this call, the g_host_cuda_lock_arrays variable has @@ -47,8 +46,8 @@ void init_lock_arrays_cuda(); /// /// This call is idempotent. /// The function is templated to make it a weak symbol to deal with Kokkos/RAJA -/// snappshotted version while also linking against pure Desul -template<typename T = int> +/// snapshotted version while also linking against pure Desul +template <typename /*AlwaysInt*/ = int> void finalize_lock_arrays_cuda(); } // namespace Impl @@ -77,7 +76,7 @@ namespace Impl { /// instances in other translation units, we must update this CUDA global /// variable based on the Host global variable prior to running any kernels /// that will use it. -/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro. +/// That is the purpose of the ensure_cuda_lock_arrays_on_device function. 
__device__ #ifdef __CUDACC_RDC__ __constant__ extern @@ -139,34 +138,42 @@ namespace { static int lock_array_copied = 0; inline int eliminate_warning_for_lock_array() { return lock_array_copied; } } // namespace -} // namespace Impl -} // namespace desul -/* It is critical that this code be a macro, so that it will - capture the right address for desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE - putting this in an inline function will NOT do the right thing! */ -#define DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \ - { \ - if (::desul::Impl::lock_array_copied == 0) { \ - cudaMemcpyToSymbol(::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE, \ - &::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h, \ - sizeof(int32_t*)); \ - cudaMemcpyToSymbol(::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_NODE, \ - &::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_NODE_h, \ - sizeof(int32_t*)); \ - } \ - ::desul::Impl::lock_array_copied = 1; \ + +#ifdef __CUDACC_RDC__ +inline +#else +static +#endif + void + copy_cuda_lock_arrays_to_device() { + if (lock_array_copied == 0) { + cudaMemcpyToSymbol(CUDA_SPACE_ATOMIC_LOCKS_DEVICE, + &CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h, + sizeof(int32_t*)); + cudaMemcpyToSymbol(CUDA_SPACE_ATOMIC_LOCKS_NODE, + &CUDA_SPACE_ATOMIC_LOCKS_NODE_h, + sizeof(int32_t*)); } + lock_array_copied = 1; +} +} // namespace Impl +} // namespace desul #endif /* defined( __CUDACC__ ) */ -#endif /* defined( KOKKOS_ENABLE_CUDA ) */ +#endif /* defined( DESUL_HAVE_CUDA_ATOMICS ) */ + +namespace desul { #if defined(__CUDACC_RDC__) || (!defined(__CUDACC__)) -#define DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() +inline void ensure_cuda_lock_arrays_on_device() {} #else -#define DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \ - DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() +static inline void ensure_cuda_lock_arrays_on_device() { + Impl::copy_cuda_lock_arrays_to_device(); +} #endif -#endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP_ */ +} // namespace desul + +#endif /* #ifndef DESUL_ATOMICS_LOCK_ARRAY_CUDA_HPP_ */ diff --git 
a/packages/kokkos/core/src/desul/atomics/Lock_Array_HIP.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array_HIP.hpp similarity index 82% rename from packages/kokkos/core/src/desul/atomics/Lock_Array_HIP.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array_HIP.hpp index 9e6f5e59800b6778bf2c0592f0104526a730ac00..7c843f23c9547db6dc818a6d762ac3d7fa2c58ca 100644 --- a/packages/kokkos/core/src/desul/atomics/Lock_Array_HIP.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/Lock_Array_HIP.hpp @@ -6,8 +6,8 @@ Source: https://github.com/desul/desul SPDX-License-Identifier: (BSD-3-Clause) */ -#ifndef DESUL_ATORMICS_LOCK_ARRAY_HIP_HPP_ -#define DESUL_ATORMICS_LOCK_ARRAY_HIP_HPP_ +#ifndef DESUL_ATOMICS_LOCK_ARRAY_HIP_HPP_ +#define DESUL_ATOMICS_LOCK_ARRAY_HIP_HPP_ #include "desul/atomics/Common.hpp" #include "desul/atomics/Macros.hpp" @@ -23,6 +23,8 @@ namespace Impl { #ifdef __HIP_DEVICE_COMPILE__ #define DESUL_IMPL_BALLOT_MASK(x) __ballot(x) +#else +#define DESUL_IMPL_BALLOT_MASK(x) 0 #endif /** @@ -37,8 +39,8 @@ extern int32_t* HIP_SPACE_ATOMIC_LOCKS_NODE_h; /// /// This call is idempotent. /// The function is templated to make it a weak symbol to deal with Kokkos/RAJA -/// snappshotted version while also linking against pure Desul -template<typename T = int> +/// snapshotted version while also linking against pure Desul +template <typename /*AlwaysInt*/ = int> void init_lock_arrays_hip(); /// \brief After this call, the g_host_cuda_lock_arrays variable has @@ -46,8 +48,8 @@ void init_lock_arrays_hip(); /// /// This call is idempotent. 
/// The function is templated to make it a weak symbol to deal with Kokkos/RAJA -/// snappshotted version while also linking against pure Desul -template<typename T = int> +/// snapshotted version while also linking against pure Desul +template <typename /*AlwaysInt*/ = int> void finalize_lock_arrays_hip(); } // namespace Impl } // namespace desul @@ -145,17 +147,18 @@ inline int eliminate_warning_for_lock_array() { return lock_array_copied; } /* It is critical that this code be a macro, so that it will capture the right address for g_device_hip_lock_arrays! putting this in an inline function will NOT do the right thing! */ -#define DESUL_IMPL_COPY_HIP_LOCK_ARRAYS_TO_DEVICE() \ - { \ - if (::desul::Impl::lock_array_copied == 0) { \ - (void) hipMemcpyToSymbol(HIP_SYMBOL(::desul::Impl::HIP_SPACE_ATOMIC_LOCKS_DEVICE), \ - &::desul::Impl::HIP_SPACE_ATOMIC_LOCKS_DEVICE_h, \ - sizeof(int32_t*)); \ - (void) hipMemcpyToSymbol(HIP_SYMBOL(::desul::Impl::HIP_SPACE_ATOMIC_LOCKS_NODE), \ - &::desul::Impl::HIP_SPACE_ATOMIC_LOCKS_NODE_h, \ - sizeof(int32_t*)); \ - } \ - ::desul::Impl::lock_array_copied = 1; \ +#define DESUL_IMPL_COPY_HIP_LOCK_ARRAYS_TO_DEVICE() \ + { \ + if (::desul::Impl::lock_array_copied == 0) { \ + (void)hipMemcpyToSymbol( \ + HIP_SYMBOL(::desul::Impl::HIP_SPACE_ATOMIC_LOCKS_DEVICE), \ + &::desul::Impl::HIP_SPACE_ATOMIC_LOCKS_DEVICE_h, \ + sizeof(int32_t*)); \ + (void)hipMemcpyToSymbol(HIP_SYMBOL(::desul::Impl::HIP_SPACE_ATOMIC_LOCKS_NODE), \ + &::desul::Impl::HIP_SPACE_ATOMIC_LOCKS_NODE_h, \ + sizeof(int32_t*)); \ + } \ + ::desul::Impl::lock_array_copied = 1; \ } #endif diff --git a/packages/kokkos/core/src/desul/atomics/Macros.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/Macros.hpp similarity index 100% rename from packages/kokkos/core/src/desul/atomics/Macros.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/Macros.hpp diff --git a/packages/kokkos/core/src/desul/atomics/OpenMP.hpp 
b/packages/kokkos/tpls/desul/include/desul/atomics/OpenMP.hpp similarity index 87% rename from packages/kokkos/core/src/desul/atomics/OpenMP.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/OpenMP.hpp index 3fa22c36aca37e9e91e5b08aac9b3e61b8256ebc..bc6fb26c2fc9ee1ec7d9f4deb6c4592a81c19fc9 100644 --- a/packages/kokkos/core/src/desul/atomics/OpenMP.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/OpenMP.hpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. Source: https://github.com/desul/desul @@ -10,6 +10,6 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifdef DESUL_HAVE_OPENMP_ATOMICS -#include<desul/atomics/openmp/OpenMP_40.hpp> +#include <desul/atomics/openmp/OpenMP_40.hpp> #endif #endif diff --git a/packages/kokkos/tpls/desul/include/desul/atomics/SYCL.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/SYCL.hpp new file mode 100644 index 0000000000000000000000000000000000000000..da34564f6967313e9c7c8ea1e215feec67b40197 --- /dev/null +++ b/packages/kokkos/tpls/desul/include/desul/atomics/SYCL.hpp @@ -0,0 +1,64 @@ +/* +Copyright (c) 2019, Lawrence Livermore National Security, LLC +and DESUL project contributors. See the COPYRIGHT file for details. 
+Source: https://github.com/desul/desul + +SPDX-License-Identifier: (BSD-3-Clause) +*/ +#ifndef DESUL_ATOMICS_SYCL_HPP_ +#define DESUL_ATOMICS_SYCL_HPP_ + +#ifdef DESUL_HAVE_SYCL_ATOMICS + +// clang-format off +#include "desul/atomics/SYCLConversions.hpp" +#include "desul/atomics/Common.hpp" +// clang-format on + +namespace desul { + +#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, TYPE) \ + template <class MemoryOrder> \ + TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeDevice) { \ + Impl::sycl_atomic_ref<TYPE, MemoryOrder, MemoryScopeDevice> dest_ref(*dest); \ + return dest_ref.fetch_##OPER(val); \ + } \ + template <class MemoryOrder> \ + TYPE atomic_fetch_##OPER(TYPE* dest, TYPE val, MemoryOrder, MemoryScopeCore) { \ + Impl::sycl_atomic_ref<TYPE, MemoryOrder, MemoryScopeCore> dest_ref(*dest); \ + return dest_ref.fetch_##OPER(val); \ + } + +#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(OPER) \ + DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, int) \ + DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned int) \ + DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, long) \ + DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned long) \ + DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, long long) \ + DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, unsigned long long) + +#define DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(OPER) \ + DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, float) \ + DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER(OPER, double) + +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(add) +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(sub) +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(and) +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(or) +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(xor) +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(min) +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL(max) + +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(add) +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(sub) +DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(min) 
+DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT(max) + +#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_FLOATING_POINT +#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER_INTEGRAL +#undef DESUL_IMPL_SYCL_ATOMIC_FETCH_OPER + +} // namespace desul + +#endif // DESUL_HAVE_SYCL_ATOMICS +#endif // DESUL_ATOMICS_SYCL_HPP_ diff --git a/packages/kokkos/core/src/desul/atomics/SYCLConversions.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/SYCLConversions.hpp similarity index 79% rename from packages/kokkos/core/src/desul/atomics/SYCLConversions.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/SYCLConversions.hpp index 0ff3c7fee7ec436cd727bed96c3b80d0b742a67c..7debf91d6debc6354b16e2ccc7b5aafad0629983 100644 --- a/packages/kokkos/core/src/desul/atomics/SYCLConversions.hpp +++ b/packages/kokkos/tpls/desul/include/desul/atomics/SYCLConversions.hpp @@ -80,16 +80,20 @@ struct DesulToSYCLMemoryScope<MemoryScopeSystem, extended_namespace> { sycl_memory_scope<extended_namespace>::system; }; -template <class T, - class MemoryOrder, - class MemoryScope, - sycl::access::address_space AddressSpace> -using sycl_atomic_ref = - sycl::ext::oneapi::atomic_ref<T, - DesulToSYCLMemoryOrder<MemoryOrder>::value, - DesulToSYCLMemoryScope<MemoryScope>::value, - AddressSpace>; - +// FIXME_SYCL generic_space isn't available yet for CUDA. 
+#ifdef __NVPTX__ +template <class T, class MemoryOrder, class MemoryScope> +using sycl_atomic_ref = sycl::atomic_ref<T, + DesulToSYCLMemoryOrder<MemoryOrder>::value, + DesulToSYCLMemoryScope<MemoryScope>::value, + sycl::access::address_space::global_space>; +#else +template <class T, class MemoryOrder, class MemoryScope> +using sycl_atomic_ref = sycl::atomic_ref<T, + DesulToSYCLMemoryOrder<MemoryOrder>::value, + DesulToSYCLMemoryScope<MemoryScope>::value, + sycl::access::address_space::generic_space>; +#endif } // namespace Impl } // namespace desul diff --git a/packages/kokkos/core/src/desul/atomics/cuda/CUDA_asm.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/CUDA_asm.hpp similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/CUDA_asm.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/CUDA_asm.hpp diff --git a/packages/kokkos/core/src/desul/atomics/cuda/CUDA_asm_exchange.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/CUDA_asm_exchange.hpp similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/CUDA_asm_exchange.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/CUDA_asm_exchange.hpp diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm.inc b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm.inc similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm.inc rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm.inc diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc diff --git 
a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_forceglobal diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_generic diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_isglobal b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_isglobal similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_isglobal rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_isglobal diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_predicate b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_predicate similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_predicate rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_fetch_op.inc_predicate diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc rename to 
packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_forceglobal b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_forceglobal similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_forceglobal rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_forceglobal diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_generic b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_generic similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_generic rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_generic diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_isglobal b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_isglobal similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_isglobal rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_isglobal diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_predicate b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_predicate similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_predicate rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_atomic_op.inc_predicate diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_exchange.inc b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_exchange.inc similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_exchange.inc rename to 
packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_exchange.inc diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_exchange_memorder.inc b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_exchange_memorder.inc similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_exchange_memorder.inc rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_exchange_memorder.inc diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_exchange_op.inc b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_exchange_op.inc similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_exchange_op.inc rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_exchange_op.inc diff --git a/packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_memorder.inc b/packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_memorder.inc similarity index 100% rename from packages/kokkos/core/src/desul/atomics/cuda/cuda_cc7_asm_memorder.inc rename to packages/kokkos/tpls/desul/include/desul/atomics/cuda/cuda_cc7_asm_memorder.inc diff --git a/packages/kokkos/core/src/desul/atomics/openmp/OpenMP_40.hpp b/packages/kokkos/tpls/desul/include/desul/atomics/openmp/OpenMP_40.hpp similarity index 100% rename from packages/kokkos/core/src/desul/atomics/openmp/OpenMP_40.hpp rename to packages/kokkos/tpls/desul/include/desul/atomics/openmp/OpenMP_40.hpp diff --git a/packages/kokkos/core/src/desul/atomics/openmp/OpenMP_40_op.inc b/packages/kokkos/tpls/desul/include/desul/atomics/openmp/OpenMP_40_op.inc similarity index 100% rename from packages/kokkos/core/src/desul/atomics/openmp/OpenMP_40_op.inc rename to packages/kokkos/tpls/desul/include/desul/atomics/openmp/OpenMP_40_op.inc diff --git a/packages/kokkos/core/src/desul/src/Lock_Array_CUDA.cpp b/packages/kokkos/tpls/desul/src/Lock_Array_CUDA.cpp similarity index 79% 
rename from packages/kokkos/core/src/desul/src/Lock_Array_CUDA.cpp rename to packages/kokkos/tpls/desul/src/Lock_Array_CUDA.cpp index 8913f8bc7b80fc11844438f384582e7a036c824f..19944b378e2c47090dbe3ce28913017a3f308933 100644 --- a/packages/kokkos/core/src/desul/src/Lock_Array_CUDA.cpp +++ b/packages/kokkos/tpls/desul/src/Lock_Array_CUDA.cpp @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 2019, Lawrence Livermore National Security, LLC and DESUL project contributors. See the COPYRIGHT file for details. Source: https://github.com/desul/desul @@ -6,10 +6,10 @@ Source: https://github.com/desul/desul SPDX-License-Identifier: (BSD-3-Clause) */ -#include <desul/atomics/Lock_Array.hpp> #include <cinttypes> -#include <string> +#include <desul/atomics/Lock_Array.hpp> #include <sstream> +#include <string> #ifdef DESUL_HAVE_CUDA_ATOMICS #ifdef __CUDACC_RDC__ @@ -17,7 +17,7 @@ namespace desul { namespace Impl { __device__ __constant__ int32_t* CUDA_SPACE_ATOMIC_LOCKS_DEVICE = nullptr; __device__ __constant__ int32_t* CUDA_SPACE_ATOMIC_LOCKS_NODE = nullptr; -} +} // namespace Impl } // namespace desul #endif @@ -37,7 +37,6 @@ __global__ void init_lock_arrays_cuda_kernel() { namespace Impl { - int32_t* CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h = nullptr; int32_t* CUDA_SPACE_ATOMIC_LOCKS_NODE_h = nullptr; @@ -46,37 +45,39 @@ int32_t* CUDA_SPACE_ATOMIC_LOCKS_NODE_h = nullptr; namespace { void check_error_and_throw_cuda(cudaError e, const std::string msg) { - if(e != cudaSuccess) { + if (e != cudaSuccess) { std::ostringstream out; out << "Desul::Error: " << msg << " error(" << cudaGetErrorName(e) - << "): " << cudaGetErrorString(e); + << "): " << cudaGetErrorString(e); throw std::runtime_error(out.str()); } } -} +} // namespace // define functions -template<typename T> +template <typename T> void init_lock_arrays_cuda() { if (CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h != nullptr) return; auto error_malloc1 = cudaMalloc(&CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h, - sizeof(int32_t) * (CUDA_SPACE_ATOMIC_MASK + 1)); - 
check_error_and_throw_cuda(error_malloc1, "init_lock_arrays_cuda: cudaMalloc device locks"); + sizeof(int32_t) * (CUDA_SPACE_ATOMIC_MASK + 1)); + check_error_and_throw_cuda(error_malloc1, + "init_lock_arrays_cuda: cudaMalloc device locks"); auto error_malloc2 = cudaMallocHost(&CUDA_SPACE_ATOMIC_LOCKS_NODE_h, - sizeof(int32_t) * (CUDA_SPACE_ATOMIC_MASK + 1)); - check_error_and_throw_cuda(error_malloc2, "init_lock_arrays_cuda: cudaMalloc host locks"); + sizeof(int32_t) * (CUDA_SPACE_ATOMIC_MASK + 1)); + check_error_and_throw_cuda(error_malloc2, + "init_lock_arrays_cuda: cudaMalloc host locks"); auto error_sync1 = cudaDeviceSynchronize(); - DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE(); + copy_cuda_lock_arrays_to_device(); check_error_and_throw_cuda(error_sync1, "init_lock_arrays_cuda: post mallocs"); init_lock_arrays_cuda_kernel<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256, 256>>>(); auto error_sync2 = cudaDeviceSynchronize(); check_error_and_throw_cuda(error_sync2, "init_lock_arrays_cuda: post init kernel"); } -template<typename T> +template <typename T> void finalize_lock_arrays_cuda() { if (CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h == nullptr) return; cudaFree(CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h); @@ -84,7 +85,7 @@ void finalize_lock_arrays_cuda() { CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h = nullptr; CUDA_SPACE_ATOMIC_LOCKS_NODE_h = nullptr; #ifdef __CUDACC_RDC__ - DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE(); + copy_cuda_lock_arrays_to_device(); #endif } diff --git a/packages/kokkos/core/src/desul/src/Lock_Array_HIP.cpp b/packages/kokkos/tpls/desul/src/Lock_Array_HIP.cpp similarity index 83% rename from packages/kokkos/core/src/desul/src/Lock_Array_HIP.cpp rename to packages/kokkos/tpls/desul/src/Lock_Array_HIP.cpp index 40030df643fa1e85a0fe433b0135386418590193..5ccc6f7d54a1721af83c44b0246eea3fbc962c9f 100644 --- a/packages/kokkos/core/src/desul/src/Lock_Array_HIP.cpp +++ b/packages/kokkos/tpls/desul/src/Lock_Array_HIP.cpp @@ -8,8 +8,8 @@ SPDX-License-Identifier: (BSD-3-Clause) 
#include <cinttypes> #include <desul/atomics/Lock_Array.hpp> -#include <string> #include <sstream> +#include <string> #ifdef DESUL_HAVE_HIP_ATOMICS #ifdef DESUL_HIP_RDC @@ -45,27 +45,29 @@ int32_t* HIP_SPACE_ATOMIC_LOCKS_NODE_h = nullptr; namespace { void check_error_and_throw_hip(hipError_t e, const std::string msg) { - if(e != hipSuccess) { + if (e != hipSuccess) { std::ostringstream out; out << "Desul::Error: " << msg << " error(" << hipGetErrorName(e) - << "): " << hipGetErrorString(e); + << "): " << hipGetErrorString(e); throw std::runtime_error(out.str()); } } -} +} // namespace -template<typename T> +template <typename T> void init_lock_arrays_hip() { if (HIP_SPACE_ATOMIC_LOCKS_DEVICE_h != nullptr) return; auto error_malloc1 = hipMalloc(&HIP_SPACE_ATOMIC_LOCKS_DEVICE_h, - sizeof(int32_t) * (HIP_SPACE_ATOMIC_MASK + 1)); - check_error_and_throw_hip(error_malloc1, "init_lock_arrays_hip: hipMalloc device locks"); + sizeof(int32_t) * (HIP_SPACE_ATOMIC_MASK + 1)); + check_error_and_throw_hip(error_malloc1, + "init_lock_arrays_hip: hipMalloc device locks"); auto error_malloc2 = hipHostMalloc(&HIP_SPACE_ATOMIC_LOCKS_NODE_h, - sizeof(int32_t) * (HIP_SPACE_ATOMIC_MASK + 1)); - check_error_and_throw_hip(error_malloc2, "init_lock_arrays_hip: hipMallocHost host locks"); + sizeof(int32_t) * (HIP_SPACE_ATOMIC_MASK + 1)); + check_error_and_throw_hip(error_malloc2, + "init_lock_arrays_hip: hipMallocHost host locks"); auto error_sync1 = hipDeviceSynchronize(); DESUL_IMPL_COPY_HIP_LOCK_ARRAYS_TO_DEVICE(); @@ -77,7 +79,7 @@ void init_lock_arrays_hip() { check_error_and_throw_hip(error_sync2, "init_lock_arrays_hip: post init"); } -template<typename T> +template <typename T> void finalize_lock_arrays_hip() { if (HIP_SPACE_ATOMIC_LOCKS_DEVICE_h == nullptr) return; auto error_free1 = hipFree(HIP_SPACE_ATOMIC_LOCKS_DEVICE_h); @@ -98,4 +100,3 @@ template void finalize_lock_arrays_hip<int>(); } // namespace desul #endif - diff --git a/packages/kokkos/tpls/gtest/gtest/gtest-all.cc 
b/packages/kokkos/tpls/gtest/gtest/gtest-all.cc index f4daf6b01d051e59102c1b8618c5d97cb0236907..9f340e0140520dbfca0b0f31a61b1c1f4c5e7e25 100644 --- a/packages/kokkos/tpls/gtest/gtest/gtest-all.cc +++ b/packages/kokkos/tpls/gtest/gtest/gtest-all.cc @@ -4278,37 +4278,6 @@ void ReportInvalidTestSuiteType(const char* test_suite_name, } } // namespace internal -namespace { - -// A predicate that checks the test name of a TestInfo against a known -// value. -// -// This is used for implementation of the TestSuite class only. We put -// it in the anonymous namespace to prevent polluting the outer -// namespace. -// -// TestNameIs is copyable. -class TestNameIs { - public: - // Constructor. - // - // TestNameIs has NO default constructor. - explicit TestNameIs(const char* name) - : name_(name) {} -#if defined(__EDG__) -#pragma diag_suppress declared_but_not_referenced -#endif - // Returns true if and only if the test name of test_info matches name_. - bool operator()(const TestInfo * test_info) const { - return test_info && test_info->name() == name_; - } - - private: - std::string name_; -}; - -} // namespace - namespace internal { // This method expands all parameterized tests registered with macros TEST_P diff --git a/src/utils/Array.hpp b/src/utils/Array.hpp index d641dbdeffae9fa0b6434ba9c8ef2b30853c0ff0..7535b673565fa04aadba3935420f6946f5ad5544 100644 --- a/src/utils/Array.hpp +++ b/src/utils/Array.hpp @@ -8,7 +8,6 @@ #include <utils/PugsUtils.hpp> #include <utils/Types.hpp> -#include <Kokkos_CopyViews.hpp> #include <algorithm> template <typename DataType> @@ -25,18 +24,21 @@ class [[nodiscard]] Array const size_t m_size; public: - [[nodiscard]] PUGS_INLINE size_t size() const + [[nodiscard]] PUGS_INLINE size_t + size() const { return m_size; } - [[nodiscard]] PUGS_INLINE DataType& operator[](size_t i) const + [[nodiscard]] PUGS_INLINE DataType& + operator[](size_t i) const { Assert(i < m_size, "invalid index"); return m_values[i]; } - PUGS_INLINE void fill(const 
DataType& data) const + PUGS_INLINE void + fill(const DataType& data) const { for (size_t i = 0; i < m_size; ++i) { m_values[i] = data; @@ -44,7 +46,7 @@ class [[nodiscard]] Array } UnsafeArrayView& operator=(const UnsafeArrayView&) = delete; - UnsafeArrayView& operator=(UnsafeArrayView&&) = delete; + UnsafeArrayView& operator=(UnsafeArrayView&&) = delete; UnsafeArrayView(const Array<DataType>& array, index_type begin, index_type size) : m_values{&array[begin]}, m_size{size} @@ -68,12 +70,14 @@ class [[nodiscard]] Array friend Array<std::add_const_t<DataType>>; public: - [[nodiscard]] PUGS_INLINE size_t size() const noexcept + [[nodiscard]] PUGS_INLINE size_t + size() const noexcept { return m_values.extent(0); } - [[nodiscard]] friend PUGS_INLINE Array<std::remove_const_t<DataType>> copy(const Array<DataType>& source) + [[nodiscard]] friend PUGS_INLINE Array<std::remove_const_t<DataType>> + copy(const Array<DataType>& source) { Array<std::remove_const_t<DataType>> image(source.size()); Kokkos::deep_copy(image.m_values, source.m_values); @@ -81,8 +85,8 @@ class [[nodiscard]] Array return image; } - friend PUGS_INLINE void copy_to(const Array<DataType>& source, - const Array<std::remove_const_t<DataType>>& destination) + friend PUGS_INLINE void + copy_to(const Array<DataType>& source, const Array<std::remove_const_t<DataType>>& destination) { Assert(source.size() == destination.size(), "incompatible Array sizes"); Kokkos::deep_copy(destination.m_values, source.m_values); @@ -96,14 +100,16 @@ class [[nodiscard]] Array typename Array<DataType2>::index_type begin, typename Array<DataType2>::index_type size); - [[nodiscard]] PUGS_INLINE DataType& operator[](index_type i) const noexcept(NO_ASSERT) + [[nodiscard]] PUGS_INLINE DataType& + operator[](index_type i) const noexcept(NO_ASSERT) { Assert(i < m_values.extent(0), "invalid index"); return m_values[i]; } PUGS_INLINE - void fill(const DataType& data) const + void + fill(const DataType& data) const { static_assert(not 
std::is_const_v<DataType>, "Cannot modify Array of const"); @@ -111,7 +117,8 @@ class [[nodiscard]] Array } template <typename DataType2> - PUGS_INLINE Array& operator=(const Array<DataType2>& array) noexcept + PUGS_INLINE Array& + operator=(const Array<DataType2>& array) noexcept { // ensures that DataType is the same as source DataType2 static_assert(std::is_same<std::remove_const_t<DataType>, std::remove_const_t<DataType2>>(), @@ -154,7 +161,8 @@ class [[nodiscard]] Array #endif // NDEBUG } - friend std::ostream& operator<<(std::ostream& os, const Array& x) + friend std::ostream& + operator<<(std::ostream& os, const Array& x) { if (x.size() > 0) { os << 0 << ':' << NaNHelper(x[0]); @@ -172,13 +180,14 @@ class [[nodiscard]] Array Array(const Array&) = default; template <typename DataType2> - PUGS_INLINE Array(const Array<DataType2>& array) noexcept + PUGS_INLINE + Array(const Array<DataType2>& array) noexcept { this->operator=(array); } PUGS_INLINE - Array(Array &&) = default; + Array(Array&&) = default; PUGS_INLINE ~Array() = default; diff --git a/src/utils/PugsUtils.cpp b/src/utils/PugsUtils.cpp index 705fbd5e0a349ff60ef3a1c680dd3d3e0a4d25ec..76509c611f824ae6993c554360eba0f09e611c48 100644 --- a/src/utils/PugsUtils.cpp +++ b/src/utils/PugsUtils.cpp @@ -18,6 +18,7 @@ #include <CLI/CLI.hpp> #include <iostream> +#include <thread> std::string pugsVersion() @@ -150,7 +151,8 @@ initialize(int& argc, char* argv[]) #else // PUGS_HAS_MPI std::cout << "Sequential build\n"; #endif // PUGS_HAS_MPI - Kokkos::DefaultExecutionSpace::print_configuration(std::cout); + std::cout << "Number of threads " << Kokkos::DefaultHostExecutionSpace::concurrency() << " / " + << std::max(std::thread::hardware_concurrency(), 1u) << '\n'; std::cout << rang::style::reset; std::cout << "-------------------------------------------------------\n"; } diff --git a/src/utils/Table.hpp b/src/utils/Table.hpp index 6171f1a242d0cce43137c4a24458f4c00059269c..6972ea1030dedbdd30c039e0880a3773024603dc 
100644 --- a/src/utils/Table.hpp +++ b/src/utils/Table.hpp @@ -7,8 +7,6 @@ #include <utils/PugsMacros.hpp> #include <utils/PugsUtils.hpp> -#include <Kokkos_CopyViews.hpp> - #include <iostream> template <typename DataType> @@ -35,19 +33,22 @@ class [[nodiscard]] Table const size_t m_row; public: - PUGS_INLINE size_t size() const noexcept + PUGS_INLINE size_t + size() const noexcept { return m_table.numberOfColumns(); } PUGS_INLINE - DataType& operator[](size_t i) const + DataType& + operator[](size_t i) const { Assert(i < m_table.numberOfColumns(), "invalid index"); return m_table(m_row, i); } - PUGS_INLINE void fill(const DataType& data) const + PUGS_INLINE void + fill(const DataType& data) const { for (size_t i = 0; i < this->size(); ++i) { m_table(m_row, i) = data; @@ -55,7 +56,7 @@ class [[nodiscard]] Table } UnsafeRowView& operator=(const UnsafeRowView&) = delete; - UnsafeRowView& operator=(UnsafeRowView&&) = delete; + UnsafeRowView& operator=(UnsafeRowView&&) = delete; UnsafeRowView(const Table<DataType>& table, index_type row) : m_table{table}, m_row{row} { @@ -91,18 +92,21 @@ class [[nodiscard]] Table const size_t m_row; public: - [[nodiscard]] PUGS_INLINE size_t size() const + [[nodiscard]] PUGS_INLINE size_t + size() const { return m_table_view.numberOfColumns(); } - [[nodiscard]] PUGS_INLINE DataType& operator[](size_t i) const + [[nodiscard]] PUGS_INLINE DataType& + operator[](size_t i) const { Assert(i < m_table_view.numberOfColumns(), "invalid index"); return m_table_view(m_row, i); } - PUGS_INLINE void fill(const DataType& data) const + PUGS_INLINE void + fill(const DataType& data) const { for (size_t i = 0; i < this->size(); ++i) { m_table_view(m_row, i) = data; @@ -123,23 +127,27 @@ class [[nodiscard]] Table ~RowView() = default; }; - [[nodiscard]] PUGS_INLINE size_t numberOfRows() const noexcept + [[nodiscard]] PUGS_INLINE size_t + numberOfRows() const noexcept { return m_row_size; } - [[nodiscard]] PUGS_INLINE size_t numberOfColumns() const 
noexcept + [[nodiscard]] PUGS_INLINE size_t + numberOfColumns() const noexcept { return m_column_size; } - [[nodiscard]] PUGS_INLINE RowView operator[](size_t i) const + [[nodiscard]] PUGS_INLINE RowView + operator[](size_t i) const { Assert(i < this->numberOfRows(), "invalid index"); return RowView(*this, i); } - [[nodiscard]] PUGS_INLINE DataType& operator()(size_t i, size_t j) const + [[nodiscard]] PUGS_INLINE DataType& + operator()(size_t i, size_t j) const { Assert(i < m_row_size, "invalid row index"); Assert(j < m_column_size, "invalid column index"); @@ -147,7 +155,8 @@ class [[nodiscard]] Table return m_table(m_row_begin + i, m_column_begin + j); } - PUGS_INLINE void fill(const DataType& data) const + PUGS_INLINE void + fill(const DataType& data) const { for (size_t i = 0; i < m_row_size; ++i) { for (size_t j = 0; j < m_column_size; ++j) { @@ -156,9 +165,12 @@ class [[nodiscard]] Table } } UnsafeTableView& operator=(const UnsafeTableView&) = delete; - UnsafeTableView& operator=(UnsafeTableView&&) = delete; + UnsafeTableView& operator=(UnsafeTableView&&) = delete; - UnsafeTableView(const Table<DataType>& table, index_type row_begin, index_type row_size, index_type column_begin, + UnsafeTableView(const Table<DataType>& table, + index_type row_begin, + index_type row_size, + index_type column_begin, index_type column_size) : m_table{table}, m_row_begin{row_begin}, @@ -181,22 +193,26 @@ class [[nodiscard]] Table ~UnsafeTableView() = default; }; - [[nodiscard]] PUGS_INLINE size_t numberOfRows() const noexcept + [[nodiscard]] PUGS_INLINE size_t + numberOfRows() const noexcept { return m_values.extent(0); } - [[nodiscard]] PUGS_INLINE size_t numberOfColumns() const noexcept + [[nodiscard]] PUGS_INLINE size_t + numberOfColumns() const noexcept { return m_values.extent(1); } - [[nodiscard]] PUGS_INLINE Table<DataType>::UnsafeRowView operator[](index_type i) const + [[nodiscard]] PUGS_INLINE Table<DataType>::UnsafeRowView + operator[](index_type i) const { return 
UnsafeRowView(*this, i); } - [[nodiscard]] friend PUGS_INLINE Table<std::remove_const_t<DataType>> copy(const Table<DataType>& source) + [[nodiscard]] friend PUGS_INLINE Table<std::remove_const_t<DataType>> + copy(const Table<DataType>& source) { Table<std::remove_const_t<DataType>> image(source.numberOfRows(), source.numberOfColumns()); Kokkos::deep_copy(image.m_values, source.m_values); @@ -204,8 +220,8 @@ class [[nodiscard]] Table return image; } - friend PUGS_INLINE void copy_to(const Table<DataType>& source, - const Table<std::remove_const_t<DataType>>& destination) + friend PUGS_INLINE void + copy_to(const Table<DataType>& source, const Table<std::remove_const_t<DataType>>& destination) { Assert(source.numberOfRows() == destination.numberOfRows(), "incompatible number of rows"); Assert(source.numberOfColumns() == destination.numberOfColumns(), "incompatible number of columns"); @@ -216,12 +232,15 @@ class [[nodiscard]] Table friend PUGS_INLINE Table<DataType2> encapsulate(const Kokkos::View<DataType2**, RT...>& values); template <typename DataType2> - friend PUGS_INLINE typename Table<DataType2>::UnsafeTableView - subTableView(const Table<DataType2>& table, typename Table<DataType2>::index_type row_begin, - typename Table<DataType2>::index_type row_size, typename Table<DataType2>::index_type column_begin, - typename Table<DataType2>::index_type column_size); - - [[nodiscard]] PUGS_INLINE DataType& operator()(index_type i, index_type j) const noexcept(NO_ASSERT) + friend PUGS_INLINE typename Table<DataType2>::UnsafeTableView subTableView( + const Table<DataType2>& table, + typename Table<DataType2>::index_type row_begin, + typename Table<DataType2>::index_type row_size, + typename Table<DataType2>::index_type column_begin, + typename Table<DataType2>::index_type column_size); + + [[nodiscard]] PUGS_INLINE DataType& + operator()(index_type i, index_type j) const noexcept(NO_ASSERT) { Assert(i < this->numberOfRows(), "invalid row index"); Assert(j < 
this->numberOfColumns(), "invalid column index"); @@ -229,7 +248,8 @@ class [[nodiscard]] Table } PUGS_INLINE - void fill(const DataType& data) const + void + fill(const DataType& data) const { static_assert(not std::is_const<DataType>(), "Cannot modify Table of const"); @@ -237,7 +257,8 @@ class [[nodiscard]] Table } template <typename DataType2> - PUGS_INLINE Table& operator=(const Table<DataType2>& table) noexcept + PUGS_INLINE Table& + operator=(const Table<DataType2>& table) noexcept { // ensures that DataType is the same as source DataType2 static_assert(std::is_same<std::remove_const_t<DataType>, std::remove_const_t<DataType2>>(), @@ -281,7 +302,8 @@ class [[nodiscard]] Table #endif // NDEBUG } - friend std::ostream& operator<<(std::ostream& os, const Table& t) + friend std::ostream& + operator<<(std::ostream& os, const Table& t) { for (size_t i = 0; i < t.numberOfRows(); ++i) { os << i << '|'; @@ -300,13 +322,14 @@ class [[nodiscard]] Table Table(const Table&) = default; template <typename DataType2> - PUGS_INLINE Table(const Table<DataType2>& table) noexcept + PUGS_INLINE + Table(const Table<DataType2>& table) noexcept { this->operator=(table); } PUGS_INLINE - Table(Table &&) = default; + Table(Table&&) = default; PUGS_INLINE ~Table() = default; diff --git a/tests/mpi_test_main.cpp b/tests/mpi_test_main.cpp index f2a2fe63a437e969a393a418eb529653b692dd12..eeaf00129829c33fd5e54cdcb44e09bb2419e91f 100644 --- a/tests/mpi_test_main.cpp +++ b/tests/mpi_test_main.cpp @@ -28,7 +28,15 @@ main(int argc, char* argv[]) const int nb_max_threads = std::max(std::thread::hardware_concurrency(), 1u); const int nb_threads = std::max(nb_max_threads / parallel::Messenger::getInstance().size(), 1ul); - Kokkos::initialize({nb_threads, -1, -1, true}); + { + Kokkos::InitArguments args; + args.num_threads = nb_threads; + args.num_numa = -1; + args.device_id = -1; + args.disable_warnings = true; + + Kokkos::initialize(args); + } PETScWrapper::initialize(argc, argv); diff --git 
a/tests/test_main.cpp b/tests/test_main.cpp index 3eff2f31167d8aa3e6a776d8daaf3b00e75316c9..0dcf46cfe0e22d9bce9572b27b072a4a58c16b98 100644 --- a/tests/test_main.cpp +++ b/tests/test_main.cpp @@ -23,7 +23,15 @@ main(int argc, char* argv[]) parallel::Messenger::create(argc, argv); const int nb_threads = std::max(std::thread::hardware_concurrency(), 1u); - Kokkos::initialize({nb_threads, -1, -1, true}); + { + Kokkos::InitArguments args; + args.num_threads = nb_threads; + args.num_numa = -1; + args.device_id = -1; + args.disable_warnings = true; + + Kokkos::initialize(args); + } PETScWrapper::initialize(argc, argv); SLEPcWrapper::initialize(argc, argv);