diff --git a/packages/CLI11/.github/actions/cmake_config/Dockerfile b/packages/CLI11/.github/actions/cmake_config/Dockerfile deleted file mode 100644 index 63b28a9949de219993f26a48d85db4afe6bce6df..0000000000000000000000000000000000000000 --- a/packages/CLI11/.github/actions/cmake_config/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM ubuntu:18.04 - -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - g++=4:7.4.0-1ubuntu2.3 \ - wget=1.19.4-1ubuntu2.2 \ - libidn11=1.33-2.1ubuntu1.2 \ - ca-certificates=20180409 \ - make=4.1-9.1ubuntu1 \ - git=1:2.17.1-1ubuntu0.7 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -COPY entrypoint.sh /entrypoint.sh - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/packages/CLI11/.github/actions/cmake_config/action.yml b/packages/CLI11/.github/actions/cmake_config/action.yml deleted file mode 100644 index 73ff1661b5a6fe2840339996cf88671bc1053b67..0000000000000000000000000000000000000000 --- a/packages/CLI11/.github/actions/cmake_config/action.yml +++ /dev/null @@ -1,16 +0,0 @@ -description: 'Test out a bare bones configuration with a CMake version' -inputs: - version: - description: 'The full version of CMake to check' - required: true - options: - description: 'The CMake configuration options' - required: false - default: "" -name: 'Configure with CMake' -runs: - using: 'docker' - image: 'Dockerfile' - args: - - ${{ inputs.version }} - - ${{ inputs.options }} diff --git a/packages/CLI11/.github/actions/cmake_config/entrypoint.sh b/packages/CLI11/.github/actions/cmake_config/entrypoint.sh deleted file mode 100755 index e3bd622e1575375a746dc9425a7012b1777a79f1..0000000000000000000000000000000000000000 --- a/packages/CLI11/.github/actions/cmake_config/entrypoint.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -l - -set -ex - -mkdir -p cmake_dir -mkdir -p build_tmp -mkdir -p cmake_sources -rm -rf cmake_dir/* build_tmp/* - -v=$1 -fn=cmake-$v-Linux-x86_64.tar.gz - -if [ ! 
-f cmake_sources/$fn ]; then - wget -qO cmake_sources/$fn "https://cmake.org/files/v${v%.*}/$fn" -fi - -tar -xzf cmake_sources/$fn --strip-components=1 -C $PWD/cmake_dir - -export PATH=$PWD/cmake_dir/bin:$PATH - -cmake --version - -cd build_tmp && cmake .. $2 diff --git a/packages/CLI11/.github/actions/quick_cmake/action.yml b/packages/CLI11/.github/actions/quick_cmake/action.yml new file mode 100644 index 0000000000000000000000000000000000000000..da721a78c32e275b7f8c93e7a8457b1d009925fc --- /dev/null +++ b/packages/CLI11/.github/actions/quick_cmake/action.yml @@ -0,0 +1,18 @@ +name: Quick CMake config +description: 'Runs CMake 3.4+ (if already setup)' +inputs: + args: + description: 'Other arguments' + required: false + default: '' + +runs: + using: composite + steps: + - run: | + mkdir -p build-tmp + touch build-tmp/tmp + rm -r build-tmp/* + (cd build-tmp && cmake .. ${{ inputs.args}}) + rm -r build-tmp + shell: bash diff --git a/packages/CLI11/.github/workflows/tests.yml b/packages/CLI11/.github/workflows/tests.yml index 9fcfc705b2860f8ff85464eae2562ae90229f123..60f10f9623eaf0218344a63d60737eea92d5ace2 100644 --- a/packages/CLI11/.github/workflows/tests.yml +++ b/packages/CLI11/.github/workflows/tests.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 - - uses: pre-commit/action@v2.0.0 + - uses: pre-commit/action@v2.0.2 cuda-build: name: CUDA build only @@ -28,7 +28,7 @@ jobs: - name: Add wget run: apt-get update && apt-get install -y wget - name: Setup cmake - uses: jwlawson/actions-setup-cmake@v1.7 + uses: jwlawson/actions-setup-cmake@v1.8 - name: Configure run: cmake -S . 
-B build -DCLI11_CUDA_TESTS=ON - name: Build @@ -39,81 +39,144 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - with: - submodules: true + - name: CMake 3.4 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.4.3 + cmake-version: "3.4" + - name: Check CMake 3.4 + uses: ./.github/actions/quick_cmake + - name: CMake 3.5 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.5.2 + cmake-version: "3.5" + - name: Check CMake 3.5 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.6 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.6.3 + cmake-version: "3.6" + - name: Check CMake 3.6 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.7 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.7.2 + cmake-version: "3.7" + - name: Check CMake 3.7 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.8 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.8.2 + cmake-version: "3.8" + - name: Check CMake 3.8 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.9 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.9.6 + cmake-version: "3.9" + - name: Check CMake 3.9 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.10 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.10.3 + cmake-version: "3.10" + - name: Check CMake 3.10 + uses: ./.github/actions/quick_cmake if: success() || failure() - - name: CMake 3.11 (full) - uses: ./.github/actions/cmake_config + + - name: CMake 3.11 + uses: jwlawson/actions-setup-cmake@v1.8 + with: + cmake-version: "3.11" + - name: Check CMake 3.11 (full) + 
uses: ./.github/actions/quick_cmake with: - version: 3.11.4 - options: -DCLI11_SANITIZERS=ON -DCLI11_BUILD_EXAMPLES_JSON=ON + args: -DCLI11_SANITIZERS=ON -DCLI11_BUILD_EXAMPLES_JSON=ON if: success() || failure() + - name: CMake 3.12 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.12.4 + cmake-version: "3.12" + - name: Check CMake 3.12 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.13 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.13.5 + cmake-version: "3.13" + - name: Check CMake 3.13 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.14 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.14.7 + cmake-version: "3.14" + - name: Check CMake 3.14 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.15 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.15.6 + cmake-version: "3.15" + - name: Check CMake 3.15 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.16 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.16.8 + cmake-version: "3.16" + - name: Check CMake 3.16 + uses: ./.github/actions/quick_cmake if: success() || failure() + - name: CMake 3.17 - uses: ./.github/actions/cmake_config + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.17.3 + cmake-version: "3.17" + - name: Check CMake 3.17 + uses: ./.github/actions/quick_cmake if: success() || failure() - - name: CMake 3.18 (full) - uses: ./.github/actions/cmake_config + + - name: CMake 3.18 + uses: jwlawson/actions-setup-cmake@v1.8 with: - version: 3.18.0 - options: -DCLI11_SANITIZERS=ON -DCLI11_BUILD_EXAMPLES_JSON=ON + cmake-version: "3.18" + - name: Check CMake 3.18 + uses: ./.github/actions/quick_cmake if: success() || failure() 
+ + - name: CMake 3.19 + uses: jwlawson/actions-setup-cmake@v1.8 + with: + cmake-version: "3.19" + - name: Check CMake 3.19 (full) + uses: ./.github/actions/quick_cmake + with: + args: -DCLI11_SANITIZERS=ON -DCLI11_BUILD_EXAMPLES_JSON=ON + if: success() || failure() + + - name: CMake 3.20 + uses: jwlawson/actions-setup-cmake@v1.8 + with: + cmake-version: "3.20" + - name: Check CMake 3.20 + uses: ./.github/actions/quick_cmake + if: success() || failure() + + diff --git a/packages/CLI11/.gitignore b/packages/CLI11/.gitignore index 2a6ef59a35af60bd3b7ae6ec675f2b045fd8dfbb..cc1b9d0c7f77776f258bfccbe82224fe588e9582 100644 --- a/packages/CLI11/.gitignore +++ b/packages/CLI11/.gitignore @@ -12,3 +12,4 @@ a.out* /node_modules/* /package.json /yarn.lock +/CLI11.hpp diff --git a/packages/CLI11/.gitmodules b/packages/CLI11/.gitmodules deleted file mode 100644 index 6051b7f20049a412a56aa8f664a56a8e35b0278f..0000000000000000000000000000000000000000 --- a/packages/CLI11/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "extern/googletest"] - path = extern/googletest - url = ../../google/googletest.git diff --git a/packages/CLI11/.gitrepo b/packages/CLI11/.gitrepo index e423eb3eead27d0875cc8ea21a30bf8ceec6ed8c..732e03b962d7b98a783a3ae9f93d8ac9232673e5 100644 --- a/packages/CLI11/.gitrepo +++ b/packages/CLI11/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:CLIUtils/CLI11.git branch = master - commit = 639a8add1e248c7337b420ff68572ddb3893e080 - parent = b7068f18e2c214064a81a5b561d5f04a80d2a847 + commit = 4af78beef777e313814b4daff70e2da9171a385a + parent = 015d2fd5332b347d28c47c8dfe3f401382724178 cmdver = 0.4.3 method = merge diff --git a/packages/CLI11/CHANGELOG.md b/packages/CLI11/CHANGELOG.md index f9cc853b7f3bf554c2743b98de8f6fe53e610617..d2a59b4786a836a07801228e5371e095b42c5e23 100644 --- a/packages/CLI11/CHANGELOG.md +++ b/packages/CLI11/CHANGELOG.md @@ -1,14 +1,42 @@ ## Version 2.0: In progress -* Built-in config format is TOML compliant now [#435] -* Config 
updates [#442] -* More powerful containers, `%%` separator [#423] -* Add a version flag easily [#452] +* Built-in config format is TOML compliant now [#435][] + * Support multiline TOML [#528][] +* Support short/positional options in config mode [#443][] +* More powerful containers, `%%` separator [#423][] +* Add a version flag easily [#452][] +* Support atomic types [#520][] +* Add a type validator `CLI::TypeValidator<TYPE>` [#526][] +* Support `->silent()` on subcommands. [#529][] +* Add alias section to help for subcommands [#545][] +* Redesigned MakeSingleFiles to have a higher level of manual control, to support future features. [#546][] +* Moved testing from GTest to Catch2 [#574][] + +* Bugfix: avoid listing helpall as a required flag [#530][] +* Bugfix: avoid a clash with WINDOWS define [#563][] + +* Removed deprecated set commands, use validators instead. [#565][] + +* Build: support pkg-config [#523][] + [#435]: https://github.com/CLIUtils/CLI11/pull/435 [#443]: https://github.com/CLIUtils/CLI11/pull/443 [#423]: https://github.com/CLIUtils/CLI11/pull/423 [#452]: https://github.com/CLIUtils/CLI11/pull/452 +[#520]: https://github.com/CLIUtils/CLI11/pull/520 +[#523]: https://github.com/CLIUtils/CLI11/pull/523 +[#526]: https://github.com/CLIUtils/CLI11/pull/526 +[#528]: https://github.com/CLIUtils/CLI11/pull/528 +[#529]: https://github.com/CLIUtils/CLI11/pull/529 +[#530]: https://github.com/CLIUtils/CLI11/pull/530 +[#545]: https://github.com/CLIUtils/CLI11/pull/545 +[#546]: https://github.com/CLIUtils/CLI11/pull/546 +[#563]: https://github.com/CLIUtils/CLI11/pull/563 +[#565]: https://github.com/CLIUtils/CLI11/pull/565 +[#574]: https://github.com/CLIUtils/CLI11/pull/574 + + ### Version 1.9.1: Backporting fixes diff --git a/packages/CLI11/CPPLINT.cfg b/packages/CLI11/CPPLINT.cfg index d497667bbc899dc8056d25387413cd382d7737f9..0a1758da0e5b703d4677bcaf60d171455c4aee66 100644 --- a/packages/CLI11/CPPLINT.cfg +++ b/packages/CLI11/CPPLINT.cfg @@ -5,6 +5,8 @@ 
linelength=120 # As in .clang-format filter=-build/c++11 # Reports e.g. chrono and thread, which overlap with Chromium's API. Not applicable to general C++ projects. filter=-build/include_order # Requires unusual include order that encourages creating not self-contained headers filter=-readability/nolint # Conflicts with clang-tidy +filter=-readability/check # Catch uses CHECK(a == b) (Tests only) +filter=-build/namespaces # Currently using it for one test (Tests only) filter=-runtime/references # Requires fundamental change of API, don't see need for this filter=-whitespace/blank_line # Unnecessarily strict with blank lines that otherwise help with readability filter=-whitespace/indent # Requires strange 3-space indent of private/protected/public markers diff --git a/packages/CLI11/README.md b/packages/CLI11/README.md index 8ffb6b25ddd02583763cb18dcd4902ca221a8b63..846d44ec7310e567fa32897088c03d659d1e3b56 100644 --- a/packages/CLI11/README.md +++ b/packages/CLI11/README.md @@ -224,7 +224,7 @@ While all options internally are the same type, there are several ways to add an app.add_option(option_name, help_str="") app.add_option(option_name, - variable_to_bind_to, // bool, char(see note)๐ง, int, float, vector, enum, or string-like, or anything with a defined conversion from a string or that takes an int ๐, double ๐, or string in a constructor. Also allowed are tuples ๐, std::array ๐ or std::pair ๐. Also supported are complex numbers๐ง, wrapper types๐ง, and containers besides vector๐ง of any other supported type. + variable_to_bind_to, // bool, char(see note)๐ง, int, float, vector, enum, std::atomic ๐ง, or string-like, or anything with a defined conversion from a string or that takes an int ๐, double ๐, or string in a constructor. Also allowed are tuples ๐, std::array ๐ or std::pair ๐. Also supported are complex numbers๐ง, wrapper types๐ง, and containers besides vector๐ง of any other supported type. 
help_string="") app.add_option_function<type>(option_name, @@ -245,7 +245,7 @@ app.add_flag(option_name, help_string="") app.add_flag(option_name, - variable_to_bind_to, // bool, int, float, complex, containers, enum, or string-like, or any singular object with a defined conversion from a string like add_option + variable_to_bind_to, // bool, int, float, complex, containers, enum, std::atomic ๐ง, or string-like, or any singular object with a defined conversion from a string like add_option help_string="") app.add_flag_function(option_name, diff --git a/packages/CLI11/azure-pipelines.yml b/packages/CLI11/azure-pipelines.yml index 90017c62f8f111acfa4eb95c78f07d2c1453fd9c..c72c748e6f59d6af651e28231b6cbb0d53f8a32d 100644 --- a/packages/CLI11/azure-pipelines.yml +++ b/packages/CLI11/azure-pipelines.yml @@ -121,7 +121,7 @@ jobs: cli11.std: 17 cli11.options: -DCLI11_FORCE_LIBCXX=ON clang10_20: - containerImage: helics/buildenv:clang10-builder + containerImage: silkeh/clang:10 cli11.std: 20 cli11.options: -DCLI11_FORCE_LIBCXX=ON -DCMAKE_CXX_FLAGS=-std=c++20 container: $[ variables['containerImage'] ] diff --git a/packages/CLI11/cmake/AddGoogletest.cmake b/packages/CLI11/cmake/AddGoogletest.cmake deleted file mode 100644 index ae0dc18fcb96f974148645e2ab2fb0abc8efe564..0000000000000000000000000000000000000000 --- a/packages/CLI11/cmake/AddGoogletest.cmake +++ /dev/null @@ -1,49 +0,0 @@ -# -# -# Includes GTest and provides a helper macro to add tests. Add make check, as well, which -# gives output on failed tests without having to set an environment variable. 
-# -# -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -set(BUILD_SHARED_LIBS OFF) - -add_subdirectory("${CLI11_SOURCE_DIR}/extern/googletest" "${CLI11_BINARY_DIR}/extern/googletest" EXCLUDE_FROM_ALL) - - -if(GOOGLE_TEST_INDIVIDUAL) - if(NOT CMAKE_VERSION VERSION_LESS 3.9) - include(GoogleTest) - else() - set(GOOGLE_TEST_INDIVIDUAL OFF) - endif() -endif() - -# Target must already exist -macro(add_gtest TESTNAME) - target_link_libraries(${TESTNAME} PUBLIC gtest gmock gtest_main) - - if(GOOGLE_TEST_INDIVIDUAL) - if(CMAKE_VERSION VERSION_LESS 3.10) - gtest_add_tests(TARGET ${TESTNAME} - TEST_PREFIX "${TESTNAME}." - TEST_LIST TmpTestList) - set_tests_properties(${TmpTestList} PROPERTIES FOLDER "Tests") - else() - gtest_discover_tests(${TESTNAME} - TEST_PREFIX "${TESTNAME}." - PROPERTIES FOLDER "Tests") - - endif() - else() - add_test(${TESTNAME} ${TESTNAME}) - set_target_properties(${TESTNAME} PROPERTIES FOLDER "Tests") - if (CLI11_FORCE_LIBCXX) - set_property(TARGET ${T} APPEND_STRING - PROPERTY LINK_FLAGS -stdlib=libc++) - endif() - endif() - -endmacro() - -set_target_properties(gtest gtest_main gmock gmock_main - PROPERTIES FOLDER "Extern") diff --git a/packages/CLI11/extern/googletest b/packages/CLI11/extern/googletest deleted file mode 160000 index 859bfe8981d6724c4ea06e73d29accd8588f3230..0000000000000000000000000000000000000000 --- a/packages/CLI11/extern/googletest +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 859bfe8981d6724c4ea06e73d29accd8588f3230 diff --git a/packages/CLI11/include/CLI/App.hpp b/packages/CLI11/include/CLI/App.hpp index d3c513bd03de33141eafa41ea176fe742992e69d..edbe2b7386ba7be718762c034fa2c3d3bf87ce9b 100644 --- a/packages/CLI11/include/CLI/App.hpp +++ b/packages/CLI11/include/CLI/App.hpp @@ -44,7 +44,7 @@ namespace CLI { #endif namespace detail { -enum class Classifier { NONE, POSITIONAL_MARK, SHORT, LONG, WINDOWS, SUBCOMMAND, SUBCOMMAND_TERMINATOR }; +enum class Classifier { NONE, POSITIONAL_MARK, SHORT, LONG, WINDOWS_STYLE, 
SUBCOMMAND, SUBCOMMAND_TERMINATOR }; struct AppFriend; } // namespace detail @@ -897,56 +897,6 @@ class App { } #endif - /// Add set of options (No default, temp reference, such as an inline set) DEPRECATED - template <typename T> - Option *add_set(std::string option_name, - T &member, ///< The selected member of the set - std::set<T> options, ///< The set of possibilities - std::string option_description = "") { - - Option *opt = add_option(option_name, member, std::move(option_description)); - opt->check(IsMember{options}); - return opt; - } - - /// Add set of options (No default, set can be changed afterwards - do not destroy the set) DEPRECATED - template <typename T> - Option *add_mutable_set(std::string option_name, - T &member, ///< The selected member of the set - const std::set<T> &options, ///< The set of possibilities - std::string option_description = "") { - - Option *opt = add_option(option_name, member, std::move(option_description)); - opt->check(IsMember{&options}); - return opt; - } - - /// Add set of options (with default, static set, such as an inline set) DEPRECATED - template <typename T> - Option *add_set(std::string option_name, - T &member, ///< The selected member of the set - std::set<T> options, ///< The set of possibilities - std::string option_description, - bool defaulted) { - - Option *opt = add_option(option_name, member, std::move(option_description), defaulted); - opt->check(IsMember{options}); - return opt; - } - - /// Add set of options (with default, set can be changed afterwards - do not destroy the set) DEPRECATED - template <typename T> - Option *add_mutable_set(std::string option_name, - T &member, ///< The selected member of the set - const std::set<T> &options, ///< The set of possibilities - std::string option_description, - bool defaulted) { - - Option *opt = add_option(option_name, member, std::move(option_description), defaulted); - opt->check(IsMember{&options}); - return opt; - } - /// Add a complex number 
DEPRECATED --use add_option instead template <typename T, typename XC = double> Option *add_complex(std::string option_name, @@ -2072,7 +2022,7 @@ class App { return detail::Classifier::SHORT; } if((allow_windows_style_options_) && (detail::split_windows_style(current, dummy1, dummy2))) - return detail::Classifier::WINDOWS; + return detail::Classifier::WINDOWS_STYLE; if((current == "++") && !name_.empty() && parent_ != nullptr) return detail::Classifier::SUBCOMMAND_TERMINATOR; return detail::Classifier::NONE; @@ -2525,7 +2475,7 @@ class App { break; case detail::Classifier::LONG: case detail::Classifier::SHORT: - case detail::Classifier::WINDOWS: + case detail::Classifier::WINDOWS_STYLE: // If already parsed a subcommand, don't accept options_ _parse_arg(args, classifier); break; @@ -2742,7 +2692,7 @@ class App { if(!detail::split_short(current, arg_name, rest)) throw HorribleError("Short parsed but missing! You should not see this"); break; - case detail::Classifier::WINDOWS: + case detail::Classifier::WINDOWS_STYLE: if(!detail::split_windows_style(current, arg_name, value)) throw HorribleError("windows option parsed but missing! 
You should not see this"); break; @@ -2760,7 +2710,7 @@ class App { return opt->check_lname(arg_name); if(current_type == detail::Classifier::SHORT) return opt->check_sname(arg_name); - // this will only get called for detail::Classifier::WINDOWS + // this will only get called for detail::Classifier::WINDOWS_STYLE return opt->check_lname(arg_name) || opt->check_sname(arg_name); }); diff --git a/packages/CLI11/tests/AppTest.cpp b/packages/CLI11/tests/AppTest.cpp index 89f52c0853354280eee2745920a92b290a55fe4f..6c3e71ced8ee59cbe4409698b86b44b56430a881 100644 --- a/packages/CLI11/tests/AppTest.cpp +++ b/packages/CLI11/tests/AppTest.cpp @@ -9,129 +9,127 @@ #include <cstdint> #include <cstdlib> -#include "gmock/gmock.h" - -TEST_F(TApp, OneFlagShort) { +TEST_CASE_METHOD(TApp, "OneFlagShort", "[app]") { app.add_flag("-c,--count"); args = {"-c"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 1u); } -TEST_F(TApp, OneFlagShortValues) { +TEST_CASE_METHOD(TApp, "OneFlagShortValues", "[app]") { app.add_flag("-c{v1},--count{v2}"); args = {"-c"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 1u); auto v = app["-c"]->results(); - EXPECT_EQ(v[0], "v1"); + CHECK("v1" == v[0]); - EXPECT_THROW(app["--invalid"], CLI::OptionNotFound); + CHECK_THROWS_AS(app["--invalid"], CLI::OptionNotFound); } -TEST_F(TApp, OneFlagShortValuesAs) { +TEST_CASE_METHOD(TApp, "OneFlagShortValuesAs", "[app]") { auto flg = app.add_flag("-c{1},--count{2}"); args = {"-c"}; run(); auto opt = app["-c"]; - EXPECT_EQ(opt->as<int>(), 1); + CHECK(1 == opt->as<int>()); args = {"--count"}; run(); - EXPECT_EQ(opt->as<int>(), 2); + CHECK(2 == opt->as<int>()); flg->take_first(); args = {"-c", "--count"}; run(); - EXPECT_EQ(opt->as<int>(), 1); + CHECK(1 == opt->as<int>()); flg->take_last(); - EXPECT_EQ(opt->as<int>(), 2); + CHECK(2 
== opt->as<int>()); flg->multi_option_policy(CLI::MultiOptionPolicy::Throw); - EXPECT_THROW(opt->as<int>(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(opt->as<int>(), CLI::ArgumentMismatch); flg->multi_option_policy(CLI::MultiOptionPolicy::TakeAll); auto vec = opt->as<std::vector<int>>(); - EXPECT_EQ(vec[0], 1); - EXPECT_EQ(vec[1], 2); + CHECK(1 == vec[0]); + CHECK(2 == vec[1]); flg->multi_option_policy(CLI::MultiOptionPolicy::Join); - EXPECT_EQ(opt->as<std::string>(), "1\n2"); + CHECK("1\n2" == opt->as<std::string>()); flg->delimiter(','); - EXPECT_EQ(opt->as<std::string>(), "1,2"); + CHECK("1,2" == opt->as<std::string>()); } -TEST_F(TApp, OneFlagShortWindows) { +TEST_CASE_METHOD(TApp, "OneFlagShortWindows", "[app]") { app.add_flag("-c,--count"); args = {"/c"}; app.allow_windows_style_options(); run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 1u); } -TEST_F(TApp, WindowsLongShortMix1) { +TEST_CASE_METHOD(TApp, "WindowsLongShortMix1", "[app]") { app.allow_windows_style_options(); auto a = app.add_flag("-c"); auto b = app.add_flag("--c"); args = {"/c"}; run(); - EXPECT_EQ(1u, a->count()); - EXPECT_EQ(0u, b->count()); + CHECK(a->count() == 1u); + CHECK(b->count() == 0u); } -TEST_F(TApp, WindowsLongShortMix2) { +TEST_CASE_METHOD(TApp, "WindowsLongShortMix2", "[app]") { app.allow_windows_style_options(); auto a = app.add_flag("--c"); auto b = app.add_flag("-c"); args = {"/c"}; run(); - EXPECT_EQ(1u, a->count()); - EXPECT_EQ(0u, b->count()); + CHECK(a->count() == 1u); + CHECK(b->count() == 0u); } -TEST_F(TApp, CountNonExist) { +TEST_CASE_METHOD(TApp, "CountNonExist", "[app]") { app.add_flag("-c,--count"); args = {"-c"}; run(); - EXPECT_THROW(app.count("--nonexist"), CLI::OptionNotFound); + CHECK_THROWS_AS(app.count("--nonexist"), CLI::OptionNotFound); } -TEST_F(TApp, OneFlagLong) { +TEST_CASE_METHOD(TApp, "OneFlagLong", "[app]") { app.add_flag("-c,--count"); args = 
{"--count"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 1u); } -TEST_F(TApp, DashedOptions) { +TEST_CASE_METHOD(TApp, "DashedOptions", "[app]") { app.add_flag("-c"); app.add_flag("--q"); app.add_flag("--this,--that"); args = {"-c", "--q", "--this", "--that"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--q")); - EXPECT_EQ(2u, app.count("--this")); - EXPECT_EQ(2u, app.count("--that")); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--q") == 1u); + CHECK(app.count("--this") == 2u); + CHECK(app.count("--that") == 2u); } -TEST_F(TApp, DashedOptionsSingleString) { +TEST_CASE_METHOD(TApp, "DashedOptionsSingleString", "[app]") { app.add_flag("-c"); app.add_flag("--q"); app.add_flag("--this,--that"); app.parse("-c --q --this --that"); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--q")); - EXPECT_EQ(2u, app.count("--this")); - EXPECT_EQ(2u, app.count("--that")); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--q") == 1u); + CHECK(app.count("--this") == 2u); + CHECK(app.count("--that") == 2u); } -TEST_F(TApp, RequireOptionsError) { - using ::testing::HasSubstr; - using ::testing::Not; +TEST_CASE_METHOD(TApp, "RequireOptionsError", "[app]") { + using Catch::Matchers::Contains; + app.add_flag("-c"); app.add_flag("--q"); app.add_flag("--this,--that"); @@ -141,267 +139,267 @@ TEST_F(TApp, RequireOptionsError) { try { app.parse("-c --q --this --that"); } catch(const CLI::RequiredError &re) { - EXPECT_THAT(re.what(), Not(HasSubstr("-h,--help"))); - EXPECT_THAT(re.what(), Not(HasSubstr("help_all"))); + CHECK_THAT(re.what(), !Contains("-h,--help")); + CHECK_THAT(re.what(), !Contains("help_all")); } - EXPECT_NO_THROW(app.parse("-c --q")); - EXPECT_NO_THROW(app.parse("-c --this --that")); + CHECK_NOTHROW(app.parse("-c --q")); + CHECK_NOTHROW(app.parse("-c --this --that")); } -TEST_F(TApp, BoolFlagOverride) { +TEST_CASE_METHOD(TApp, 
"BoolFlagOverride", "[app]") { bool val{false}; auto flg = app.add_flag("--this,--that", val); app.parse("--this"); - EXPECT_TRUE(val); + CHECK(val); app.parse("--this=false"); - EXPECT_FALSE(val); + CHECK(!val); flg->disable_flag_override(true); app.parse("--this"); - EXPECT_TRUE(val); + CHECK(val); // this is allowed since the matching string is the default app.parse("--this=true"); - EXPECT_TRUE(val); + CHECK(val); - EXPECT_THROW(app.parse("--this=false"), CLI::ArgumentMismatch); + CHECK_THROWS_AS(app.parse("--this=false"), CLI::ArgumentMismatch); // try a string that specifies 'use default val' - EXPECT_NO_THROW(app.parse("--this={}")); + CHECK_NOTHROW(app.parse("--this={}")); } -TEST_F(TApp, OneFlagRef) { +TEST_CASE_METHOD(TApp, "OneFlagRef", "[app]") { int ref{0}; app.add_flag("-c,--count", ref); args = {"--count"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); - EXPECT_EQ(1, ref); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 1u); + CHECK(ref == 1); } -TEST_F(TApp, OneFlagRefValue) { +TEST_CASE_METHOD(TApp, "OneFlagRefValue", "[app]") { int ref{0}; app.add_flag("-c,--count", ref); args = {"--count=7"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); - EXPECT_EQ(7, ref); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 1u); + CHECK(ref == 7); } -TEST_F(TApp, OneFlagRefValueFalse) { +TEST_CASE_METHOD(TApp, "OneFlagRefValueFalse", "[app]") { int ref{0}; auto flg = app.add_flag("-c,--count", ref); args = {"--count=false"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); - EXPECT_EQ(-1, ref); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 1u); + CHECK(ref == -1); - EXPECT_FALSE(flg->check_fname("c")); + CHECK(!flg->check_fname("c")); args = {"--count=0"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); - EXPECT_EQ(-1, ref); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 
1u); + CHECK(ref == -1); args = {"--count=happy"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, FlagNegation) { +TEST_CASE_METHOD(TApp, "FlagNegation", "[app]") { int ref{0}; auto flg = app.add_flag("-c,--count,--ncount{false}", ref); args = {"--count", "-c", "--ncount"}; - EXPECT_FALSE(flg->check_fname("count")); - EXPECT_TRUE(flg->check_fname("ncount")); + CHECK(!flg->check_fname("count")); + CHECK(flg->check_fname("ncount")); run(); - EXPECT_EQ(3u, app.count("-c")); - EXPECT_EQ(3u, app.count("--count")); - EXPECT_EQ(3u, app.count("--ncount")); - EXPECT_EQ(1, ref); + CHECK(app.count("-c") == 3u); + CHECK(app.count("--count") == 3u); + CHECK(app.count("--ncount") == 3u); + CHECK(ref == 1); } -TEST_F(TApp, FlagNegationShortcutNotation) { +TEST_CASE_METHOD(TApp, "FlagNegationShortcutNotation", "[app]") { int ref{0}; app.add_flag("-c,--count{true},!--ncount", ref); args = {"--count=TRUE", "-c", "--ncount"}; run(); - EXPECT_EQ(3u, app.count("-c")); - EXPECT_EQ(3u, app.count("--count")); - EXPECT_EQ(3u, app.count("--ncount")); - EXPECT_EQ(1, ref); + CHECK(app.count("-c") == 3u); + CHECK(app.count("--count") == 3u); + CHECK(app.count("--ncount") == 3u); + CHECK(ref == 1); } -TEST_F(TApp, FlagNegationShortcutNotationInvalid) { +TEST_CASE_METHOD(TApp, "FlagNegationShortcutNotationInvalid", "[app]") { int ref{0}; app.add_flag("-c,--count,!--ncount", ref); args = {"--ncount=happy"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, OneString) { +TEST_CASE_METHOD(TApp, "OneString", "[app]") { std::string str; app.add_option("-s,--string", str); args = {"--string", "mystring"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "mystring"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + CHECK("mystring" == str); } -TEST_F(TApp, OneStringWindowsStyle) { +TEST_CASE_METHOD(TApp, 
"OneStringWindowsStyle", "[app]") { std::string str; app.add_option("-s,--string", str); args = {"/string", "mystring"}; app.allow_windows_style_options(); run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "mystring"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + CHECK("mystring" == str); } -TEST_F(TApp, OneStringSingleStringInput) { +TEST_CASE_METHOD(TApp, "OneStringSingleStringInput", "[app]") { std::string str; app.add_option("-s,--string", str); app.parse("--string mystring"); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "mystring"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + CHECK("mystring" == str); } -TEST_F(TApp, OneStringEqualVersion) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersion", "[app]") { std::string str; app.add_option("-s,--string", str); args = {"--string=mystring"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "mystring"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + CHECK("mystring" == str); } -TEST_F(TApp, OneStringEqualVersionWindowsStyle) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionWindowsStyle", "[app]") { std::string str; app.add_option("-s,--string", str); args = {"/string:mystring"}; app.allow_windows_style_options(); run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "mystring"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + CHECK("mystring" == str); } -TEST_F(TApp, OneStringEqualVersionSingleString) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleString", "[app]") { std::string str; app.add_option("-s,--string", str); app.parse("--string=mystring"); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "mystring"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + 
CHECK("mystring" == str); } -TEST_F(TApp, OneStringEqualVersionSingleStringQuoted) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuoted", "[app]") { std::string str; app.add_option("-s,--string", str); app.parse(R"raw(--string="this is my quoted string")raw"); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "this is my quoted string"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + CHECK("this is my quoted string" == str); } -TEST_F(TApp, OneStringEqualVersionSingleStringQuotedMultiple) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedMultiple", "[app]") { std::string str, str2, str3; app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); app.add_option("-m,--mstr", str3); app.parse(R"raw(--string="this is my quoted string" -t 'qstring 2' -m=`"quoted string"`)raw"); - EXPECT_EQ(str, "this is my quoted string"); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("this is my quoted string" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); } -TEST_F(TApp, OneStringEqualVersionSingleStringEmbeddedEqual) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringEmbeddedEqual", "[app]") { std::string str, str2, str3; app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); app.add_option("-m,--mstr", str3); app.parse(R"raw(--string="app=\"test1 b\" test2=\"frogs\"" -t 'qstring 2' -m=`"quoted string"`)raw"); - EXPECT_EQ(str, "app=\"test1 b\" test2=\"frogs\""); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("app=\"test1 b\" test2=\"frogs\"" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); app.parse(R"raw(--string="app='test1 b' test2='frogs'" -t 'qstring 2' -m=`"quoted string"`)raw"); - EXPECT_EQ(str, "app='test1 b' test2='frogs'"); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("app='test1 b' 
test2='frogs'" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); } -TEST_F(TApp, OneStringEqualVersionSingleStringEmbeddedEqualWindowsStyle) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringEmbeddedEqualWindowsStyle", "[app]") { std::string str, str2, str3; app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); app.add_option("--mstr", str3); app.allow_windows_style_options(); app.parse(R"raw(/string:"app:\"test1 b\" test2:\"frogs\"" /t 'qstring 2' /mstr:`"quoted string"`)raw"); - EXPECT_EQ(str, "app:\"test1 b\" test2:\"frogs\""); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("app:\"test1 b\" test2:\"frogs\"" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); app.parse(R"raw(/string:"app:'test1 b' test2:'frogs'" /t 'qstring 2' /mstr:`"quoted string"`)raw"); - EXPECT_EQ(str, "app:'test1 b' test2:'frogs'"); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("app:'test1 b' test2:'frogs'" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); } -TEST_F(TApp, OneStringEqualVersionSingleStringQuotedMultipleMixedStyle) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedMultipleMixedStyle", "[app]") { std::string str, str2, str3; app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); app.add_option("-m,--mstr", str3); app.allow_windows_style_options(); app.parse(R"raw(/string:"this is my quoted string" /t 'qstring 2' -m=`"quoted string"`)raw"); - EXPECT_EQ(str, "this is my quoted string"); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("this is my quoted string" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); } -TEST_F(TApp, OneStringEqualVersionSingleStringQuotedMultipleInMiddle) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedMultipleInMiddle", "[app]") { std::string str, str2, str3; 
app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); app.add_option("-m,--mstr", str3); app.parse(R"raw(--string="this is my quoted string" -t "qst\"ring 2" -m=`"quoted string"`)raw"); - EXPECT_EQ(str, "this is my quoted string"); - EXPECT_EQ(str2, "qst\"ring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("this is my quoted string" == str); + CHECK("qst\"ring 2" == str2); + CHECK("\"quoted string\"" == str3); } -TEST_F(TApp, OneStringEqualVersionSingleStringQuotedEscapedCharacters) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedEscapedCharacters", "[app]") { std::string str, str2, str3; app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); app.add_option("-m,--mstr", str3); app.parse(R"raw(--string="this is my \"quoted\" string" -t 'qst\'ring 2' -m=`"quoted\` string"`")raw"); - EXPECT_EQ(str, "this is my \"quoted\" string"); - EXPECT_EQ(str2, "qst\'ring 2"); - EXPECT_EQ(str3, "\"quoted` string\""); + CHECK("this is my \"quoted\" string" == str); + CHECK("qst\'ring 2" == str2); + CHECK("\"quoted` string\"" == str3); } -TEST_F(TApp, OneStringEqualVersionSingleStringQuotedMultipleWithEqual) { +TEST_CASE_METHOD(TApp, "OneStringEqualVersionSingleStringQuotedMultipleWithEqual", "[app]") { std::string str, str2, str3, str4; app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); app.add_option("-m,--mstr", str3); app.add_option("-j,--jstr", str4); app.parse(R"raw(--string="this is my quoted string" -t 'qstring 2' -m=`"quoted string"` --jstr=Unquoted)raw"); - EXPECT_EQ(str, "this is my quoted string"); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); - EXPECT_EQ(str4, "Unquoted"); + CHECK("this is my quoted string" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); + CHECK("Unquoted" == str4); } -TEST_F(TApp, OneStringEqualVersionSingleStringQuotedMultipleWithEqualAndProgram) { +TEST_CASE_METHOD(TApp, 
"OneStringEqualVersionSingleStringQuotedMultipleWithEqualAndProgram", "[app]") { std::string str, str2, str3, str4; app.add_option("-s,--string", str); app.add_option("-t,--tstr", str2); @@ -410,78 +408,78 @@ TEST_F(TApp, OneStringEqualVersionSingleStringQuotedMultipleWithEqualAndProgram) app.parse( R"raw(program --string="this is my quoted string" -t 'qstring 2' -m=`"quoted string"` --jstr=Unquoted)raw", true); - EXPECT_EQ(str, "this is my quoted string"); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); - EXPECT_EQ(str4, "Unquoted"); + CHECK("this is my quoted string" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); + CHECK("Unquoted" == str4); } -TEST_F(TApp, OneStringFlagLike) { +TEST_CASE_METHOD(TApp, "OneStringFlagLike", "[app]") { std::string str{"something"}; app.add_option("-s,--string", str)->expected(0, 1); args = {"--string"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_TRUE(str.empty()); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + CHECK(str.empty()); } -TEST_F(TApp, OneIntFlagLike) { +TEST_CASE_METHOD(TApp, "OneIntFlagLike", "[app]") { int val{0}; auto opt = app.add_option("-i", val)->expected(0, 1); args = {"-i"}; run(); - EXPECT_EQ(1u, app.count("-i")); + CHECK(app.count("-i") == 1u); opt->default_str("7"); run(); - EXPECT_EQ(val, 7); + CHECK(7 == val); opt->default_val(9); run(); - EXPECT_EQ(val, 9); + CHECK(9 == val); } -TEST_F(TApp, TogetherInt) { +TEST_CASE_METHOD(TApp, "TogetherInt", "[app]") { int i{0}; app.add_option("-i,--int", i); args = {"-i4"}; run(); - EXPECT_EQ(1u, app.count("--int")); - EXPECT_EQ(1u, app.count("-i")); - EXPECT_EQ(i, 4); - EXPECT_EQ(app["-i"]->as<std::string>(), "4"); - EXPECT_EQ(app["--int"]->as<double>(), 4.0); + CHECK(app.count("--int") == 1u); + CHECK(app.count("-i") == 1u); + CHECK(4 == i); + CHECK("4" == app["-i"]->as<std::string>()); + CHECK(4.0 == app["--int"]->as<double>()); } 
-TEST_F(TApp, SepInt) { +TEST_CASE_METHOD(TApp, "SepInt", "[app]") { int i{0}; app.add_option("-i,--int", i); args = {"-i", "4"}; run(); - EXPECT_EQ(1u, app.count("--int")); - EXPECT_EQ(1u, app.count("-i")); - EXPECT_EQ(i, 4); + CHECK(app.count("--int") == 1u); + CHECK(app.count("-i") == 1u); + CHECK(4 == i); } -TEST_F(TApp, DefaultStringAgain) { +TEST_CASE_METHOD(TApp, "DefaultStringAgain", "[app]") { std::string str = "previous"; app.add_option("-s,--string", str); run(); - EXPECT_EQ(0u, app.count("-s")); - EXPECT_EQ(0u, app.count("--string")); - EXPECT_EQ(str, "previous"); + CHECK(app.count("-s") == 0u); + CHECK(app.count("--string") == 0u); + CHECK("previous" == str); } -TEST_F(TApp, DefaultStringAgainEmpty) { +TEST_CASE_METHOD(TApp, "DefaultStringAgainEmpty", "[app]") { std::string str = "previous"; app.add_option("-s,--string", str); app.parse(" "); - EXPECT_EQ(0u, app.count("-s")); - EXPECT_EQ(0u, app.count("--string")); - EXPECT_EQ(str, "previous"); + CHECK(app.count("-s") == 0u); + CHECK(app.count("--string") == 0u); + CHECK("previous" == str); } -TEST_F(TApp, DualOptions) { +TEST_CASE_METHOD(TApp, "DualOptions", "[app]") { std::string str = "previous"; std::vector<std::string> vstr = {"previous"}; @@ -491,13 +489,13 @@ TEST_F(TApp, DualOptions) { args = {"--vector=one", "--vector=two"}; run(); - EXPECT_EQ(ans, vstr); + CHECK(vstr == ans); args = {"--string=one", "--string=two"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, LotsOfFlags) { +TEST_CASE_METHOD(TApp, "LotsOfFlags", "[app]") { app.add_flag("-a"); app.add_flag("-A"); @@ -505,66 +503,66 @@ TEST_F(TApp, LotsOfFlags) { args = {"-a", "-b", "-aA"}; run(); - EXPECT_EQ(2u, app.count("-a")); - EXPECT_EQ(1u, app.count("-b")); - EXPECT_EQ(1u, app.count("-A")); - EXPECT_EQ(app.count_all(), 4u); + CHECK(app.count("-a") == 2u); + CHECK(app.count("-b") == 1u); + CHECK(app.count("-A") == 1u); + CHECK(4u == app.count_all()); } -TEST_F(TApp, 
NumberFlags) { +TEST_CASE_METHOD(TApp, "NumberFlags", "[app]") { int val{0}; app.add_flag("-1{1},-2{2},-3{3},-4{4},-5{5},-6{6}, -7{7}, -8{8}, -9{9}", val); args = {"-7"}; run(); - EXPECT_EQ(1u, app.count("-1")); - EXPECT_EQ(val, 7); + CHECK(app.count("-1") == 1u); + CHECK(7 == val); } -TEST_F(TApp, DisableFlagOverrideTest) { +TEST_CASE_METHOD(TApp, "DisableFlagOverrideTest", "[app]") { int val{0}; auto opt = app.add_flag("--1{1},--2{2},--3{3},--4{4},--5{5},--6{6}, --7{7}, --8{8}, --9{9}", val); - EXPECT_FALSE(opt->get_disable_flag_override()); + CHECK(!opt->get_disable_flag_override()); opt->disable_flag_override(); args = {"--7=5"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); - EXPECT_TRUE(opt->get_disable_flag_override()); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); + CHECK(opt->get_disable_flag_override()); opt->disable_flag_override(false); - EXPECT_FALSE(opt->get_disable_flag_override()); - EXPECT_NO_THROW(run()); - EXPECT_EQ(val, 5); + CHECK(!opt->get_disable_flag_override()); + CHECK_NOTHROW(run()); + CHECK(5 == val); opt->disable_flag_override(); args = {"--7=7"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(TApp, LotsOfFlagsSingleString) { +TEST_CASE_METHOD(TApp, "LotsOfFlagsSingleString", "[app]") { app.add_flag("-a"); app.add_flag("-A"); app.add_flag("-b"); app.parse("-a -b -aA"); - EXPECT_EQ(2u, app.count("-a")); - EXPECT_EQ(1u, app.count("-b")); - EXPECT_EQ(1u, app.count("-A")); + CHECK(app.count("-a") == 2u); + CHECK(app.count("-b") == 1u); + CHECK(app.count("-A") == 1u); } -TEST_F(TApp, LotsOfFlagsSingleStringExtraSpace) { +TEST_CASE_METHOD(TApp, "LotsOfFlagsSingleStringExtraSpace", "[app]") { app.add_flag("-a"); app.add_flag("-A"); app.add_flag("-b"); app.parse(" -a -b -aA "); - EXPECT_EQ(2u, app.count("-a")); - EXPECT_EQ(1u, app.count("-b")); - EXPECT_EQ(1u, app.count("-A")); + CHECK(app.count("-a") == 2u); + CHECK(app.count("-b") == 1u); + CHECK(app.count("-A") == 1u); } -TEST_F(TApp, SingleArgVector) { 
+TEST_CASE_METHOD(TApp, "SingleArgVector", "[app]") { std::vector<std::string> channels; std::vector<std::string> iargs; @@ -574,65 +572,65 @@ TEST_F(TApp, SingleArgVector) { app.add_option("-p", path); app.parse("-c t1 -c t2 -c t3 a1 a2 a3 a4 -p happy"); - EXPECT_EQ(3u, channels.size()); - EXPECT_EQ(4u, iargs.size()); - EXPECT_EQ(path, "happy"); + CHECK(channels.size() == 3u); + CHECK(iargs.size() == 4u); + CHECK("happy" == path); app.parse("-c t1 a1 -c t2 -c t3 a2 a3 a4 -p happy"); - EXPECT_EQ(3u, channels.size()); - EXPECT_EQ(4u, iargs.size()); - EXPECT_EQ(path, "happy"); + CHECK(channels.size() == 3u); + CHECK(iargs.size() == 4u); + CHECK("happy" == path); } -TEST_F(TApp, FlagLikeOption) { +TEST_CASE_METHOD(TApp, "FlagLikeOption", "[app]") { bool val{false}; auto opt = app.add_option("--flag", val)->type_size(0)->default_str("true"); args = {"--flag"}; run(); - EXPECT_EQ(1u, app.count("--flag")); - EXPECT_TRUE(val); + CHECK(app.count("--flag") == 1u); + CHECK(val); val = false; opt->type_size(0, 0); // should be the same as above - EXPECT_EQ(opt->get_type_size_min(), 0); - EXPECT_EQ(opt->get_type_size_max(), 0); + CHECK(0 == opt->get_type_size_min()); + CHECK(0 == opt->get_type_size_max()); run(); - EXPECT_EQ(1u, app.count("--flag")); - EXPECT_TRUE(val); + CHECK(app.count("--flag") == 1u); + CHECK(val); } -TEST_F(TApp, FlagLikeIntOption) { +TEST_CASE_METHOD(TApp, "FlagLikeIntOption", "[app]") { int val{-47}; auto opt = app.add_option("--flag", val)->expected(0, 1); // normally some default value should be set, but this test is for some paths in the validators checks to skip // validation on empty string if nothing is expected opt->check(CLI::PositiveNumber); args = {"--flag"}; - EXPECT_TRUE(opt->as<std::string>().empty()); + CHECK(opt->as<std::string>().empty()); run(); - EXPECT_EQ(1u, app.count("--flag")); - EXPECT_NE(val, -47); + CHECK(app.count("--flag") == 1u); + CHECK(-47 != val); args = {"--flag", "12"}; run(); - EXPECT_EQ(val, 12); + CHECK(12 == val); 
args.clear(); run(); - EXPECT_TRUE(opt->as<std::string>().empty()); + CHECK(opt->as<std::string>().empty()); } -TEST_F(TApp, BoolOnlyFlag) { +TEST_CASE_METHOD(TApp, "BoolOnlyFlag", "[app]") { bool bflag{false}; app.add_flag("-b", bflag)->multi_option_policy(CLI::MultiOptionPolicy::Throw); args = {"-b"}; - ASSERT_NO_THROW(run()); - EXPECT_TRUE(bflag); + REQUIRE_NOTHROW(run()); + CHECK(bflag); args = {"-b", "-b"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, ShortOpts) { +TEST_CASE_METHOD(TApp, "ShortOpts", "[app]") { std::uint64_t funnyint{0}; std::string someopt; @@ -645,14 +643,14 @@ TEST_F(TApp, ShortOpts) { run(); - EXPECT_EQ(2u, app.count("-z")); - EXPECT_EQ(1u, app.count("-y")); - EXPECT_EQ(std::uint64_t{2}, funnyint); - EXPECT_EQ("zyz", someopt); - EXPECT_EQ(app.count_all(), 3u); + CHECK(app.count("-z") == 2u); + CHECK(app.count("-y") == 1u); + CHECK(funnyint == std::uint64_t{2}); + CHECK(someopt == "zyz"); + CHECK(3u == app.count_all()); } -TEST_F(TApp, TwoParamTemplateOpts) { +TEST_CASE_METHOD(TApp, "TwoParamTemplateOpts", "[app]") { double funnyint{0.0}; auto opt = app.add_option<double, unsigned int>("-y", funnyint); @@ -661,19 +659,19 @@ TEST_F(TApp, TwoParamTemplateOpts) { run(); - EXPECT_EQ(32.0, funnyint); + CHECK(funnyint == 32.0); args = {"-y", "32.3"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); args = {"-y", "-19"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); opt->capture_default_str(); - EXPECT_TRUE(opt->get_default_str().empty()); + CHECK(opt->get_default_str().empty()); } -TEST_F(TApp, DefaultOpts) { +TEST_CASE_METHOD(TApp, "DefaultOpts", "[app]") { int i{3}; std::string s = "HI"; @@ -685,13 +683,13 @@ TEST_F(TApp, DefaultOpts) { run(); - EXPECT_EQ(1u, app.count("i")); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(2, i); - EXPECT_EQ("9", s); + CHECK(app.count("i") == 1u); + 
CHECK(app.count("-s") == 1u); + CHECK(i == 2); + CHECK(s == "9"); } -TEST_F(TApp, TakeLastOpt) { +TEST_CASE_METHOD(TApp, "TakeLastOpt", "[app]") { std::string str; app.add_option("--str", str)->multi_option_policy(CLI::MultiOptionPolicy::TakeLast); @@ -700,10 +698,10 @@ TEST_F(TApp, TakeLastOpt) { run(); - EXPECT_EQ(str, "two"); + CHECK("two" == str); } -TEST_F(TApp, TakeLastOpt2) { +TEST_CASE_METHOD(TApp, "TakeLastOpt2", "[app]") { std::string str; app.add_option("--str", str)->take_last(); @@ -712,10 +710,10 @@ TEST_F(TApp, TakeLastOpt2) { run(); - EXPECT_EQ(str, "two"); + CHECK("two" == str); } -TEST_F(TApp, TakeFirstOpt) { +TEST_CASE_METHOD(TApp, "TakeFirstOpt", "[app]") { std::string str; app.add_option("--str", str)->multi_option_policy(CLI::MultiOptionPolicy::TakeFirst); @@ -724,10 +722,10 @@ TEST_F(TApp, TakeFirstOpt) { run(); - EXPECT_EQ(str, "one"); + CHECK("one" == str); } -TEST_F(TApp, TakeFirstOpt2) { +TEST_CASE_METHOD(TApp, "TakeFirstOpt2", "[app]") { std::string str; app.add_option("--str", str)->take_first(); @@ -736,10 +734,10 @@ TEST_F(TApp, TakeFirstOpt2) { run(); - EXPECT_EQ(str, "one"); + CHECK("one" == str); } -TEST_F(TApp, JoinOpt) { +TEST_CASE_METHOD(TApp, "JoinOpt", "[app]") { std::string str; app.add_option("--str", str)->multi_option_policy(CLI::MultiOptionPolicy::Join); @@ -748,10 +746,10 @@ TEST_F(TApp, JoinOpt) { run(); - EXPECT_EQ(str, "one\ntwo"); + CHECK("one\ntwo" == str); } -TEST_F(TApp, JoinOpt2) { +TEST_CASE_METHOD(TApp, "JoinOpt2", "[app]") { std::string str; app.add_option("--str", str)->join(); @@ -760,10 +758,10 @@ TEST_F(TApp, JoinOpt2) { run(); - EXPECT_EQ(str, "one\ntwo"); + CHECK("one\ntwo" == str); } -TEST_F(TApp, TakeLastOptMulti) { +TEST_CASE_METHOD(TApp, "TakeLastOptMulti", "[app]") { std::vector<int> vals; app.add_option("--long", vals)->expected(2)->take_last(); @@ -771,10 +769,10 @@ TEST_F(TApp, TakeLastOptMulti) { run(); - EXPECT_EQ(vals, std::vector<int>({2, 3})); + CHECK(std::vector<int>({2, 3}) == vals); } 
-TEST_F(TApp, TakeLastOptMulti_alternative_path) { +TEST_CASE_METHOD(TApp, "TakeLastOptMulti_alternative_path", "[app]") { std::vector<int> vals; app.add_option("--long", vals)->expected(2, -1)->take_last(); @@ -782,10 +780,10 @@ TEST_F(TApp, TakeLastOptMulti_alternative_path) { run(); - EXPECT_EQ(vals, std::vector<int>({2, 3})); + CHECK(std::vector<int>({2, 3}) == vals); } -TEST_F(TApp, TakeLastOptMultiCheck) { +TEST_CASE_METHOD(TApp, "TakeLastOptMultiCheck", "[app]") { std::vector<int> vals; auto opt = app.add_option("--long", vals)->expected(-2)->take_last(); @@ -793,12 +791,12 @@ TEST_F(TApp, TakeLastOptMultiCheck) { opt->check((!CLI::PositiveNumber).application_index(1)); args = {"--long", "-1", "2", "-3"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); - EXPECT_EQ(vals, std::vector<int>({2, -3})); + CHECK(std::vector<int>({2, -3}) == vals); } -TEST_F(TApp, TakeFirstOptMulti) { +TEST_CASE_METHOD(TApp, "TakeFirstOptMulti", "[app]") { std::vector<int> vals; app.add_option("--long", vals)->expected(2)->take_first(); @@ -806,10 +804,10 @@ TEST_F(TApp, TakeFirstOptMulti) { run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); } -TEST_F(TApp, ComplexOptMulti) { +TEST_CASE_METHOD(TApp, "ComplexOptMulti", "[app]") { std::complex<double> val; app.add_complex("--long", val)->take_first()->allow_extra_args(); @@ -817,35 +815,35 @@ TEST_F(TApp, ComplexOptMulti) { run(); - EXPECT_DOUBLE_EQ(val.real(), 1); - EXPECT_DOUBLE_EQ(val.imag(), 2); + CHECK(1 == Approx(val.real())); + CHECK(2 == Approx(val.imag())); } -TEST_F(TApp, MissingValueNonRequiredOpt) { +TEST_CASE_METHOD(TApp, "MissingValueNonRequiredOpt", "[app]") { int count{0}; app.add_option("-c,--count", count); args = {"-c"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"--count"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, MissingValueMoreThan) { 
+TEST_CASE_METHOD(TApp, "MissingValueMoreThan", "[app]") { std::vector<int> vals1; std::vector<int> vals2; app.add_option("-v", vals1)->expected(-2); app.add_option("--vals", vals2)->expected(-2); args = {"-v", "2"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"--vals", "4"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, NoMissingValueMoreThan) { +TEST_CASE_METHOD(TApp, "NoMissingValueMoreThan", "[app]") { std::vector<int> vals1; std::vector<int> vals2; app.add_option("-v", vals1)->expected(-2); @@ -853,104 +851,104 @@ TEST_F(TApp, NoMissingValueMoreThan) { args = {"-v", "2", "3", "4"}; run(); - EXPECT_EQ(vals1, std::vector<int>({2, 3, 4})); + CHECK(std::vector<int>({2, 3, 4}) == vals1); args = {"--vals", "2", "3", "4"}; run(); - EXPECT_EQ(vals2, std::vector<int>({2, 3, 4})); + CHECK(std::vector<int>({2, 3, 4}) == vals2); } -TEST_F(TApp, NotRequiredOptsSingle) { +TEST_CASE_METHOD(TApp, "NotRequiredOptsSingle", "[app]") { std::string str; app.add_option("--str", str); args = {"--str"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, NotRequiredOptsSingleShort) { +TEST_CASE_METHOD(TApp, "NotRequiredOptsSingleShort", "[app]") { std::string str; app.add_option("-s", str); args = {"-s"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, RequiredOptsSingle) { +TEST_CASE_METHOD(TApp, "RequiredOptsSingle", "[app]") { std::string str; app.add_option("--str", str)->required(); args = {"--str"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, RequiredOptsSingleShort) { +TEST_CASE_METHOD(TApp, "RequiredOptsSingleShort", "[app]") { std::string str; app.add_option("-s", str)->required(); args = {"-s"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), 
CLI::ArgumentMismatch); } -TEST_F(TApp, RequiredOptsDouble) { +TEST_CASE_METHOD(TApp, "RequiredOptsDouble", "[app]") { std::vector<std::string> strs; app.add_option("--str", strs)->required()->expected(2); args = {"--str", "one"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"--str", "one", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); + CHECK(std::vector<std::string>({"one", "two"}) == strs); } -TEST_F(TApp, RequiredOptsDoubleShort) { +TEST_CASE_METHOD(TApp, "RequiredOptsDoubleShort", "[app]") { std::vector<std::string> strs; app.add_option("-s", strs)->required()->expected(2); args = {"-s", "one"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"-s", "one", "-s", "one", "-s", "one"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, RequiredOptsDoubleNeg) { +TEST_CASE_METHOD(TApp, "RequiredOptsDoubleNeg", "[app]") { std::vector<std::string> strs; app.add_option("-s", strs)->required()->expected(-2); args = {"-s", "one"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"-s", "one", "two", "-s", "three"}; - ASSERT_NO_THROW(run()); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two", "three"})); + REQUIRE_NOTHROW(run()); + CHECK(std::vector<std::string>({"one", "two", "three"}) == strs); args = {"-s", "one", "two"}; - ASSERT_NO_THROW(run()); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); + REQUIRE_NOTHROW(run()); + CHECK(std::vector<std::string>({"one", "two"}) == strs); } // This makes sure unlimited option priority is // correct for space vs. 
no space #90 -TEST_F(TApp, PositionalNoSpace) { +TEST_CASE_METHOD(TApp, "PositionalNoSpace", "[app]") { std::vector<std::string> options; std::string foo, bar; @@ -961,37 +959,37 @@ TEST_F(TApp, PositionalNoSpace) { args = {"-O", "Test", "param1", "param2"}; run(); - EXPECT_EQ(options.size(), 1u); - EXPECT_EQ(options.at(0), "Test"); + CHECK(1u == options.size()); + CHECK("Test" == options.at(0)); args = {"-OTest", "param1", "param2"}; run(); - EXPECT_EQ(options.size(), 1u); - EXPECT_EQ(options.at(0), "Test"); + CHECK(1u == options.size()); + CHECK("Test" == options.at(0)); } // Tests positionals at end -TEST_F(TApp, PositionalAtEnd) { +TEST_CASE_METHOD(TApp, "PositionalAtEnd", "[app]") { std::string options; std::string foo; app.add_option("-O", options); app.add_option("foo", foo); app.positionals_at_end(); - EXPECT_TRUE(app.get_positionals_at_end()); + CHECK(app.get_positionals_at_end()); args = {"-O", "Test", "param1"}; run(); - EXPECT_EQ(options, "Test"); - EXPECT_EQ(foo, "param1"); + CHECK("Test" == options); + CHECK("param1" == foo); args = {"param2", "-O", "Test"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } // Tests positionals at end -TEST_F(TApp, RequiredPositionals) { +TEST_CASE_METHOD(TApp, "RequiredPositionals", "[app]") { std::vector<std::string> sources; std::string dest; app.add_option("src", sources); @@ -1001,18 +999,18 @@ TEST_F(TApp, RequiredPositionals) { args = {"1", "2", "3"}; run(); - EXPECT_EQ(sources.size(), 2u); - EXPECT_EQ(dest, "3"); + CHECK(2u == sources.size()); + CHECK("3" == dest); args = {"a"}; sources.clear(); run(); - EXPECT_EQ(sources.size(), 0u); - EXPECT_EQ(dest, "a"); + CHECK(0u == sources.size()); + CHECK("a" == dest); } -TEST_F(TApp, RequiredPositionalVector) { +TEST_CASE_METHOD(TApp, "RequiredPositionalVector", "[app]") { std::string d1; std::string d2; std::string d3; @@ -1028,19 +1026,19 @@ TEST_F(TApp, RequiredPositionalVector) { args = {"1", "2", "3"}; run(); - 
EXPECT_EQ(sources.size(), 1u); - EXPECT_EQ(d1, "1"); - EXPECT_EQ(d2, "2"); - EXPECT_TRUE(d3.empty()); + CHECK(1u == sources.size()); + CHECK("1" == d1); + CHECK("2" == d2); + CHECK(d3.empty()); args = {"a"}; sources.clear(); run(); - EXPECT_EQ(sources.size(), 1u); + CHECK(1u == sources.size()); } // Tests positionals at end -TEST_F(TApp, RequiredPositionalValidation) { +TEST_CASE_METHOD(TApp, "RequiredPositionalValidation", "[app]") { std::vector<std::string> sources; int dest; // required std::string d2; @@ -1052,13 +1050,13 @@ TEST_F(TApp, RequiredPositionalValidation) { args = {"1", "2", "string", "3"}; run(); - EXPECT_EQ(sources.size(), 2u); - EXPECT_EQ(dest, 3); - EXPECT_EQ(d2, "string"); + CHECK(2u == sources.size()); + CHECK(3 == dest); + CHECK("string" == d2); } // Tests positionals at end -TEST_F(TApp, PositionalValidation) { +TEST_CASE_METHOD(TApp, "PositionalValidation", "[app]") { std::string options; std::string foo; @@ -1069,19 +1067,19 @@ TEST_F(TApp, PositionalValidation) { args = {"1", "param1"}; run(); - EXPECT_EQ(options, "1"); - EXPECT_EQ(foo, "param1"); + CHECK("1" == options); + CHECK("param1" == foo); args = {"param1", "1"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); - EXPECT_EQ(options, "1"); - EXPECT_EQ(foo, "param1"); + CHECK("1" == options); + CHECK("param1" == foo); - EXPECT_NE(app.get_option("bar")->get_validator("valbar"), nullptr); + CHECK(nullptr != app.get_option("bar")->get_validator("valbar")); } -TEST_F(TApp, PositionalNoSpaceLong) { +TEST_CASE_METHOD(TApp, "PositionalNoSpaceLong", "[app]") { std::vector<std::string> options; std::string foo, bar; @@ -1092,107 +1090,107 @@ TEST_F(TApp, PositionalNoSpaceLong) { args = {"--option", "Test", "param1", "param2"}; run(); - EXPECT_EQ(options.size(), 1u); - EXPECT_EQ(options.at(0), "Test"); + CHECK(1u == options.size()); + CHECK("Test" == options.at(0)); args = {"--option=Test", "param1", "param2"}; run(); - EXPECT_EQ(options.size(), 1u); - EXPECT_EQ(options.at(0), "Test"); + 
CHECK(1u == options.size()); + CHECK("Test" == options.at(0)); } -TEST_F(TApp, RequiredOptsUnlimited) { +TEST_CASE_METHOD(TApp, "RequiredOptsUnlimited", "[app]") { std::vector<std::string> strs; app.add_option("--str", strs)->required(); args = {"--str"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"--str", "one", "--str", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); + CHECK(std::vector<std::string>({"one", "two"}) == strs); args = {"--str", "one", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); + CHECK(std::vector<std::string>({"one", "two"}) == strs); // It's better to feed a hungry option than to feed allow_extras app.allow_extras(); run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); - EXPECT_EQ(app.remaining(), std::vector<std::string>({})); + CHECK(std::vector<std::string>({"one", "two"}) == strs); + CHECK(std::vector<std::string>({}) == app.remaining()); app.allow_extras(false); std::vector<std::string> remain; auto popt = app.add_option("positional", remain); run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); - EXPECT_EQ(remain, std::vector<std::string>()); + CHECK(std::vector<std::string>({"one", "two"}) == strs); + CHECK(std::vector<std::string>() == remain); args = {"--str", "one", "--", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one"})); - EXPECT_EQ(remain, std::vector<std::string>({"two"})); + CHECK(std::vector<std::string>({"one"}) == strs); + CHECK(std::vector<std::string>({"two"}) == remain); args = {"one", "--str", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"two"})); - EXPECT_EQ(remain, std::vector<std::string>({"one"})); + CHECK(std::vector<std::string>({"two"}) == strs); + CHECK(std::vector<std::string>({"one"}) == remain); args = {"--str", "one", "two"}; popt->required(); run(); - EXPECT_EQ(strs, std::vector<std::string>({"one"})); - EXPECT_EQ(remain, 
std::vector<std::string>({"two"})); + CHECK(std::vector<std::string>({"one"}) == strs); + CHECK(std::vector<std::string>({"two"}) == remain); } -TEST_F(TApp, RequiredOptsUnlimitedShort) { +TEST_CASE_METHOD(TApp, "RequiredOptsUnlimitedShort", "[app]") { std::vector<std::string> strs; app.add_option("-s", strs)->required(); args = {"-s"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"-s", "one", "-s", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); + CHECK(std::vector<std::string>({"one", "two"}) == strs); args = {"-s", "one", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); + CHECK(std::vector<std::string>({"one", "two"}) == strs); // It's better to feed a hungry option than to feed allow_extras app.allow_extras(); run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); - EXPECT_EQ(app.remaining(), std::vector<std::string>({})); + CHECK(std::vector<std::string>({"one", "two"}) == strs); + CHECK(std::vector<std::string>({}) == app.remaining()); app.allow_extras(false); std::vector<std::string> remain; app.add_option("positional", remain); run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "two"})); - EXPECT_EQ(remain, std::vector<std::string>()); + CHECK(std::vector<std::string>({"one", "two"}) == strs); + CHECK(std::vector<std::string>() == remain); args = {"-s", "one", "--", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one"})); - EXPECT_EQ(remain, std::vector<std::string>({"two"})); + CHECK(std::vector<std::string>({"one"}) == strs); + CHECK(std::vector<std::string>({"two"}) == remain); args = {"one", "-s", "two"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"two"})); - EXPECT_EQ(remain, std::vector<std::string>({"one"})); + CHECK(std::vector<std::string>({"two"}) == strs); + CHECK(std::vector<std::string>({"one"}) == remain); } -TEST_F(TApp, OptsUnlimitedEnd) { +TEST_CASE_METHOD(TApp, "OptsUnlimitedEnd", "[app]") 
{ std::vector<std::string> strs; app.add_option("-s,--str", strs); app.allow_extras(); @@ -1201,11 +1199,11 @@ TEST_F(TApp, OptsUnlimitedEnd) { run(); - EXPECT_EQ(strs, std::vector<std::string>({"two", "three"})); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"one", "four"})); + CHECK(std::vector<std::string>({"two", "three"}) == strs); + CHECK(std::vector<std::string>({"one", "four"}) == app.remaining()); } -TEST_F(TApp, RequireOptPriority) { +TEST_CASE_METHOD(TApp, "RequireOptPriority", "[app]") { std::vector<std::string> strs; app.add_option("--str", strs); @@ -1215,17 +1213,17 @@ TEST_F(TApp, RequireOptPriority) { args = {"--str", "one", "two", "three"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one"})); - EXPECT_EQ(remain, std::vector<std::string>({"two", "three"})); + CHECK(std::vector<std::string>({"one"}) == strs); + CHECK(std::vector<std::string>({"two", "three"}) == remain); args = {"two", "three", "--str", "one", "four"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "four"})); - EXPECT_EQ(remain, std::vector<std::string>({"two", "three"})); + CHECK(std::vector<std::string>({"one", "four"}) == strs); + CHECK(std::vector<std::string>({"two", "three"}) == remain); } -TEST_F(TApp, RequireOptPriorityShort) { +TEST_CASE_METHOD(TApp, "RequireOptPriorityShort", "[app]") { std::vector<std::string> strs; app.add_option("-s", strs)->required(); @@ -1235,53 +1233,53 @@ TEST_F(TApp, RequireOptPriorityShort) { args = {"-s", "one", "two", "three"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one"})); - EXPECT_EQ(remain, std::vector<std::string>({"two", "three"})); + CHECK(std::vector<std::string>({"one"}) == strs); + CHECK(std::vector<std::string>({"two", "three"}) == remain); args = {"two", "three", "-s", "one", "four"}; run(); - EXPECT_EQ(strs, std::vector<std::string>({"one", "four"})); - EXPECT_EQ(remain, std::vector<std::string>({"two", "three"})); + CHECK(std::vector<std::string>({"one", "four"}) == strs); + 
CHECK(std::vector<std::string>({"two", "three"}) == remain); } -TEST_F(TApp, NotRequiredExpectedDouble) { +TEST_CASE_METHOD(TApp, "NotRequiredExpectedDouble", "[app]") { std::vector<std::string> strs; app.add_option("--str", strs)->expected(2); args = {"--str", "one"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, NotRequiredExpectedDoubleShort) { +TEST_CASE_METHOD(TApp, "NotRequiredExpectedDoubleShort", "[app]") { std::vector<std::string> strs; app.add_option("-s", strs)->expected(2); args = {"-s", "one"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, RequiredFlags) { +TEST_CASE_METHOD(TApp, "RequiredFlags", "[app]") { app.add_flag("-a")->required(); app.add_flag("-b")->mandatory(); // Alternate term - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"-a"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"-b"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"-a", "-b"}; run(); } -TEST_F(TApp, CallbackFlags) { +TEST_CASE_METHOD(TApp, "CallbackFlags", "[app]") { std::int64_t value{0}; @@ -1290,20 +1288,20 @@ TEST_F(TApp, CallbackFlags) { app.add_flag_function("-v", func); run(); - EXPECT_EQ(value, 0u); + CHECK(0u == value); args = {"-v"}; run(); - EXPECT_EQ(value, 1u); + CHECK(1u == value); args = {"-vv"}; run(); - EXPECT_EQ(value, 2u); + CHECK(2u == value); - EXPECT_THROW(app.add_flag_function("hi", func), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_flag_function("hi", func), CLI::IncorrectConstruction); } -TEST_F(TApp, CallbackFlagsFalse) { +TEST_CASE_METHOD(TApp, "CallbackFlagsFalse", "[app]") { std::int64_t value = 0; auto func = [&value](std::int64_t x) { value = x; }; @@ -1311,28 +1309,28 @@ TEST_F(TApp, CallbackFlagsFalse) { 
app.add_flag_function("-v,-f{false},--val,--fval{false}", func); run(); - EXPECT_EQ(value, 0); + CHECK(0 == value); args = {"-f"}; run(); - EXPECT_EQ(value, -1); + CHECK(-1 == value); args = {"-vfv"}; run(); - EXPECT_EQ(value, 1); + CHECK(1 == value); args = {"--fval"}; run(); - EXPECT_EQ(value, -1); + CHECK(-1 == value); args = {"--fval=2"}; run(); - EXPECT_EQ(value, -2); + CHECK(-2 == value); - EXPECT_THROW(app.add_flag_function("hi", func), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_flag_function("hi", func), CLI::IncorrectConstruction); } -TEST_F(TApp, CallbackFlagsFalseShortcut) { +TEST_CASE_METHOD(TApp, "CallbackFlagsFalseShortcut", "[app]") { std::int64_t value = 0; auto func = [&value](std::int64_t x) { value = x; }; @@ -1340,29 +1338,29 @@ TEST_F(TApp, CallbackFlagsFalseShortcut) { app.add_flag_function("-v,!-f,--val,!--fval", func); run(); - EXPECT_EQ(value, 0); + CHECK(0 == value); args = {"-f"}; run(); - EXPECT_EQ(value, -1); + CHECK(-1 == value); args = {"-vfv"}; run(); - EXPECT_EQ(value, 1); + CHECK(1 == value); args = {"--fval"}; run(); - EXPECT_EQ(value, -1); + CHECK(-1 == value); args = {"--fval=2"}; run(); - EXPECT_EQ(value, -2); + CHECK(-2 == value); - EXPECT_THROW(app.add_flag_function("hi", func), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_flag_function("hi", func), CLI::IncorrectConstruction); } #if __cplusplus >= 201402L || _MSC_VER >= 1900 -TEST_F(TApp, CallbackFlagsAuto) { +TEST_CASE_METHOD(TApp, "CallbackFlagsAuto", "[app]") { std::int64_t value{0}; @@ -1371,21 +1369,21 @@ TEST_F(TApp, CallbackFlagsAuto) { app.add_flag("-v", func); run(); - EXPECT_EQ(value, 0u); + CHECK(0u == value); args = {"-v"}; run(); - EXPECT_EQ(value, 1u); + CHECK(1u == value); args = {"-vv"}; run(); - EXPECT_EQ(value, 2u); + CHECK(2u == value); - EXPECT_THROW(app.add_flag("hi", func), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_flag("hi", func), CLI::IncorrectConstruction); } #endif -TEST_F(TApp, Positionals) { 
+TEST_CASE_METHOD(TApp, "Positionals", "[app]") { std::string posit1; std::string posit2; @@ -1396,13 +1394,13 @@ TEST_F(TApp, Positionals) { run(); - EXPECT_EQ(1u, app.count("posit1")); - EXPECT_EQ(1u, app.count("posit2")); - EXPECT_EQ("thing1", posit1); - EXPECT_EQ("thing2", posit2); + CHECK(app.count("posit1") == 1u); + CHECK(app.count("posit2") == 1u); + CHECK(posit1 == "thing1"); + CHECK(posit2 == "thing2"); } -TEST_F(TApp, ForcedPositional) { +TEST_CASE_METHOD(TApp, "ForcedPositional", "[app]") { std::vector<std::string> posit; auto one = app.add_flag("--one"); app.add_option("posit", posit); @@ -1410,18 +1408,18 @@ TEST_F(TApp, ForcedPositional) { args = {"--one", "two", "three"}; run(); std::vector<std::string> answers1 = {"two", "three"}; - EXPECT_TRUE(one->count()); - EXPECT_EQ(answers1, posit); + CHECK(one->count()); + CHECK(posit == answers1); args = {"--", "--one", "two", "three"}; std::vector<std::string> answers2 = {"--one", "two", "three"}; run(); - EXPECT_FALSE(one->count()); - EXPECT_EQ(answers2, posit); + CHECK(!one->count()); + CHECK(posit == answers2); } -TEST_F(TApp, MixedPositionals) { +TEST_CASE_METHOD(TApp, "MixedPositionals", "[app]") { int positional_int{0}; std::string positional_string; @@ -1432,28 +1430,28 @@ TEST_F(TApp, MixedPositionals) { run(); - EXPECT_EQ(1u, app.count("posit2")); - EXPECT_EQ(1u, app.count("--posit1")); - EXPECT_EQ(7, positional_int); - EXPECT_EQ("thing2", positional_string); + CHECK(app.count("posit2") == 1u); + CHECK(app.count("--posit1") == 1u); + CHECK(positional_int == 7); + CHECK(positional_string == "thing2"); } -TEST_F(TApp, BigPositional) { +TEST_CASE_METHOD(TApp, "BigPositional", "[app]") { std::vector<std::string> vec; app.add_option("pos", vec); args = {"one"}; run(); - EXPECT_EQ(args, vec); + CHECK(vec == args); args = {"one", "two"}; run(); - EXPECT_EQ(args, vec); + CHECK(vec == args); } -TEST_F(TApp, Reset) { +TEST_CASE_METHOD(TApp, "Reset", "[app]") { app.add_flag("--simple"); double doub{0.0}; @@ 
-1463,139 +1461,139 @@ TEST_F(TApp, Reset) { run(); - EXPECT_EQ(1u, app.count("--simple")); - EXPECT_EQ(1u, app.count("-d")); - EXPECT_DOUBLE_EQ(1.2, doub); + CHECK(app.count("--simple") == 1u); + CHECK(app.count("-d") == 1u); + CHECK(doub == Approx(1.2)); app.clear(); - EXPECT_EQ(0u, app.count("--simple")); - EXPECT_EQ(0u, app.count("-d")); + CHECK(app.count("--simple") == 0u); + CHECK(app.count("-d") == 0u); run(); - EXPECT_EQ(1u, app.count("--simple")); - EXPECT_EQ(1u, app.count("-d")); - EXPECT_DOUBLE_EQ(1.2, doub); + CHECK(app.count("--simple") == 1u); + CHECK(app.count("-d") == 1u); + CHECK(doub == Approx(1.2)); } -TEST_F(TApp, RemoveOption) { +TEST_CASE_METHOD(TApp, "RemoveOption", "[app]") { app.add_flag("--one"); auto opt = app.add_flag("--two"); - EXPECT_TRUE(app.remove_option(opt)); - EXPECT_FALSE(app.remove_option(opt)); + CHECK(app.remove_option(opt)); + CHECK(!app.remove_option(opt)); args = {"--two"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(TApp, RemoveNeedsLinks) { +TEST_CASE_METHOD(TApp, "RemoveNeedsLinks", "[app]") { auto one = app.add_flag("--one"); auto two = app.add_flag("--two"); two->needs(one); one->needs(two); - EXPECT_TRUE(app.remove_option(one)); + CHECK(app.remove_option(one)); args = {"--two"}; run(); } -TEST_F(TApp, RemoveExcludesLinks) { +TEST_CASE_METHOD(TApp, "RemoveExcludesLinks", "[app]") { auto one = app.add_flag("--one"); auto two = app.add_flag("--two"); two->excludes(one); one->excludes(two); - EXPECT_TRUE(app.remove_option(one)); + CHECK(app.remove_option(one)); args = {"--two"}; run(); // Mostly hoping it does not crash } -TEST_F(TApp, FileNotExists) { +TEST_CASE_METHOD(TApp, "FileNotExists", "[app]") { std::string myfile{"TestNonFileNotUsed.txt"}; - ASSERT_NO_THROW(CLI::NonexistentPath(myfile)); + REQUIRE_NOTHROW(CLI::NonexistentPath(myfile)); std::string filename; auto opt = app.add_option("--file", filename)->check(CLI::NonexistentPath, "path_check"); args = 
{"--file", myfile}; run(); - EXPECT_EQ(myfile, filename); + CHECK(filename == myfile); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); - EXPECT_THROW(run(), CLI::ValidationError); + CHECK(ok); + CHECK_THROWS_AS(run(), CLI::ValidationError); // deactivate the check, so it should run now opt->get_validator("path_check")->active(false); - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); std::remove(myfile.c_str()); - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK(!CLI::ExistingFile(myfile).empty()); } -TEST_F(TApp, FileExists) { +TEST_CASE_METHOD(TApp, "FileExists", "[app]") { std::string myfile{"TestNonFileNotUsed.txt"}; - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK(!CLI::ExistingFile(myfile).empty()); std::string filename = "Failed"; app.add_option("--file", filename)->check(CLI::ExistingFile); args = {"--file", myfile}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); + CHECK(ok); run(); - EXPECT_EQ(myfile, filename); + CHECK(filename == myfile); std::remove(myfile.c_str()); - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK(!CLI::ExistingFile(myfile).empty()); } -TEST_F(TApp, NotFileExists) { +TEST_CASE_METHOD(TApp, "NotFileExists", "[app]") { std::string myfile{"TestNonFileNotUsed.txt"}; - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK(!CLI::ExistingFile(myfile).empty()); std::string filename = "Failed"; app.add_option("--file", filename)->check(!CLI::ExistingFile); args = {"--file", myfile}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); - EXPECT_THROW(run(), CLI::ValidationError); + CHECK(ok); + CHECK_THROWS_AS(run(), CLI::ValidationError); std::remove(myfile.c_str()); - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); 
+ CHECK(!CLI::ExistingFile(myfile).empty()); } -TEST_F(TApp, DefaultedResult) { +TEST_CASE_METHOD(TApp, "DefaultedResult", "[app]") { std::string sval = "NA"; int ival{0}; auto opts = app.add_option("--string", sval)->capture_default_str(); auto optv = app.add_option("--val", ival); args = {}; run(); - EXPECT_EQ(sval, "NA"); + CHECK("NA" == sval); std::string nString; opts->results(nString); - EXPECT_EQ(nString, "NA"); + CHECK("NA" == nString); int newIval; - // EXPECT_THROW(optv->results(newIval), CLI::ConversionError); + // CHECK_THROWS_AS (optv->results(newIval), CLI::ConversionError); optv->default_str("442"); optv->results(newIval); - EXPECT_EQ(newIval, 442); + CHECK(442 == newIval); } -TEST_F(TApp, OriginalOrder) { +TEST_CASE_METHOD(TApp, "OriginalOrder", "[app]") { std::vector<int> st1; CLI::Option *op1 = app.add_option("-a", st1); std::vector<int> st2; @@ -1605,13 +1603,13 @@ TEST_F(TApp, OriginalOrder) { run(); - EXPECT_EQ(st1, std::vector<int>({1, 3, 4})); - EXPECT_EQ(st2, std::vector<int>({2})); + CHECK(std::vector<int>({1, 3, 4}) == st1); + CHECK(std::vector<int>({2}) == st2); - EXPECT_EQ(app.parse_order(), std::vector<CLI::Option *>({op1, op2, op1, op1})); + CHECK(std::vector<CLI::Option *>({op1, op2, op1, op1}) == app.parse_order()); } -TEST_F(TApp, NeedsFlags) { +TEST_CASE_METHOD(TApp, "NeedsFlags", "[app]") { CLI::Option *opt = app.add_flag("-s,--string"); app.add_flag("--both")->needs(opt); @@ -1624,12 +1622,12 @@ TEST_F(TApp, NeedsFlags) { run(); args = {"--both"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); - EXPECT_NO_THROW(opt->needs(opt)); + CHECK_NOTHROW(opt->needs(opt)); } -TEST_F(TApp, ExcludesFlags) { +TEST_CASE_METHOD(TApp, "ExcludesFlags", "[app]") { CLI::Option *opt = app.add_flag("-s,--string"); app.add_flag("--nostr")->excludes(opt); @@ -1642,15 +1640,15 @@ TEST_F(TApp, ExcludesFlags) { run(); args = {"--nostr", "-s"}; - EXPECT_THROW(run(), CLI::ExcludesError); + CHECK_THROWS_AS(run(), 
CLI::ExcludesError); args = {"--string", "--nostr"}; - EXPECT_THROW(run(), CLI::ExcludesError); + CHECK_THROWS_AS(run(), CLI::ExcludesError); - EXPECT_THROW(opt->excludes(opt), CLI::IncorrectConstruction); + CHECK_THROWS_AS(opt->excludes(opt), CLI::IncorrectConstruction); } -TEST_F(TApp, ExcludesMixedFlags) { +TEST_CASE_METHOD(TApp, "ExcludesMixedFlags", "[app]") { CLI::Option *opt1 = app.add_flag("--opt1"); app.add_flag("--opt2"); CLI::Option *opt3 = app.add_flag("--opt3"); @@ -1665,13 +1663,13 @@ TEST_F(TApp, ExcludesMixedFlags) { run(); args = {"--no", "--opt1"}; - EXPECT_THROW(run(), CLI::ExcludesError); + CHECK_THROWS_AS(run(), CLI::ExcludesError); args = {"--no", "--opt2"}; - EXPECT_THROW(run(), CLI::ExcludesError); + CHECK_THROWS_AS(run(), CLI::ExcludesError); } -TEST_F(TApp, NeedsMultiFlags) { +TEST_CASE_METHOD(TApp, "NeedsMultiFlags", "[app]") { CLI::Option *opt1 = app.add_flag("--opt1"); CLI::Option *opt2 = app.add_flag("--opt2"); CLI::Option *opt3 = app.add_flag("--opt3"); @@ -1686,19 +1684,19 @@ TEST_F(TApp, NeedsMultiFlags) { run(); args = {"--optall"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--optall", "--opt1"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--optall", "--opt2", "--opt1"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--optall", "--opt1", "--opt2", "--opt3"}; run(); } -TEST_F(TApp, NeedsMixedFlags) { +TEST_CASE_METHOD(TApp, "NeedsMixedFlags", "[app]") { CLI::Option *opt1 = app.add_flag("--opt1"); app.add_flag("--opt2"); app.add_flag("--opt3"); @@ -1713,19 +1711,19 @@ TEST_F(TApp, NeedsMixedFlags) { run(); args = {"--optall"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--optall", "--opt1"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--optall", "--opt2", "--opt1"}; - 
EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--optall", "--opt1", "--opt2", "--opt3"}; run(); } -TEST_F(TApp, NeedsChainedFlags) { +TEST_CASE_METHOD(TApp, "NeedsChainedFlags", "[app]") { CLI::Option *opt1 = app.add_flag("--opt1"); CLI::Option *opt2 = app.add_flag("--opt2")->needs(opt1); app.add_flag("--opt3")->needs(opt2); @@ -1736,16 +1734,16 @@ TEST_F(TApp, NeedsChainedFlags) { run(); args = {"--opt2"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--opt3"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--opt3", "--opt2"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--opt3", "--opt1"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--opt2", "--opt1"}; run(); @@ -1754,7 +1752,7 @@ TEST_F(TApp, NeedsChainedFlags) { run(); } -TEST_F(TApp, Env) { +TEST_CASE_METHOD(TApp, "Env", "[app]") { put_env("CLI11_TEST_ENV_TMP", "2"); @@ -1763,18 +1761,18 @@ TEST_F(TApp, Env) { run(); - EXPECT_EQ(2, val); - EXPECT_EQ(1u, vopt->count()); + CHECK(val == 2); + CHECK(vopt->count() == 1u); vopt->required(); run(); unset_env("CLI11_TEST_ENV_TMP"); - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } // curiously check if an environmental only option works -TEST_F(TApp, EnvOnly) { +TEST_CASE_METHOD(TApp, "EnvOnly", "[app]") { put_env("CLI11_TEST_ENV_TMP", "2"); @@ -1783,25 +1781,25 @@ TEST_F(TApp, EnvOnly) { run(); - EXPECT_EQ(2, val); - EXPECT_EQ(1u, vopt->count()); + CHECK(val == 2); + CHECK(vopt->count() == 1u); vopt->required(); run(); unset_env("CLI11_TEST_ENV_TMP"); - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } -TEST_F(TApp, RangeInt) { +TEST_CASE_METHOD(TApp, "RangeInt", "[app]") { int x{0}; app.add_option("--one", x)->check(CLI::Range(3, 
6)); args = {"--one=1"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--one=7"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--one=3"}; run(); @@ -1813,17 +1811,17 @@ TEST_F(TApp, RangeInt) { run(); } -TEST_F(TApp, RangeDouble) { +TEST_CASE_METHOD(TApp, "RangeDouble", "[app]") { double x{0.0}; /// Note that this must be a double in Range, too app.add_option("--one", x)->check(CLI::Range(3.0, 6.0)); args = {"--one=1"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--one=7"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--one=3"}; run(); @@ -1835,26 +1833,26 @@ TEST_F(TApp, RangeDouble) { run(); } -TEST_F(TApp, typeCheck) { +TEST_CASE_METHOD(TApp, "typeCheck", "[app]") { /// Note that this must be a double in Range, too app.add_option("--one")->check(CLI::TypeValidator<unsigned int>()); args = {"--one=1"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"--one=-7"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--one=error"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--one=4.568"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } // Check to make sure programmatic access to left over is available -TEST_F(TApp, AllowExtras) { +TEST_CASE_METHOD(TApp, "AllowExtras", "[app]") { app.allow_extras(); @@ -1863,32 +1861,32 @@ TEST_F(TApp, AllowExtras) { args = {"-x", "-f"}; - ASSERT_NO_THROW(run()); - EXPECT_TRUE(val); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"-x"})); + REQUIRE_NOTHROW(run()); + CHECK(val); + CHECK(std::vector<std::string>({"-x"}) == app.remaining()); } -TEST_F(TApp, AllowExtrasOrder) { +TEST_CASE_METHOD(TApp, "AllowExtrasOrder", "[app]") { 
app.allow_extras(); args = {"-x", "-f"}; - ASSERT_NO_THROW(run()); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"-x", "-f"})); + REQUIRE_NOTHROW(run()); + CHECK(std::vector<std::string>({"-x", "-f"}) == app.remaining()); std::vector<std::string> left_over = app.remaining(); app.parse(left_over); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"-f", "-x"})); - EXPECT_EQ(app.remaining_for_passthrough(), left_over); + CHECK(std::vector<std::string>({"-f", "-x"}) == app.remaining()); + CHECK(left_over == app.remaining_for_passthrough()); } -TEST_F(TApp, AllowExtrasCascade) { +TEST_CASE_METHOD(TApp, "AllowExtrasCascade", "[app]") { app.allow_extras(); args = {"-x", "45", "-f", "27"}; - ASSERT_NO_THROW(run()); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"-x", "45", "-f", "27"})); + REQUIRE_NOTHROW(run()); + CHECK(std::vector<std::string>({"-x", "45", "-f", "27"}) == app.remaining()); std::vector<std::string> left_over = app.remaining_for_passthrough(); @@ -1899,23 +1897,23 @@ TEST_F(TApp, AllowExtrasCascade) { capp.add_option("-f", v2); capp.parse(left_over); - EXPECT_EQ(v1, 45); - EXPECT_EQ(v2, 27); + CHECK(45 == v1); + CHECK(27 == v2); } // makes sure the error throws on the rValue version of the parse -TEST_F(TApp, ExtrasErrorRvalueParse) { +TEST_CASE_METHOD(TApp, "ExtrasErrorRvalueParse", "[app]") { args = {"-x", "45", "-f", "27"}; - EXPECT_THROW(app.parse(std::vector<std::string>({"-x", "45", "-f", "27"})), CLI::ExtrasError); + CHECK_THROWS_AS(app.parse(std::vector<std::string>({"-x", "45", "-f", "27"})), CLI::ExtrasError); } -TEST_F(TApp, AllowExtrasCascadeDirect) { +TEST_CASE_METHOD(TApp, "AllowExtrasCascadeDirect", "[app]") { app.allow_extras(); args = {"-x", "45", "-f", "27"}; - ASSERT_NO_THROW(run()); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"-x", "45", "-f", "27"})); + REQUIRE_NOTHROW(run()); + CHECK(std::vector<std::string>({"-x", "45", "-f", "27"}) == app.remaining()); CLI::App capp{"cascade_program"}; int v1{0}; @@ 
-1924,11 +1922,11 @@ TEST_F(TApp, AllowExtrasCascadeDirect) { capp.add_option("-f", v2); capp.parse(app.remaining_for_passthrough()); - EXPECT_EQ(v1, 45); - EXPECT_EQ(v2, 27); + CHECK(45 == v1); + CHECK(27 == v2); } -TEST_F(TApp, AllowExtrasArgModify) { +TEST_CASE_METHOD(TApp, "AllowExtrasArgModify", "[app]") { int v1{0}; int v2{0}; @@ -1937,88 +1935,89 @@ TEST_F(TApp, AllowExtrasArgModify) { args = {"27", "-f", "45", "-x"}; auto cargs = args; app.parse(args); - EXPECT_EQ(args, std::vector<std::string>({"45", "-x"})); + CHECK(std::vector<std::string>({"45", "-x"}) == args); CLI::App capp{"cascade_program"}; capp.add_option("-x", v1); capp.parse(args); - EXPECT_EQ(v1, 45); - EXPECT_EQ(v2, 27); + CHECK(45 == v1); + CHECK(27 == v2); } // Test horrible error -TEST_F(TApp, CheckShortFail) { +TEST_CASE_METHOD(TApp, "CheckShortFail", "[app]") { args = {"--two"}; - EXPECT_THROW(CLI::detail::AppFriend::parse_arg(&app, args, CLI::detail::Classifier::SHORT), CLI::HorribleError); + CHECK_THROWS_AS(CLI::detail::AppFriend::parse_arg(&app, args, CLI::detail::Classifier::SHORT), CLI::HorribleError); } // Test horrible error -TEST_F(TApp, CheckLongFail) { +TEST_CASE_METHOD(TApp, "CheckLongFail", "[app]") { args = {"-t"}; - EXPECT_THROW(CLI::detail::AppFriend::parse_arg(&app, args, CLI::detail::Classifier::LONG), CLI::HorribleError); + CHECK_THROWS_AS(CLI::detail::AppFriend::parse_arg(&app, args, CLI::detail::Classifier::LONG), CLI::HorribleError); } // Test horrible error -TEST_F(TApp, CheckWindowsFail) { +TEST_CASE_METHOD(TApp, "CheckWindowsFail", "[app]") { args = {"-t"}; - EXPECT_THROW(CLI::detail::AppFriend::parse_arg(&app, args, CLI::detail::Classifier::WINDOWS), CLI::HorribleError); + CHECK_THROWS_AS(CLI::detail::AppFriend::parse_arg(&app, args, CLI::detail::Classifier::WINDOWS_STYLE), + CLI::HorribleError); } // Test horrible error -TEST_F(TApp, CheckOtherFail) { +TEST_CASE_METHOD(TApp, "CheckOtherFail", "[app]") { args = {"-t"}; - 
EXPECT_THROW(CLI::detail::AppFriend::parse_arg(&app, args, CLI::detail::Classifier::NONE), CLI::HorribleError); + CHECK_THROWS_AS(CLI::detail::AppFriend::parse_arg(&app, args, CLI::detail::Classifier::NONE), CLI::HorribleError); } // Test horrible error -TEST_F(TApp, CheckSubcomFail) { +TEST_CASE_METHOD(TApp, "CheckSubcomFail", "[app]") { args = {"subcom"}; - EXPECT_THROW(CLI::detail::AppFriend::parse_subcommand(&app, args), CLI::HorribleError); + CHECK_THROWS_AS(CLI::detail::AppFriend::parse_subcommand(&app, args), CLI::HorribleError); } -TEST_F(TApp, FallthroughParentFail) { - EXPECT_THROW(CLI::detail::AppFriend::get_fallthrough_parent(&app), CLI::HorribleError); +TEST_CASE_METHOD(TApp, "FallthroughParentFail", "[app]") { + CHECK_THROWS_AS(CLI::detail::AppFriend::get_fallthrough_parent(&app), CLI::HorribleError); } -TEST_F(TApp, FallthroughParents) { +TEST_CASE_METHOD(TApp, "FallthroughParents", "[app]") { auto sub = app.add_subcommand("test"); - EXPECT_EQ(CLI::detail::AppFriend::get_fallthrough_parent(sub), &app); + CHECK(&app == CLI::detail::AppFriend::get_fallthrough_parent(sub)); auto ssub = sub->add_subcommand("sub2"); - EXPECT_EQ(CLI::detail::AppFriend::get_fallthrough_parent(ssub), sub); + CHECK(sub == CLI::detail::AppFriend::get_fallthrough_parent(ssub)); auto og1 = app.add_option_group("g1"); auto og2 = og1->add_option_group("g2"); auto og3 = og2->add_option_group("g3"); - EXPECT_EQ(CLI::detail::AppFriend::get_fallthrough_parent(og3), &app); + CHECK(&app == CLI::detail::AppFriend::get_fallthrough_parent(og3)); auto ogb1 = sub->add_option_group("g1"); auto ogb2 = ogb1->add_option_group("g2"); auto ogb3 = ogb2->add_option_group("g3"); - EXPECT_EQ(CLI::detail::AppFriend::get_fallthrough_parent(ogb3), sub); + CHECK(sub == CLI::detail::AppFriend::get_fallthrough_parent(ogb3)); ogb2->name("groupb"); - EXPECT_EQ(CLI::detail::AppFriend::get_fallthrough_parent(ogb3), ogb2); + CHECK(ogb2 == CLI::detail::AppFriend::get_fallthrough_parent(ogb3)); } -TEST_F(TApp, 
OptionWithDefaults) { +TEST_CASE_METHOD(TApp, "OptionWithDefaults", "[app]") { int someint{2}; app.add_option("-a", someint)->capture_default_str(); args = {"-a1", "-a2"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } // Added to test ->transform -TEST_F(TApp, OrderedModifyingTransforms) { +TEST_CASE_METHOD(TApp, "OrderedModifyingTransforms", "[app]") { std::vector<std::string> val; auto m = app.add_option("-m", val); m->transform([](std::string x) { return x + "1"; }); @@ -2028,29 +2027,29 @@ TEST_F(TApp, OrderedModifyingTransforms) { run(); - EXPECT_EQ(val, std::vector<std::string>({"one21", "two21"})); + CHECK(std::vector<std::string>({"one21", "two21"}) == val); } -TEST_F(TApp, ThrowingTransform) { +TEST_CASE_METHOD(TApp, "ThrowingTransform", "[app]") { std::string val; auto m = app.add_option("-m,--mess", val); m->transform([](std::string) -> std::string { throw CLI::ValidationError("My Message"); }); - ASSERT_NO_THROW(run()); + REQUIRE_NOTHROW(run()); args = {"-mone"}; - ASSERT_THROW(run(), CLI::ValidationError); + REQUIRE_THROWS_AS(run(), CLI::ValidationError); try { run(); } catch(const CLI::ValidationError &e) { - EXPECT_EQ(e.what(), std::string("--mess: My Message")); + CHECK(std::string("--mess: My Message") == e.what()); } } // This was added to make running a simple function on each item easier -TEST_F(TApp, EachItem) { +TEST_CASE_METHOD(TApp, "EachItem", "[app]") { std::vector<std::string> results; std::vector<std::string> dummy; @@ -2063,35 +2062,35 @@ TEST_F(TApp, EachItem) { run(); - EXPECT_EQ(results, dummy); + CHECK(dummy == results); } // #128 -TEST_F(TApp, RepeatingMultiArgumentOptions) { +TEST_CASE_METHOD(TApp, "RepeatingMultiArgumentOptions", "[app]") { std::vector<std::string> entries; app.add_option("--entry", entries, "set a key and value")->type_name("KEY VALUE")->type_size(-2); args = {"--entry", "key1", "value1", "--entry", "key2", "value2"}; - ASSERT_NO_THROW(run()); - 
EXPECT_EQ(entries, std::vector<std::string>({"key1", "value1", "key2", "value2"})); + REQUIRE_NOTHROW(run()); + CHECK(std::vector<std::string>({"key1", "value1", "key2", "value2"}) == entries); args.pop_back(); - ASSERT_THROW(run(), CLI::ArgumentMismatch); + REQUIRE_THROWS_AS(run(), CLI::ArgumentMismatch); } // #122 -TEST_F(TApp, EmptyOptionEach) { +TEST_CASE_METHOD(TApp, "EmptyOptionEach", "[app]") { std::string q; app.add_option("--each")->each([&q](std::string s) { q = s; }); args = {"--each", "that"}; run(); - EXPECT_EQ(q, "that"); + CHECK("that" == q); } // #122 -TEST_F(TApp, EmptyOptionFail) { +TEST_CASE_METHOD(TApp, "EmptyOptionFail", "[app]") { std::string q; app.add_option("--each"); @@ -2099,116 +2098,116 @@ TEST_F(TApp, EmptyOptionFail) { run(); } -TEST_F(TApp, BeforeRequirements) { +TEST_CASE_METHOD(TApp, "BeforeRequirements", "[app]") { app.add_flag_function("-a", [](std::int64_t) { throw CLI::Success(); }); app.add_flag_function("-b", [](std::int64_t) { throw CLI::CallForHelp(); }); args = {"extra"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"-a", "extra"}; - EXPECT_THROW(run(), CLI::Success); + CHECK_THROWS_AS(run(), CLI::Success); args = {"-b", "extra"}; - EXPECT_THROW(run(), CLI::CallForHelp); + CHECK_THROWS_AS(run(), CLI::CallForHelp); // These run in definition order. 
args = {"-a", "-b", "extra"}; - EXPECT_THROW(run(), CLI::Success); + CHECK_THROWS_AS(run(), CLI::Success); // Currently, the original order is not preserved when calling callbacks // args = {"-b", "-a", "extra"}; - // EXPECT_THROW(run(), CLI::CallForHelp); + // CHECK_THROWS_AS (run(), CLI::CallForHelp); } // #209 -TEST_F(TApp, CustomUserSepParse) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse", "[app]") { std::vector<int> vals{1, 2, 3}; args = {"--idx", "1,2,3"}; auto opt = app.add_option("--idx", vals)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2, 3})); + CHECK(std::vector<int>({1, 2, 3}) == vals); std::vector<int> vals2; // check that the results vector gets the results in the same way opt->results(vals2); - EXPECT_EQ(vals2, vals); + CHECK(vals == vals2); app.remove_option(opt); app.add_option("--idx", vals)->delimiter(',')->capture_default_str(); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2, 3})); + CHECK(std::vector<int>({1, 2, 3}) == vals); } // #209 -TEST_F(TApp, DefaultUserSepParse) { +TEST_CASE_METHOD(TApp, "DefaultUserSepParse", "[app]") { std::vector<std::string> vals; args = {"--idx", "1 2 3", "4 5 6"}; auto opt = app.add_option("--idx", vals, ""); run(); - EXPECT_EQ(vals, std::vector<std::string>({"1 2 3", "4 5 6"})); + CHECK(std::vector<std::string>({"1 2 3", "4 5 6"}) == vals); opt->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<std::string>({"1 2 3", "4 5 6"})); + CHECK(std::vector<std::string>({"1 2 3", "4 5 6"}) == vals); } // #209 -TEST_F(TApp, BadUserSepParse) { +TEST_CASE_METHOD(TApp, "BadUserSepParse", "[app]") { std::vector<int> vals; app.add_option("--idx", vals); args = {"--idx", "1,2,3"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } // #209 -TEST_F(TApp, CustomUserSepParse2) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse2", "[app]") { std::vector<int> vals{1, 2, 3}; args = {"--idx", "1,2,"}; auto opt = app.add_option("--idx", vals)->delimiter(','); run(); - 
EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); app.remove_option(opt); app.add_option("--idx", vals, "")->delimiter(',')->capture_default_str(); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); } -TEST_F(TApp, CustomUserSepParseFunction) { +TEST_CASE_METHOD(TApp, "CustomUserSepParseFunction", "[app]") { std::vector<int> vals{1, 2, 3}; args = {"--idx", "1,2,3"}; app.add_option_function<std::vector<int>>("--idx", [&vals](std::vector<int> v) { vals = std::move(v); }) ->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2, 3})); + CHECK(std::vector<int>({1, 2, 3}) == vals); } // delimiter removal -TEST_F(TApp, CustomUserSepParseToggle) { +TEST_CASE_METHOD(TApp, "CustomUserSepParseToggle", "[app]") { std::vector<std::string> vals; args = {"--idx", "1,2,3"}; auto opt = app.add_option("--idx", vals)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<std::string>({"1", "2", "3"})); + CHECK(std::vector<std::string>({"1", "2", "3"}) == vals); opt->delimiter('\0'); run(); - EXPECT_EQ(vals, std::vector<std::string>({"1,2,3"})); + CHECK(std::vector<std::string>({"1,2,3"}) == vals); opt->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<std::string>({"1", "2", "3"})); + CHECK(std::vector<std::string>({"1", "2", "3"}) == vals); } // #209 -TEST_F(TApp, CustomUserSepParse3) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse3", "[app]") { std::vector<int> vals = {1, 2, 3}; args = {"--idx", @@ -2217,42 +2216,42 @@ TEST_F(TApp, CustomUserSepParse3) { "2"}; auto opt = app.add_option("--idx", vals)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); app.remove_option(opt); app.add_option("--idx", vals, "", false)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); } // #209 -TEST_F(TApp, CustomUserSepParse4) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse4", "[app]") { 
std::vector<int> vals; args = {"--idx", "1, 2"}; auto opt = app.add_option("--idx", vals)->delimiter(',')->capture_default_str(); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); app.remove_option(opt); app.add_option("--idx", vals)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); } // #218 -TEST_F(TApp, CustomUserSepParse5) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse5", "[app]") { std::vector<std::string> bar; args = {"this", "is", "a", "test"}; auto opt = app.add_option("bar", bar, "bar"); run(); - EXPECT_EQ(bar, std::vector<std::string>({"this", "is", "a", "test"})); + CHECK(std::vector<std::string>({"this", "is", "a", "test"}) == bar); app.remove_option(opt); args = {"this", "is", "a", "test"}; app.add_option("bar", bar, "bar")->capture_default_str(); run(); - EXPECT_EQ(bar, std::vector<std::string>({"this", "is", "a", "test"})); + CHECK(std::vector<std::string>({"this", "is", "a", "test"}) == bar); } diff --git a/packages/CLI11/tests/BoostOptionTypeTest.cpp b/packages/CLI11/tests/BoostOptionTypeTest.cpp index 3ddd6ae459bded6af8ee6af1c216891d8c061001..2110bcc6540bd881bb4474b0d00df3e21f0582c0 100644 --- a/packages/CLI11/tests/BoostOptionTypeTest.cpp +++ b/packages/CLI11/tests/BoostOptionTypeTest.cpp @@ -15,137 +15,112 @@ #include <string> #include <vector> -#include "gmock/gmock.h" - -namespace boost { -namespace container { - -template <class T> class TApp_container_single_boost : public TApp { - public: - using container_type = T; - container_type cval{}; - TApp_container_single_boost() : TApp() {} -}; - -using containerTypes_single_boost = - ::testing::Types<small_vector<int, 2>, small_vector<int, 3>, flat_set<int>, stable_vector<int>, slist<int>>; - -TYPED_TEST_SUITE(TApp_container_single_boost, containerTypes_single_boost, ); - -TYPED_TEST(TApp_container_single_boost, containerInt_boost) { - - auto &cv = 
TApp_container_single_boost<TypeParam>::cval; - CLI::Option *opt = (TApp::app).add_option("-v", cv); - - TApp::args = {"-v", "1", "-1", "-v", "3", "-v", "-976"}; - TApp::run(); - EXPECT_EQ(4u, (TApp::app).count("-v")); - EXPECT_EQ(4u, cv.size()); +using namespace boost::container; + +TEMPLATE_TEST_CASE("Boost container single", + "[boost][optional]", + (small_vector<int, 2>), + (small_vector<int, 3>), + flat_set<int>, + stable_vector<int>, + slist<int>) { + TApp tapp; + TestType cv; + CLI::Option *opt = tapp.app.add_option("-v", cv); + + tapp.args = {"-v", "1", "-1", "-v", "3", "-v", "-976"}; + tapp.run(); + CHECK(tapp.app.count("-v") == 4u); + CHECK(cv.size() == 4u); opt->check(CLI::PositiveNumber.application_index(0)); opt->check((!CLI::PositiveNumber).application_index(1)); - EXPECT_NO_THROW(TApp::run()); - EXPECT_EQ(4u, cv.size()); + CHECK_NOTHROW(tapp.run()); + CHECK(cv.size() == 4u); // v[3] would be negative opt->check(CLI::PositiveNumber.application_index(3)); - EXPECT_THROW(TApp::run(), CLI::ValidationError); + CHECK_THROWS_AS(tapp.run(), CLI::ValidationError); } -template <class T> class TApp_container_pair_boost : public TApp { - public: - using container_type = T; - container_type cval{}; - TApp_container_pair_boost() : TApp() {} -}; - using isp = std::pair<int, std::string>; -using containerTypes_pair_boost = ::testing:: - Types<stable_vector<isp>, small_vector<isp, 2>, flat_set<isp>, slist<isp>, vector<isp>, flat_map<int, std::string>>; -TYPED_TEST_SUITE(TApp_container_pair_boost, containerTypes_pair_boost, ); +TEMPLATE_TEST_CASE("Boost container pair", + "[boost][optional]", + stable_vector<isp>, + (small_vector<isp, 2>), + flat_set<isp>, + slist<isp>, + vector<isp>, + (flat_map<int, std::string>)) { -TYPED_TEST(TApp_container_pair_boost, containerPair_boost) { + TApp tapp; + TestType cv; - auto &cv = TApp_container_pair_boost<TypeParam>::cval; - (TApp::app).add_option("--dict", cv); + tapp.app.add_option("--dict", cv); - TApp::args = {"--dict", "1", 
"str1", "--dict", "3", "str3"}; + tapp.args = {"--dict", "1", "str1", "--dict", "3", "str3"}; - TApp::run(); - EXPECT_EQ(cv.size(), 2u); + tapp.run(); + CHECK(2u == cv.size()); - TApp::args = {"--dict", "1", "str1", "--dict", "3", "--dict", "-1", "str4"}; - TApp::run(); - EXPECT_EQ(cv.size(), 3u); + tapp.args = {"--dict", "1", "str1", "--dict", "3", "--dict", "-1", "str4"}; + tapp.run(); + CHECK(3u == cv.size()); } -template <class T> class TApp_container_tuple_boost : public TApp { - public: - using container_type = T; - container_type cval{}; - TApp_container_tuple_boost() : TApp() {} -}; - using tup_obj = std::tuple<int, std::string, double>; -using containerTypes_tuple_boost = - ::testing::Types<small_vector<tup_obj, 3>, stable_vector<tup_obj>, flat_set<tup_obj>, slist<tup_obj>>; -TYPED_TEST_SUITE(TApp_container_tuple_boost, containerTypes_tuple_boost, ); +TEMPLATE_TEST_CASE("Boost container tuple", + "[boost][optional]", + (small_vector<tup_obj, 3>), + stable_vector<tup_obj>, + flat_set<tup_obj>, + slist<tup_obj>) { + TApp tapp; + TestType cv; -TYPED_TEST(TApp_container_tuple_boost, containerTuple_boost) { + tapp.app.add_option("--dict", cv); - auto &cv = TApp_container_tuple_boost<TypeParam>::cval; - (TApp::app).add_option("--dict", cv); + tapp.args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7"}; - TApp::args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7"}; + tapp.run(); + CHECK(2u == cv.size()); - TApp::run(); - EXPECT_EQ(cv.size(), 2u); - - TApp::args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7", "--dict", "-1", "str4", "-1.87"}; - TApp::run(); - EXPECT_EQ(cv.size(), 3u); + tapp.args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7", "--dict", "-1", "str4", "-1.87"}; + tapp.run(); + CHECK(3u == cv.size()); } using icontainer1 = vector<int>; using icontainer2 = flat_set<int>; using icontainer3 = slist<int>; -using containerTypes_container_boost = ::testing::Types<std::vector<icontainer1>, - 
slist<icontainer1>, - flat_set<icontainer1>, - small_vector<icontainer1, 2>, - std::vector<icontainer2>, - slist<icontainer2>, - flat_set<icontainer2>, - stable_vector<icontainer2>, - static_vector<icontainer3, 10>, - slist<icontainer3>, - flat_set<icontainer3>, - static_vector<icontainer3, 10>>; - -template <class T> class TApp_container_container_boost : public TApp { - public: - using container_type = T; - container_type cval{}; - TApp_container_container_boost() : TApp() {} -}; - -TYPED_TEST_SUITE(TApp_container_container_boost, containerTypes_container_boost, ); - -TYPED_TEST(TApp_container_container_boost, containerContainer_boost) { - - auto &cv = TApp_container_container_boost<TypeParam>::cval; - (TApp::app).add_option("--dict", cv); - - TApp::args = {"--dict", "1", "2", "4", "--dict", "3", "1"}; - - TApp::run(); - EXPECT_EQ(cv.size(), 2u); - - TApp::args = {"--dict", "1", "2", "4", "--dict", "3", "1", "--dict", "3", "--dict", - "3", "3", "3", "3", "3", "3", "3", "3", "3", "-3"}; - TApp::run(); - EXPECT_EQ(cv.size(), 4u); -} -} // namespace container -} // namespace boost +TEMPLATE_TEST_CASE("Boost container container", + "[boost][optional]", + std::vector<icontainer1>, + slist<icontainer1>, + flat_set<icontainer1>, + (small_vector<icontainer1, 2>), + std::vector<icontainer2>, + slist<icontainer2>, + flat_set<icontainer2>, + stable_vector<icontainer2>, + (static_vector<icontainer2, 10>), + slist<icontainer3>, + flat_set<icontainer3>, + (static_vector<icontainer3, 10>)) { + + TApp tapp; + TestType cv; + + tapp.app.add_option("--dict", cv); + + tapp.args = {"--dict", "1", "2", "4", "--dict", "3", "1"}; + + tapp.run(); + CHECK(2u == cv.size()); + + tapp.args = {"--dict", "1", "2", "4", "--dict", "3", "1", "--dict", "3", "--dict", + "3", "3", "3", "3", "3", "3", "3", "3", "3", "-3"}; + tapp.run(); + CHECK(4u == cv.size()); +} diff --git a/packages/CLI11/tests/CMakeLists.txt b/packages/CLI11/tests/CMakeLists.txt index 
cce6e30b8f388309113bed5134a4221dc082e6fd..4a86923f6dd7b74a9f51d02785e9f060e0617129 100644 --- a/packages/CLI11/tests/CMakeLists.txt +++ b/packages/CLI11/tests/CMakeLists.txt @@ -1,8 +1,3 @@ -if(NOT EXISTS "${CLI11_SOURCE_DIR}/extern/googletest/CMakeLists.txt") - message(FATAL_ERROR "You have requested tests be built, but googletest is not downloaded. Please run: - git submodule update --init") -endif() - list(APPEND CMAKE_MODULE_PATH "${CLI11_SOURCE_DIR}/cmake") if(CLI11_SANITIZERS) @@ -29,8 +24,6 @@ else() endmacro() endif() -set(GOOGLE_TEST_INDIVIDUAL OFF) -include(AddGoogletest) # Add boost to test boost::optional (currently explicitly requested)" option(CLI11_BOOST "Turn on boost test (currently may fail with Boost 1.70)" OFF) @@ -70,8 +63,32 @@ endif() set(CLI11_MULTIONLY_TESTS TimerTest) -# Only affects current directory, so safe -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +add_library(catch_main main.cpp) +target_include_directories(catch_main PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") + +# Currently a required download; could be made to look for existing Catch2, but +# that would require changing the includes. FetchContent would be better, but +# requires newer CMake.
+ +set(url https://github.com/philsquared/Catch/releases/download/v2.13.4/catch.hpp) +file(DOWNLOAD ${url} "${CMAKE_CURRENT_BINARY_DIR}/catch.hpp" STATUS status EXPECTED_HASH SHA256=6e0fa3dd160891a01c1f3b34e8bcd6e0140abe08eca022e390027f27dec2050b) +list(GET status 0 error) +if(error) + message(FATAL_ERROR "Could not download ${url}") +endif() +target_include_directories(catch_main PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") + +# Target must already exist; links it against catch_main and registers it as a test named TESTNAME +macro(add_catch_test TESTNAME) + target_link_libraries(${TESTNAME} PUBLIC catch_main) + + add_test(${TESTNAME} ${TESTNAME}) + set_target_properties(${TESTNAME} PROPERTIES FOLDER "Tests") + if (CLI11_FORCE_LIBCXX) + set_property(TARGET ${TESTNAME} APPEND_STRING + PROPERTY LINK_FLAGS -stdlib=libc++) + endif() +endmacro() foreach(T IN LISTS CLI11_TESTS) if(CLI11_CUDA_TESTS) @@ -86,12 +103,12 @@ foreach(T IN LISTS CLI11_TESTS) target_link_libraries(${T} PRIVATE CLI11_warnings) endif() target_link_libraries(${T} PRIVATE CLI11) - add_gtest(${T}) + add_catch_test(${T}) if(CLI11_SINGLE_FILE AND CLI11_SINGLE_FILE_TESTS) add_executable(${T}_Single ${T}.cpp) target_link_libraries(${T}_Single PRIVATE CLI11_SINGLE) - add_gtest(${T}_Single) + add_catch_test(${T}_Single) set_property(TARGET ${T}_Single PROPERTY FOLDER "Tests Single File") endif() endforeach() @@ -100,7 +117,7 @@ foreach(T IN LISTS CLI11_MULTIONLY_TESTS) add_executable(${T} ${T}.cpp ${CLI11_headers}) add_sanitizers(${T}) target_link_libraries(${T} PUBLIC CLI11) - add_gtest(${T}) + add_catch_test(${T}) endforeach() # Add -Wno-deprecated-declarations to DeprecatedTest @@ -123,7 +140,7 @@ target_link_libraries(link_test_1 PUBLIC CLI11) set_target_properties(link_test_1 PROPERTIES FOLDER "Tests") add_executable(link_test_2 link_test_2.cpp) target_link_libraries(link_test_2 PUBLIC CLI11 link_test_1) -add_gtest(link_test_2) +add_catch_test(link_test_2) if(CLI11_FORCE_LIBCXX) set_property(TARGET link_test_1 APPEND_STRING PROPERTY LINK_FLAGS -stdlib=libc++) diff --git
a/packages/CLI11/tests/ComplexTypeTest.cpp b/packages/CLI11/tests/ComplexTypeTest.cpp index 43c6cd48bab35cdc2a2ad09e6e59ccbc712a9dd6..b9a5d4e51cfc2584b8f199ef9ced5c8be245f8a4 100644 --- a/packages/CLI11/tests/ComplexTypeTest.cpp +++ b/packages/CLI11/tests/ComplexTypeTest.cpp @@ -5,11 +5,11 @@ // SPDX-License-Identifier: BSD-3-Clause #include "app_helper.hpp" -#include "gmock/gmock.h" + #include <complex> #include <cstdint> -using ::testing::HasSubstr; +using Catch::Matchers::Contains; using cx = std::complex<double>; @@ -33,7 +33,7 @@ add_option(CLI::App &app, std::string name, cx &variable, std::string descriptio return opt; } -TEST_F(TApp, AddingComplexParser) { +TEST_CASE_METHOD(TApp, "AddingComplexParser", "[complex]") { cx comp{0, 0}; add_option(app, "-c,--complex", comp); @@ -41,27 +41,27 @@ TEST_F(TApp, AddingComplexParser) { run(); - EXPECT_DOUBLE_EQ(1.5, comp.real()); - EXPECT_DOUBLE_EQ(2.5, comp.imag()); + CHECK(comp.real() == Approx(1.5)); + CHECK(comp.imag() == Approx(2.5)); } -TEST_F(TApp, DefaultedComplex) { +TEST_CASE_METHOD(TApp, "DefaultedComplex", "[complex]") { cx comp{1, 2}; add_option(app, "-c,--complex", comp, "", true); args = {"-c", "4", "3"}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, HasSubstr("2")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("2")); - EXPECT_DOUBLE_EQ(1, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(1)); + CHECK(comp.imag() == Approx(2)); run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); } // an example of custom complex number converter that can be used to add new parsing options @@ -117,7 +117,7 @@ template <> bool lexical_cast<std::complex<double>>(const std::string &input, st } // namespace detail } // namespace CLI -TEST_F(TApp, AddingComplexParserDetail) { +TEST_CASE_METHOD(TApp, "AddingComplexParserDetail", "[complex]") { 
bool skip_tests = false; try { // check if the library actually supports regex, it is possible to link against a non working regex in the @@ -131,7 +131,7 @@ TEST_F(TApp, AddingComplexParserDetail) { if(!rsearch) { skip_tests = true; } else { - EXPECT_EQ(m.size(), 9u); + CHECK(9u == m.size()); } } catch(...) { @@ -146,14 +146,14 @@ TEST_F(TApp, AddingComplexParserDetail) { run(); - EXPECT_DOUBLE_EQ(1.5, comp.real()); - EXPECT_DOUBLE_EQ(2.5, comp.imag()); + CHECK(comp.real() == Approx(1.5)); + CHECK(comp.imag() == Approx(2.5)); args = {"-c", "1.5-2.5j"}; run(); - EXPECT_DOUBLE_EQ(1.5, comp.real()); - EXPECT_DOUBLE_EQ(-2.5, comp.imag()); + CHECK(comp.real() == Approx(1.5)); + CHECK(comp.imag() == Approx(-2.5)); } } #endif @@ -170,7 +170,7 @@ class complex_new { double val2_{0.0}; }; -TEST_F(TApp, newComplex) { +TEST_CASE_METHOD(TApp, "newComplex", "[complex]") { complex_new cval; static_assert(CLI::detail::is_complex<complex_new>::value, "complex new does not register as a complex type"); static_assert(CLI::detail::classify_object<complex_new>::value == CLI::detail::object_category::complex_number, @@ -180,12 +180,12 @@ TEST_F(TApp, newComplex) { run(); - EXPECT_DOUBLE_EQ(1.5, cval.real()); - EXPECT_DOUBLE_EQ(2.5, cval.imag()); + CHECK(cval.real() == Approx(1.5)); + CHECK(cval.imag() == Approx(2.5)); args = {"-c", "1.5-2.5j"}; run(); - EXPECT_DOUBLE_EQ(1.5, cval.real()); - EXPECT_DOUBLE_EQ(-2.5, cval.imag()); + CHECK(cval.real() == Approx(1.5)); + CHECK(cval.imag() == Approx(-2.5)); } diff --git a/packages/CLI11/tests/ConfigFileTest.cpp b/packages/CLI11/tests/ConfigFileTest.cpp index d8b19d142aff335f13e872d38113a405b54c840c..12fa88c10a3949c218b246c1b6a9ddd429f9f309 100644 --- a/packages/CLI11/tests/ConfigFileTest.cpp +++ b/packages/CLI11/tests/ConfigFileTest.cpp @@ -6,63 +6,61 @@ #include "app_helper.hpp" -#include "gmock/gmock.h" #include <cstdio> #include <sstream> -using ::testing::HasSubstr; -using ::testing::Not; +using Catch::Matchers::Contains; 
-TEST(StringBased, convert_arg_for_ini) { +TEST_CASE("StringBased: convert_arg_for_ini", "[config]") { - EXPECT_EQ(CLI::detail::convert_arg_for_ini(std::string{}), "\"\""); + CHECK("\"\"" == CLI::detail::convert_arg_for_ini(std::string{})); - EXPECT_EQ(CLI::detail::convert_arg_for_ini("true"), "true"); + CHECK("true" == CLI::detail::convert_arg_for_ini("true")); - EXPECT_EQ(CLI::detail::convert_arg_for_ini("nan"), "nan"); + CHECK("nan" == CLI::detail::convert_arg_for_ini("nan")); - EXPECT_EQ(CLI::detail::convert_arg_for_ini("happy hippo"), "\"happy hippo\""); + CHECK("\"happy hippo\"" == CLI::detail::convert_arg_for_ini("happy hippo")); - EXPECT_EQ(CLI::detail::convert_arg_for_ini("47"), "47"); + CHECK("47" == CLI::detail::convert_arg_for_ini("47")); - EXPECT_EQ(CLI::detail::convert_arg_for_ini("47.365225"), "47.365225"); + CHECK("47.365225" == CLI::detail::convert_arg_for_ini("47.365225")); - EXPECT_EQ(CLI::detail::convert_arg_for_ini("+3.28e-25"), "+3.28e-25"); - EXPECT_EQ(CLI::detail::convert_arg_for_ini("-22E14"), "-22E14"); + CHECK("+3.28e-25" == CLI::detail::convert_arg_for_ini("+3.28e-25")); + CHECK("-22E14" == CLI::detail::convert_arg_for_ini("-22E14")); - EXPECT_EQ(CLI::detail::convert_arg_for_ini("a"), "'a'"); + CHECK("'a'" == CLI::detail::convert_arg_for_ini("a")); // hex - EXPECT_EQ(CLI::detail::convert_arg_for_ini("0x5461FAED"), "0x5461FAED"); + CHECK("0x5461FAED" == CLI::detail::convert_arg_for_ini("0x5461FAED")); // hex fail - EXPECT_EQ(CLI::detail::convert_arg_for_ini("0x5461FAEG"), "\"0x5461FAEG\""); + CHECK("\"0x5461FAEG\"" == CLI::detail::convert_arg_for_ini("0x5461FAEG")); // octal - EXPECT_EQ(CLI::detail::convert_arg_for_ini("0o546123567"), "0o546123567"); + CHECK("0o546123567" == CLI::detail::convert_arg_for_ini("0o546123567")); // octal fail - EXPECT_EQ(CLI::detail::convert_arg_for_ini("0o546123587"), "\"0o546123587\""); + CHECK("\"0o546123587\"" == CLI::detail::convert_arg_for_ini("0o546123587")); // binary - 
EXPECT_EQ(CLI::detail::convert_arg_for_ini("0b01101110010"), "0b01101110010"); + CHECK("0b01101110010" == CLI::detail::convert_arg_for_ini("0b01101110010")); // binary fail - EXPECT_EQ(CLI::detail::convert_arg_for_ini("0b01102110010"), "\"0b01102110010\""); + CHECK("\"0b01102110010\"" == CLI::detail::convert_arg_for_ini("0b01102110010")); } -TEST(StringBased, IniJoin) { +TEST_CASE("StringBased: IniJoin", "[config]") { std::vector<std::string> items = {"one", "two", "three four"}; std::string result = "\"one\" \"two\" \"three four\""; - EXPECT_EQ(CLI::detail::ini_join(items, ' ', '\0', '\0'), result); + CHECK(result == CLI::detail::ini_join(items, ' ', '\0', '\0')); result = "[\"one\", \"two\", \"three four\"]"; - EXPECT_EQ(CLI::detail::ini_join(items), result); + CHECK(result == CLI::detail::ini_join(items)); result = "{\"one\"; \"two\"; \"three four\"}"; - EXPECT_EQ(CLI::detail::ini_join(items, ';', '{', '}'), result); + CHECK(result == CLI::detail::ini_join(items, ';', '{', '}')); } -TEST(StringBased, First) { +TEST_CASE("StringBased: First", "[config]") { std::stringstream ofile; ofile << "one=three\n"; @@ -72,16 +70,16 @@ TEST(StringBased, First) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); - EXPECT_EQ(2u, output.size()); - EXPECT_EQ("one", output.at(0).name); - EXPECT_EQ(1u, output.at(0).inputs.size()); - EXPECT_EQ("three", output.at(0).inputs.at(0)); - EXPECT_EQ("two", output.at(1).name); - EXPECT_EQ(1u, output.at(1).inputs.size()); - EXPECT_EQ("four", output.at(1).inputs.at(0)); + CHECK(output.size() == 2u); + CHECK(output.at(0).name == "one"); + CHECK(output.at(0).inputs.size() == 1u); + CHECK(output.at(0).inputs.at(0) == "three"); + CHECK(output.at(1).name == "two"); + CHECK(output.at(1).inputs.size() == 1u); + CHECK(output.at(1).inputs.at(0) == "four"); } -TEST(StringBased, FirstWithComments) { +TEST_CASE("StringBased: FirstWithComments", "[config]") { std::stringstream ofile; ofile << ";this is a comment\n"; @@ -93,16 
+91,16 @@ TEST(StringBased, FirstWithComments) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); - EXPECT_EQ(2u, output.size()); - EXPECT_EQ("one", output.at(0).name); - EXPECT_EQ(1u, output.at(0).inputs.size()); - EXPECT_EQ("three", output.at(0).inputs.at(0)); - EXPECT_EQ("two", output.at(1).name); - EXPECT_EQ(1u, output.at(1).inputs.size()); - EXPECT_EQ("four", output.at(1).inputs.at(0)); + CHECK(output.size() == 2u); + CHECK(output.at(0).name == "one"); + CHECK(output.at(0).inputs.size() == 1u); + CHECK(output.at(0).inputs.at(0) == "three"); + CHECK(output.at(1).name == "two"); + CHECK(output.at(1).inputs.size() == 1u); + CHECK(output.at(1).inputs.at(0) == "four"); } -TEST(StringBased, Quotes) { +TEST_CASE("StringBased: Quotes", "[config]") { std::stringstream ofile; ofile << R"(one = "three")" << '\n'; @@ -113,19 +111,19 @@ TEST(StringBased, Quotes) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); - EXPECT_EQ(3u, output.size()); - EXPECT_EQ("one", output.at(0).name); - EXPECT_EQ(1u, output.at(0).inputs.size()); - EXPECT_EQ("three", output.at(0).inputs.at(0)); - EXPECT_EQ("two", output.at(1).name); - EXPECT_EQ(1u, output.at(1).inputs.size()); - EXPECT_EQ("four", output.at(1).inputs.at(0)); - EXPECT_EQ("five", output.at(2).name); - EXPECT_EQ(1u, output.at(2).inputs.size()); - EXPECT_EQ("six and seven", output.at(2).inputs.at(0)); + CHECK(output.size() == 3u); + CHECK(output.at(0).name == "one"); + CHECK(output.at(0).inputs.size() == 1u); + CHECK(output.at(0).inputs.at(0) == "three"); + CHECK(output.at(1).name == "two"); + CHECK(output.at(1).inputs.size() == 1u); + CHECK(output.at(1).inputs.at(0) == "four"); + CHECK(output.at(2).name == "five"); + CHECK(output.at(2).inputs.size() == 1u); + CHECK(output.at(2).inputs.at(0) == "six and seven"); } -TEST(StringBased, Vector) { +TEST_CASE("StringBased: Vector", "[config]") { std::stringstream ofile; ofile << "one = three\n"; @@ -136,21 +134,21 @@ TEST(StringBased, 
Vector) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); - EXPECT_EQ(3u, output.size()); - EXPECT_EQ("one", output.at(0).name); - EXPECT_EQ(1u, output.at(0).inputs.size()); - EXPECT_EQ("three", output.at(0).inputs.at(0)); - EXPECT_EQ("two", output.at(1).name); - EXPECT_EQ(1u, output.at(1).inputs.size()); - EXPECT_EQ("four", output.at(1).inputs.at(0)); - EXPECT_EQ("five", output.at(2).name); - EXPECT_EQ(3u, output.at(2).inputs.size()); - EXPECT_EQ("six", output.at(2).inputs.at(0)); - EXPECT_EQ("and", output.at(2).inputs.at(1)); - EXPECT_EQ("seven", output.at(2).inputs.at(2)); -} - -TEST(StringBased, TomlVector) { + CHECK(output.size() == 3u); + CHECK(output.at(0).name == "one"); + CHECK(output.at(0).inputs.size() == 1u); + CHECK(output.at(0).inputs.at(0) == "three"); + CHECK(output.at(1).name == "two"); + CHECK(output.at(1).inputs.size() == 1u); + CHECK(output.at(1).inputs.at(0) == "four"); + CHECK(output.at(2).name == "five"); + CHECK(output.at(2).inputs.size() == 3u); + CHECK(output.at(2).inputs.at(0) == "six"); + CHECK(output.at(2).inputs.at(1) == "and"); + CHECK(output.at(2).inputs.at(2) == "seven"); +} + +TEST_CASE("StringBased: TomlVector", "[config]") { std::stringstream ofile; ofile << "one = [three]\n"; @@ -166,32 +164,32 @@ TEST(StringBased, TomlVector) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); - EXPECT_EQ(5u, output.size()); - EXPECT_EQ("one", output.at(0).name); - EXPECT_EQ(1u, output.at(0).inputs.size()); - EXPECT_EQ("three", output.at(0).inputs.at(0)); - EXPECT_EQ("two", output.at(1).name); - EXPECT_EQ(1u, output.at(1).inputs.size()); - EXPECT_EQ("four", output.at(1).inputs.at(0)); - EXPECT_EQ("five", output.at(2).name); - EXPECT_EQ(3u, output.at(2).inputs.size()); - EXPECT_EQ("six", output.at(2).inputs.at(0)); - EXPECT_EQ("and", output.at(2).inputs.at(1)); - EXPECT_EQ("seven", output.at(2).inputs.at(2)); - EXPECT_EQ("eight", output.at(3).name); - EXPECT_EQ(4u, output.at(3).inputs.size()); 
- EXPECT_EQ("nine", output.at(3).inputs.at(0)); - EXPECT_EQ("ten", output.at(3).inputs.at(1)); - EXPECT_EQ("eleven", output.at(3).inputs.at(2)); - EXPECT_EQ("twelve", output.at(3).inputs.at(3)); - EXPECT_EQ("one_more", output.at(4).name); - EXPECT_EQ(3u, output.at(4).inputs.size()); - EXPECT_EQ("one", output.at(4).inputs.at(0)); - EXPECT_EQ("two", output.at(4).inputs.at(1)); - EXPECT_EQ("three", output.at(4).inputs.at(2)); -} - -TEST(StringBased, Spaces) { + CHECK(output.size() == 5u); + CHECK(output.at(0).name == "one"); + CHECK(output.at(0).inputs.size() == 1u); + CHECK(output.at(0).inputs.at(0) == "three"); + CHECK(output.at(1).name == "two"); + CHECK(output.at(1).inputs.size() == 1u); + CHECK(output.at(1).inputs.at(0) == "four"); + CHECK(output.at(2).name == "five"); + CHECK(output.at(2).inputs.size() == 3u); + CHECK(output.at(2).inputs.at(0) == "six"); + CHECK(output.at(2).inputs.at(1) == "and"); + CHECK(output.at(2).inputs.at(2) == "seven"); + CHECK(output.at(3).name == "eight"); + CHECK(output.at(3).inputs.size() == 4u); + CHECK(output.at(3).inputs.at(0) == "nine"); + CHECK(output.at(3).inputs.at(1) == "ten"); + CHECK(output.at(3).inputs.at(2) == "eleven"); + CHECK(output.at(3).inputs.at(3) == "twelve"); + CHECK(output.at(4).name == "one_more"); + CHECK(output.at(4).inputs.size() == 3u); + CHECK(output.at(4).inputs.at(0) == "one"); + CHECK(output.at(4).inputs.at(1) == "two"); + CHECK(output.at(4).inputs.at(2) == "three"); +} + +TEST_CASE("StringBased: Spaces", "[config]") { std::stringstream ofile; ofile << "one = three\n"; @@ -201,16 +199,16 @@ TEST(StringBased, Spaces) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); - EXPECT_EQ(2u, output.size()); - EXPECT_EQ("one", output.at(0).name); - EXPECT_EQ(1u, output.at(0).inputs.size()); - EXPECT_EQ("three", output.at(0).inputs.at(0)); - EXPECT_EQ("two", output.at(1).name); - EXPECT_EQ(1u, output.at(1).inputs.size()); - EXPECT_EQ("four", output.at(1).inputs.at(0)); + 
CHECK(output.size() == 2u); + CHECK(output.at(0).name == "one"); + CHECK(output.at(0).inputs.size() == 1u); + CHECK(output.at(0).inputs.at(0) == "three"); + CHECK(output.at(1).name == "two"); + CHECK(output.at(1).inputs.size() == 1u); + CHECK(output.at(1).inputs.at(0) == "four"); } -TEST(StringBased, Sections) { +TEST_CASE("StringBased: Sections", "[config]") { std::stringstream ofile; ofile << "one=three\n"; @@ -221,18 +219,18 @@ TEST(StringBased, Sections) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); - EXPECT_EQ(4u, output.size()); - EXPECT_EQ("one", output.at(0).name); - EXPECT_EQ(1u, output.at(0).inputs.size()); - EXPECT_EQ("three", output.at(0).inputs.at(0)); - EXPECT_EQ("two", output.at(2).name); - EXPECT_EQ("second", output.at(2).parents.at(0)); - EXPECT_EQ(1u, output.at(2).inputs.size()); - EXPECT_EQ("four", output.at(2).inputs.at(0)); - EXPECT_EQ("second.two", output.at(2).fullname()); + CHECK(output.size() == 4u); + CHECK(output.at(0).name == "one"); + CHECK(output.at(0).inputs.size() == 1u); + CHECK(output.at(0).inputs.at(0) == "three"); + CHECK(output.at(2).name == "two"); + CHECK(output.at(2).parents.at(0) == "second"); + CHECK(output.at(2).inputs.size() == 1u); + CHECK(output.at(2).inputs.at(0) == "four"); + CHECK(output.at(2).fullname() == "second.two"); } -TEST(StringBased, SpacesSections) { +TEST_CASE("StringBased: SpacesSections", "[config]") { std::stringstream ofile; ofile << "one=three\n\n"; @@ -244,19 +242,19 @@ TEST(StringBased, SpacesSections) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); - EXPECT_EQ(4u, output.size()); - EXPECT_EQ("one", output.at(0).name); - EXPECT_EQ(1u, output.at(0).inputs.size()); - EXPECT_EQ("three", output.at(0).inputs.at(0)); - EXPECT_EQ("second", output.at(1).parents.at(0)); - EXPECT_EQ("++", output.at(1).name); - EXPECT_EQ("two", output.at(2).name); - EXPECT_EQ(1u, output.at(2).parents.size()); - EXPECT_EQ("second", output.at(2).parents.at(0)); - 
EXPECT_EQ(1u, output.at(2).inputs.size()); - EXPECT_EQ("four", output.at(2).inputs.at(0)); - EXPECT_EQ("second", output.at(3).parents.at(0)); - EXPECT_EQ("--", output.at(3).name); + CHECK(output.size() == 4u); + CHECK(output.at(0).name == "one"); + CHECK(output.at(0).inputs.size() == 1u); + CHECK(output.at(0).inputs.at(0) == "three"); + CHECK(output.at(1).parents.at(0) == "second"); + CHECK(output.at(1).name == "++"); + CHECK(output.at(2).name == "two"); + CHECK(output.at(2).parents.size() == 1u); + CHECK(output.at(2).parents.at(0) == "second"); + CHECK(output.at(2).inputs.size() == 1u); + CHECK(output.at(2).inputs.at(0) == "four"); + CHECK(output.at(3).parents.at(0) == "second"); + CHECK(output.at(3).name == "--"); } // check function to make sure that open sections match close sections @@ -284,7 +282,7 @@ bool checkSections(const std::vector<CLI::ConfigItem> &output) { } return open.empty(); } -TEST(StringBased, Layers) { +TEST_CASE("StringBased: Layers", "[config]") { std::stringstream ofile; ofile << "simple = true\n\n"; @@ -298,11 +296,11 @@ TEST(StringBased, Layers) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 2 flags and 4 openings and 4 closings - EXPECT_EQ(10u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 10u); + CHECK(checkSections(output)); } -TEST(StringBased, LayersSkip) { +TEST_CASE("StringBased: LayersSkip", "[config]") { std::stringstream ofile; ofile << "simple = true\n\n"; @@ -314,11 +312,11 @@ TEST(StringBased, LayersSkip) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 2 flags and 4 openings and 4 closings - EXPECT_EQ(10u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 10u); + CHECK(checkSections(output)); } -TEST(StringBased, LayersSkipOrdered) { +TEST_CASE("StringBased: LayersSkipOrdered", "[config]") { std::stringstream ofile; ofile << "simple = true\n\n"; @@ -330,11 +328,11 @@ TEST(StringBased, 
LayersSkipOrdered) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 2 flags and 4 openings and 4 closings - EXPECT_EQ(12u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 12u); + CHECK(checkSections(output)); } -TEST(StringBased, LayersChange) { +TEST_CASE("StringBased: LayersChange", "[config]") { std::stringstream ofile; ofile << "simple = true\n\n"; @@ -346,11 +344,11 @@ TEST(StringBased, LayersChange) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 2 flags and 3 openings and 3 closings - EXPECT_EQ(8u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 8u); + CHECK(checkSections(output)); } -TEST(StringBased, Layers2LevelChange) { +TEST_CASE("StringBased: Layers2LevelChange", "[config]") { std::stringstream ofile; ofile << "simple = true\n\n"; @@ -362,11 +360,11 @@ TEST(StringBased, Layers2LevelChange) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 2 flags and 5 openings and 5 closings - EXPECT_EQ(12u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 12u); + CHECK(checkSections(output)); } -TEST(StringBased, Layers3LevelChange) { +TEST_CASE("StringBased: Layers3LevelChange", "[config]") { std::stringstream ofile; ofile << "[other.sub2.subsub.cmd]\n"; @@ -377,11 +375,11 @@ TEST(StringBased, Layers3LevelChange) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 1 flags and 7 openings and 7 closings - EXPECT_EQ(15u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 15u); + CHECK(checkSections(output)); } -TEST(StringBased, newSegment) { +TEST_CASE("StringBased: newSegment", "[config]") { std::stringstream ofile; ofile << "[other.sub2.subsub.cmd]\n"; @@ -393,11 +391,11 @@ TEST(StringBased, newSegment) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 2 flags and 5 openings and 5 closings - 
EXPECT_EQ(12u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 12u); + CHECK(checkSections(output)); } -TEST(StringBased, LayersDirect) { +TEST_CASE("StringBased: LayersDirect", "[config]") { std::stringstream ofile; ofile << "simple = true\n\n"; @@ -409,11 +407,11 @@ TEST(StringBased, LayersDirect) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 2 flags and 4 openings and 4 closings - EXPECT_EQ(10u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 10u); + CHECK(checkSections(output)); } -TEST(StringBased, LayersComplex) { +TEST_CASE("StringBased: LayersComplex", "[config]") { std::stringstream ofile; ofile << "simple = true\n\n"; @@ -429,15 +427,15 @@ TEST(StringBased, LayersComplex) { std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_config(ofile); // 4 flags and 6 openings and 6 closings - EXPECT_EQ(16u, output.size()); - EXPECT_TRUE(checkSections(output)); + CHECK(output.size() == 16u); + CHECK(checkSections(output)); } -TEST(StringBased, file_error) { - EXPECT_THROW(std::vector<CLI::ConfigItem> output = CLI::ConfigINI().from_file("nonexist_file"), CLI::FileError); +TEST_CASE("StringBased: file_error", "[config]") { + CHECK_THROWS_AS(CLI::ConfigINI().from_file("nonexist_file"), CLI::FileError); } -TEST_F(TApp, IniNotRequired) { +TEST_CASE_METHOD(TApp, "IniNotRequired", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -459,22 +457,22 @@ TEST_F(TApp, IniNotRequired) { run(); - EXPECT_EQ(1, one); - EXPECT_EQ(99, two); - EXPECT_EQ(3, three); + CHECK(one == 1); + CHECK(two == 99); + CHECK(three == 3); one = two = three = 0; args = {"--one=1", "--two=2"}; run(); - EXPECT_EQ(1, one); - EXPECT_EQ(2, two); - EXPECT_EQ(3, three); - EXPECT_EQ(app["--config"]->as<std::string>(), "TestIniTmp.ini"); + CHECK(one == 1); + CHECK(two == 2); + CHECK(three == 3); + CHECK("TestIniTmp.ini" == app["--config"]->as<std::string>()); } -TEST_F(TApp, IniSuccessOnUnknownOption) { 
+TEST_CASE_METHOD(TApp, "IniSuccessOnUnknownOption", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); @@ -489,10 +487,10 @@ TEST_F(TApp, IniSuccessOnUnknownOption) { int two{0}; app.add_option("--two", two); run(); - EXPECT_EQ(99, two); + CHECK(two == 99); } -TEST_F(TApp, IniGetRemainingOption) { +TEST_CASE_METHOD(TApp, "IniGetRemainingOption", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); @@ -508,12 +506,12 @@ TEST_F(TApp, IniGetRemainingOption) { int two{0}; app.add_option("--two", two); - ASSERT_NO_THROW(run()); + REQUIRE_NOTHROW(run()); std::vector<std::string> ExpectedRemaining = {ExtraOption}; - EXPECT_EQ(app.remaining(), ExpectedRemaining); + CHECK(ExpectedRemaining == app.remaining()); } -TEST_F(TApp, IniGetNoRemaining) { +TEST_CASE_METHOD(TApp, "IniGetNoRemaining", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); @@ -526,26 +524,26 @@ TEST_F(TApp, IniGetNoRemaining) { int two{0}; app.add_option("--two", two); - ASSERT_NO_THROW(run()); - EXPECT_EQ(app.remaining().size(), 0u); + REQUIRE_NOTHROW(run()); + CHECK(0u == app.remaining().size()); } -TEST_F(TApp, IniRequiredNoDefault) { +TEST_CASE_METHOD(TApp, "IniRequiredNoDefault", "[config]") { app.set_config("--config")->required(); int two{0}; app.add_option("--two", two); - ASSERT_THROW(run(), CLI::FileError); + REQUIRE_THROWS_AS(run(), CLI::FileError); } -TEST_F(TApp, IniNotRequiredNoDefault) { +TEST_CASE_METHOD(TApp, "IniNotRequiredNoDefault", "[config]") { app.set_config("--config"); int two{0}; app.add_option("--two", two); - ASSERT_NO_THROW(run()); + REQUIRE_NOTHROW(run()); } /// Define a class for testing purposes that does bad things @@ -557,7 +555,7 @@ class EvilConfig : public CLI::Config { virtual std::vector<CLI::ConfigItem> from_config(std::istream &) const { throw CLI::FileError("evil"); } }; -TEST_F(TApp, IniRequiredbadConfigurator) { +TEST_CASE_METHOD(TApp, 
"IniRequiredbadConfigurator", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -572,10 +570,10 @@ TEST_F(TApp, IniRequiredbadConfigurator) { app.config_formatter(std::make_shared<EvilConfig>()); int two{0}; app.add_option("--two", two); - ASSERT_THROW(run(), CLI::FileError); + REQUIRE_THROWS_AS(run(), CLI::FileError); } -TEST_F(TApp, IniNotRequiredbadConfigurator) { +TEST_CASE_METHOD(TApp, "IniNotRequiredbadConfigurator", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -590,10 +588,10 @@ TEST_F(TApp, IniNotRequiredbadConfigurator) { app.config_formatter(std::make_shared<EvilConfig>()); int two{0}; app.add_option("--two", two); - ASSERT_NO_THROW(run()); + REQUIRE_NOTHROW(run()); } -TEST_F(TApp, IniNotRequiredNotDefault) { +TEST_CASE_METHOD(TApp, "IniNotRequiredNotDefault", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; TempFile tmpini2{"TestIniTmp2.ini"}; @@ -620,19 +618,19 @@ TEST_F(TApp, IniNotRequiredNotDefault) { app.add_option("--three", three); run(); - EXPECT_EQ(app["--config"]->as<std::string>(), tmpini.c_str()); - EXPECT_EQ(99, two); - EXPECT_EQ(3, three); + CHECK(tmpini.c_str() == app["--config"]->as<std::string>()); + CHECK(two == 99); + CHECK(three == 3); args = {"--config", tmpini2}; run(); - EXPECT_EQ(98, two); - EXPECT_EQ(4, three); - EXPECT_EQ(app.get_config_ptr()->as<std::string>(), tmpini2.c_str()); + CHECK(two == 98); + CHECK(three == 4); + CHECK(tmpini2.c_str() == app.get_config_ptr()->as<std::string>()); } -TEST_F(TApp, MultiConfig) { +TEST_CASE_METHOD(TApp, "MultiConfig", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; TempFile tmpini2{"TestIniTmp2.ini"}; @@ -661,19 +659,19 @@ TEST_F(TApp, MultiConfig) { args = {"--config", tmpini2, "--config", tmpini}; run(); - EXPECT_EQ(99, two); - EXPECT_EQ(3, three); - EXPECT_EQ(55, one); + CHECK(two == 99); + CHECK(three == 3); + CHECK(one == 55); args = {"--config", tmpini, "--config", tmpini2}; run(); - EXPECT_EQ(99, two); - EXPECT_EQ(4, three); - EXPECT_EQ(55, one); + CHECK(two == 99); + 
CHECK(three == 4); + CHECK(one == 55); } -TEST_F(TApp, MultiConfig_single) { +TEST_CASE_METHOD(TApp, "MultiConfig_single", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; TempFile tmpini2{"TestIniTmp2.ini"}; @@ -702,37 +700,37 @@ TEST_F(TApp, MultiConfig_single) { args = {"--config", tmpini2, "--config", tmpini}; run(); - EXPECT_EQ(99, two); - EXPECT_EQ(3, three); - EXPECT_EQ(0, one); + CHECK(two == 99); + CHECK(three == 3); + CHECK(one == 0); two = 0; args = {"--config", tmpini, "--config", tmpini2}; run(); - EXPECT_EQ(0, two); - EXPECT_EQ(4, three); - EXPECT_EQ(55, one); + CHECK(two == 0); + CHECK(three == 4); + CHECK(one == 55); } -TEST_F(TApp, IniRequiredNotFound) { +TEST_CASE_METHOD(TApp, "IniRequiredNotFound", "[config]") { std::string noini = "TestIniNotExist.ini"; app.set_config("--config", noini, "", true); - EXPECT_THROW(run(), CLI::FileError); + CHECK_THROWS_AS(run(), CLI::FileError); } -TEST_F(TApp, IniNotRequiredPassedNotFound) { +TEST_CASE_METHOD(TApp, "IniNotRequiredPassedNotFound", "[config]") { std::string noini = "TestIniNotExist.ini"; app.set_config("--config", "", "", false); args = {"--config", noini}; - EXPECT_THROW(run(), CLI::FileError); + CHECK_THROWS_AS(run(), CLI::FileError); } -TEST_F(TApp, IniOverwrite) { +TEST_CASE_METHOD(TApp, "IniOverwrite", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; { @@ -751,10 +749,10 @@ TEST_F(TApp, IniOverwrite) { run(); - EXPECT_EQ(99, two); + CHECK(two == 99); } -TEST_F(TApp, IniRequired) { +TEST_CASE_METHOD(TApp, "IniRequired", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -775,28 +773,28 @@ TEST_F(TApp, IniRequired) { args = {"--one=1"}; run(); - EXPECT_EQ(one, 1); - EXPECT_EQ(two, 99); - EXPECT_EQ(three, 3); + CHECK(1 == one); + CHECK(99 == two); + CHECK(3 == three); one = two = three = 0; args = {"--one=1", "--two=2"}; - EXPECT_NO_THROW(run()); - EXPECT_EQ(one, 1); - EXPECT_EQ(two, 2); - EXPECT_EQ(three, 3); + CHECK_NOTHROW(run()); + CHECK(1 == one); + CHECK(2 == two); + CHECK(3 == three); 
args = {}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--two=2"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } -TEST_F(TApp, IniVector) { +TEST_CASE_METHOD(TApp, "IniVector", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -815,10 +813,10 @@ TEST_F(TApp, IniVector) { run(); - EXPECT_EQ(std::vector<int>({2, 3}), two); - EXPECT_EQ(std::vector<int>({1, 2, 3}), three); + CHECK(two == std::vector<int>({2, 3})); + CHECK(three == std::vector<int>({1, 2, 3})); } -TEST_F(TApp, TOMLVector) { +TEST_CASE_METHOD(TApp, "TOMLVector", "[config]") { TempFile tmptoml{"TestTomlTmp.toml"}; @@ -838,11 +836,11 @@ TEST_F(TApp, TOMLVector) { run(); - EXPECT_EQ(std::vector<int>({2, 3}), two); - EXPECT_EQ(std::vector<int>({1, 2, 3}), three); + CHECK(two == std::vector<int>({2, 3})); + CHECK(three == std::vector<int>({1, 2, 3})); } -TEST_F(TApp, ColonValueSep) { +TEST_CASE_METHOD(TApp, "ColonValueSep", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -864,11 +862,11 @@ TEST_F(TApp, ColonValueSep) { run(); - EXPECT_EQ(2, two); - EXPECT_EQ(3, three); + CHECK(two == 2); + CHECK(three == 3); } -TEST_F(TApp, TOMLVectordirect) { +TEST_CASE_METHOD(TApp, "TOMLVectordirect", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -890,11 +888,11 @@ TEST_F(TApp, TOMLVectordirect) { run(); - EXPECT_EQ(std::vector<int>({2, 3}), two); - EXPECT_EQ(std::vector<int>({1, 2, 3}), three); + CHECK(two == std::vector<int>({2, 3})); + CHECK(three == std::vector<int>({1, 2, 3})); } -TEST_F(TApp, TOMLStringVector) { +TEST_CASE_METHOD(TApp, "TOMLStringVector", "[config]") { TempFile tmptoml{"TestTomlTmp.toml"}; @@ -914,11 +912,11 @@ TEST_F(TApp, TOMLStringVector) { run(); - EXPECT_EQ(std::vector<std::string>({"2", "3"}), two); - EXPECT_EQ(std::vector<std::string>({"1", "2", "3"}), three); + CHECK(two == std::vector<std::string>({"2", "3"})); + CHECK(three == std::vector<std::string>({"1", "2", "3"})); } 
-TEST_F(TApp, IniVectorCsep) { +TEST_CASE_METHOD(TApp, "IniVectorCsep", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -938,11 +936,11 @@ TEST_F(TApp, IniVectorCsep) { run(); - EXPECT_EQ(std::vector<int>({2, 3}), two); - EXPECT_EQ(std::vector<int>({1, 2, 3}), three); + CHECK(two == std::vector<int>({2, 3})); + CHECK(three == std::vector<int>({1, 2, 3})); } -TEST_F(TApp, IniVectorMultiple) { +TEST_CASE_METHOD(TApp, "IniVectorMultiple", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -965,11 +963,11 @@ TEST_F(TApp, IniVectorMultiple) { run(); - EXPECT_EQ(std::vector<int>({2, 3}), two); - EXPECT_EQ(std::vector<int>({1, 2, 3}), three); + CHECK(two == std::vector<int>({2, 3})); + CHECK(three == std::vector<int>({1, 2, 3})); } -TEST_F(TApp, IniLayered) { +TEST_CASE_METHOD(TApp, "IniLayered", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -993,15 +991,15 @@ TEST_F(TApp, IniLayered) { run(); - EXPECT_EQ(1, one); - EXPECT_EQ(2, two); - EXPECT_EQ(3, three); + CHECK(one == 1); + CHECK(two == 2); + CHECK(three == 3); - EXPECT_EQ(subcom->count(), 0U); - EXPECT_FALSE(*subcom); + CHECK(0U == subcom->count()); + CHECK(!*subcom); } -TEST_F(TApp, IniLayeredDotSection) { +TEST_CASE_METHOD(TApp, "IniLayeredDotSection", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1026,15 +1024,15 @@ TEST_F(TApp, IniLayeredDotSection) { run(); - EXPECT_EQ(1, one); - EXPECT_EQ(2, two); - EXPECT_EQ(3, three); + CHECK(one == 1); + CHECK(two == 2); + CHECK(three == 3); - EXPECT_EQ(subcom->count(), 0U); - EXPECT_FALSE(*subcom); + CHECK(0U == subcom->count()); + CHECK(!*subcom); } -TEST_F(TApp, IniSubcommandConfigurable) { +TEST_CASE_METHOD(TApp, "IniSubcommandConfigurable", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1059,16 +1057,16 @@ TEST_F(TApp, IniSubcommandConfigurable) { run(); - EXPECT_EQ(1, one); - EXPECT_EQ(2, two); - EXPECT_EQ(3, three); + CHECK(one == 1); + CHECK(two == 2); + CHECK(three == 3); - EXPECT_EQ(subcom->count(), 1U); - EXPECT_TRUE(*subcom); - 
EXPECT_TRUE(app.got_subcommand(subcom)); + CHECK(1U == subcom->count()); + CHECK(*subcom); + CHECK(app.got_subcommand(subcom)); } -TEST_F(TApp, IniSubcommandConfigurablePreParse) { +TEST_CASE_METHOD(TApp, "IniSubcommandConfigurablePreParse", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1098,18 +1096,18 @@ TEST_F(TApp, IniSubcommandConfigurablePreParse) { run(); - EXPECT_EQ(1, one); - EXPECT_EQ(2, two); - EXPECT_EQ(3, three); - EXPECT_EQ(0, four); + CHECK(one == 1); + CHECK(two == 2); + CHECK(three == 3); + CHECK(four == 0); - EXPECT_EQ(parse_c.size(), 1U); - EXPECT_EQ(parse_c[0], 2U); + CHECK(1U == parse_c.size()); + CHECK(2U == parse_c[0]); - EXPECT_EQ(subcom2->count(), 0U); + CHECK(0U == subcom2->count()); } -TEST_F(TApp, IniSubcommandConfigurableParseComplete) { +TEST_CASE_METHOD(TApp, "IniSubcommandConfigurableParseComplete", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1142,19 +1140,19 @@ TEST_F(TApp, IniSubcommandConfigurableParseComplete) { run(); - EXPECT_EQ(1, one); - EXPECT_EQ(2, two); - EXPECT_EQ(3, three); - EXPECT_EQ(0, four); + CHECK(one == 1); + CHECK(two == 2); + CHECK(three == 3); + CHECK(four == 0); - ASSERT_EQ(parse_c.size(), 2u); - EXPECT_EQ(parse_c[0], 68U); - EXPECT_EQ(parse_c[1], 58U); - EXPECT_EQ(subsubcom->count(), 1u); - EXPECT_EQ(subcom2->count(), 0u); + REQUIRE(2u == parse_c.size()); + CHECK(68U == parse_c[0]); + CHECK(58U == parse_c[1]); + CHECK(1u == subsubcom->count()); + CHECK(0u == subcom2->count()); } -TEST_F(TApp, IniSubcommandMultipleSections) { +TEST_CASE_METHOD(TApp, "IniSubcommandMultipleSections", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1190,19 +1188,19 @@ TEST_F(TApp, IniSubcommandMultipleSections) { run(); - EXPECT_EQ(1, one); - EXPECT_EQ(2, two); - EXPECT_EQ(3, three); - EXPECT_EQ(4, four); + CHECK(one == 1); + CHECK(two == 2); + CHECK(three == 3); + CHECK(four == 4); - ASSERT_EQ(parse_c.size(), 2u); - EXPECT_EQ(parse_c[0], 68U); - EXPECT_EQ(parse_c[1], 58U); - EXPECT_EQ(subsubcom->count(), 
1u); - EXPECT_EQ(subcom2->count(), 0u); // not configurable but value is updated + REQUIRE(2u == parse_c.size()); + CHECK(68U == parse_c[0]); + CHECK(58U == parse_c[1]); + CHECK(1u == subsubcom->count()); + CHECK(0u == subcom2->count()); } -TEST_F(TApp, DuplicateSubcommandCallbacks) { +TEST_CASE_METHOD(TApp, "DuplicateSubcommandCallbacks", "[config]") { TempFile tmptoml{"TesttomlTmp.toml"}; @@ -1219,14 +1217,14 @@ TEST_F(TApp, DuplicateSubcommandCallbacks) { int count{0}; foo->callback([&count]() { ++count; }); foo->immediate_callback(); - EXPECT_TRUE(foo->get_immediate_callback()); + CHECK(foo->get_immediate_callback()); foo->configurable(); run(); - EXPECT_EQ(count, 3); + CHECK(3 == count); } -TEST_F(TApp, IniFailure) { +TEST_CASE_METHOD(TApp, "IniFailure", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1238,10 +1236,10 @@ TEST_F(TApp, IniFailure) { out << "val=1" << std::endl; } - EXPECT_THROW(run(), CLI::ConfigError); + CHECK_THROWS_AS(run(), CLI::ConfigError); } -TEST_F(TApp, IniConfigurable) { +TEST_CASE_METHOD(TApp, "IniConfigurable", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1255,11 +1253,11 @@ TEST_F(TApp, IniConfigurable) { out << "val=1" << std::endl; } - ASSERT_NO_THROW(run()); - EXPECT_TRUE(value); + REQUIRE_NOTHROW(run()); + CHECK(value); } -TEST_F(TApp, IniNotConfigurable) { +TEST_CASE_METHOD(TApp, "IniNotConfigurable", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1273,10 +1271,10 @@ TEST_F(TApp, IniNotConfigurable) { out << "val=1" << std::endl; } - EXPECT_THROW(run(), CLI::ConfigError); + CHECK_THROWS_AS(run(), CLI::ConfigError); } -TEST_F(TApp, IniSubFailure) { +TEST_CASE_METHOD(TApp, "IniSubFailure", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1289,10 +1287,10 @@ TEST_F(TApp, IniSubFailure) { out << "val=1" << std::endl; } - EXPECT_THROW(run(), CLI::ConfigError); + CHECK_THROWS_AS(run(), CLI::ConfigError); } -TEST_F(TApp, IniNoSubFailure) { +TEST_CASE_METHOD(TApp, "IniNoSubFailure", "[config]") { TempFile 
tmpini{"TestIniTmp.ini"}; @@ -1304,10 +1302,10 @@ TEST_F(TApp, IniNoSubFailure) { out << "val=1" << std::endl; } - EXPECT_THROW(run(), CLI::ConfigError); + CHECK_THROWS_AS(run(), CLI::ConfigError); } -TEST_F(TApp, IniFlagConvertFailure) { +TEST_CASE_METHOD(TApp, "IniFlagConvertFailure", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1321,13 +1319,13 @@ TEST_F(TApp, IniFlagConvertFailure) { run(); bool result{false}; auto *opt = app.get_option("--flag"); - EXPECT_THROW(opt->results(result), CLI::ConversionError); + CHECK_THROWS_AS(opt->results(result), CLI::ConversionError); std::string res; opt->results(res); - EXPECT_EQ(res, "moobook"); + CHECK("moobook" == res); } -TEST_F(TApp, IniFlagNumbers) { +TEST_CASE_METHOD(TApp, "IniFlagNumbers", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1340,11 +1338,11 @@ TEST_F(TApp, IniFlagNumbers) { out << "flag=3" << std::endl; } - ASSERT_NO_THROW(run()); - EXPECT_TRUE(boo); + REQUIRE_NOTHROW(run()); + CHECK(boo); } -TEST_F(TApp, IniFlagDual) { +TEST_CASE_METHOD(TApp, "IniFlagDual", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1358,10 +1356,10 @@ TEST_F(TApp, IniFlagDual) { out << "flag=1 1" << std::endl; } - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, IniShort) { +TEST_CASE_METHOD(TApp, "IniShort", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1374,11 +1372,11 @@ TEST_F(TApp, IniShort) { out << "f=3" << std::endl; } - ASSERT_NO_THROW(run()); - EXPECT_EQ(key, 3); + REQUIRE_NOTHROW(run()); + CHECK(3 == key); } -TEST_F(TApp, IniPositional) { +TEST_CASE_METHOD(TApp, "IniPositional", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1391,11 +1389,11 @@ TEST_F(TApp, IniPositional) { out << "key=3" << std::endl; } - ASSERT_NO_THROW(run()); - EXPECT_EQ(key, 3); + REQUIRE_NOTHROW(run()); + CHECK(3 == key); } -TEST_F(TApp, IniEnvironmental) { +TEST_CASE_METHOD(TApp, "IniEnvironmental", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1408,11 
+1406,11 @@ TEST_F(TApp, IniEnvironmental) { out << "CLI11_TEST_ENV_KEY_TMP=3" << std::endl; } - ASSERT_NO_THROW(run()); - EXPECT_EQ(key, 3); + REQUIRE_NOTHROW(run()); + CHECK(3 == key); } -TEST_F(TApp, IniFlagText) { +TEST_CASE_METHOD(TApp, "IniFlagText", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1433,13 +1431,13 @@ TEST_F(TApp, IniFlagText) { run(); - EXPECT_TRUE(flag1); - EXPECT_TRUE(flag2); - EXPECT_FALSE(flag3); - EXPECT_TRUE(flag4); + CHECK(flag1); + CHECK(flag2); + CHECK(!flag3); + CHECK(flag4); } -TEST_F(TApp, IniFlags) { +TEST_CASE_METHOD(TApp, "IniFlags", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); @@ -1461,13 +1459,13 @@ TEST_F(TApp, IniFlags) { run(); - EXPECT_EQ(2, two); - EXPECT_TRUE(three); - EXPECT_TRUE(four); - EXPECT_TRUE(five); + CHECK(two == 2); + CHECK(three); + CHECK(four); + CHECK(five); } -TEST_F(TApp, IniFalseFlags) { +TEST_CASE_METHOD(TApp, "IniFalseFlags", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); @@ -1489,13 +1487,13 @@ TEST_F(TApp, IniFalseFlags) { run(); - EXPECT_EQ(-2, two); - EXPECT_FALSE(three); - EXPECT_TRUE(four); - EXPECT_TRUE(five); + CHECK(two == -2); + CHECK(!three); + CHECK(four); + CHECK(five); } -TEST_F(TApp, IniFalseFlagsDef) { +TEST_CASE_METHOD(TApp, "IniFalseFlagsDef", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); @@ -1517,13 +1515,13 @@ TEST_F(TApp, IniFalseFlagsDef) { run(); - EXPECT_EQ(-2, two); - EXPECT_TRUE(three); - EXPECT_FALSE(four); - EXPECT_TRUE(five); + CHECK(two == -2); + CHECK(three); + CHECK(!four); + CHECK(five); } -TEST_F(TApp, IniFalseFlagsDefDisableOverrideError) { +TEST_CASE_METHOD(TApp, "IniFalseFlagsDefDisableOverrideError", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); @@ -1541,10 +1539,10 @@ TEST_F(TApp, IniFalseFlagsDefDisableOverrideError) { app.add_flag("!--four", four); app.add_flag("--five", five); - EXPECT_THROW(run(), 
CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, IniFalseFlagsDefDisableOverrideSuccess) { +TEST_CASE_METHOD(TApp, "IniFalseFlagsDefDisableOverrideSuccess", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); @@ -1563,12 +1561,12 @@ TEST_F(TApp, IniFalseFlagsDefDisableOverrideSuccess) { run(); - EXPECT_EQ(2, two); - EXPECT_EQ(4, four); - EXPECT_EQ(15, val); + CHECK(two == 2); + CHECK(four == 4); + CHECK(val == 15); } -TEST_F(TApp, TomlOutputSimple) { +TEST_CASE_METHOD(TApp, "TomlOutputSimple", "[config]") { int v{0}; app.add_option("--simple", v); @@ -1578,10 +1576,10 @@ TEST_F(TApp, TomlOutputSimple) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("simple=3\n", str); + CHECK(str == "simple=3\n"); } -TEST_F(TApp, TomlOutputShort) { +TEST_CASE_METHOD(TApp, "TomlOutputShort", "[config]") { int v{0}; app.add_option("-s", v); @@ -1591,10 +1589,10 @@ TEST_F(TApp, TomlOutputShort) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("s=3\n", str); + CHECK(str == "s=3\n"); } -TEST_F(TApp, TomlOutputPositional) { +TEST_CASE_METHOD(TApp, "TomlOutputPositional", "[config]") { int v{0}; app.add_option("pos", v); @@ -1604,11 +1602,11 @@ TEST_F(TApp, TomlOutputPositional) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("pos=3\n", str); + CHECK(str == "pos=3\n"); } // try the output with environmental only arguments -TEST_F(TApp, TomlOutputEnvironmental) { +TEST_CASE_METHOD(TApp, "TomlOutputEnvironmental", "[config]") { put_env("CLI11_TEST_ENV_TMP", "2"); @@ -1617,14 +1615,14 @@ TEST_F(TApp, TomlOutputEnvironmental) { run(); - EXPECT_EQ(2, val); + CHECK(val == 2); std::string str = app.config_to_str(); - EXPECT_EQ("CLI11_TEST_ENV_TMP=2\n", str); + CHECK(str == "CLI11_TEST_ENV_TMP=2\n"); unset_env("CLI11_TEST_ENV_TMP"); } -TEST_F(TApp, TomlOutputNoConfigurable) { +TEST_CASE_METHOD(TApp, "TomlOutputNoConfigurable", "[config]") { int v1{0}, v2{0}; app.add_option("--simple", v1); 
@@ -1635,10 +1633,10 @@ TEST_F(TApp, TomlOutputNoConfigurable) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("simple=3\n", str); + CHECK(str == "simple=3\n"); } -TEST_F(TApp, TomlOutputShortSingleDescription) { +TEST_CASE_METHOD(TApp, "TomlOutputShortSingleDescription", "[config]") { std::string flag = "some_flag"; const std::string description = "Some short description."; app.add_flag("--" + flag, description); @@ -1646,10 +1644,10 @@ TEST_F(TApp, TomlOutputShortSingleDescription) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("# " + description + "\n" + flag + "=false\n")); + CHECK_THAT(str, Contains("# " + description + "\n" + flag + "=false\n")); } -TEST_F(TApp, TomlOutputShortDoubleDescription) { +TEST_CASE_METHOD(TApp, "TomlOutputShortDoubleDescription", "[config]") { std::string flag1 = "flagnr1"; std::string flag2 = "flagnr2"; const std::string description1 = "First description."; @@ -1660,11 +1658,11 @@ TEST_F(TApp, TomlOutputShortDoubleDescription) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT( - str, HasSubstr("# " + description1 + "\n" + flag1 + "=false\n\n# " + description2 + "\n" + flag2 + "=false\n")); + std::string ans = "# " + description1 + "\n" + flag1 + "=false\n\n# " + description2 + "\n" + flag2 + "=false\n"; + CHECK_THAT(str, Contains(ans)); } -TEST_F(TApp, TomlOutputGroups) { +TEST_CASE_METHOD(TApp, "TomlOutputGroups", "[config]") { std::string flag1 = "flagnr1"; std::string flag2 = "flagnr2"; const std::string description1 = "First description."; @@ -1675,11 +1673,11 @@ TEST_F(TApp, TomlOutputGroups) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("group1")); - EXPECT_THAT(str, HasSubstr("group2")); + CHECK_THAT(str, Contains("group1")); + CHECK_THAT(str, Contains("group2")); } -TEST_F(TApp, TomlOutputHiddenOptions) { +TEST_CASE_METHOD(TApp, "TomlOutputHiddenOptions", "[config]") { std::string flag1 = "flagnr1"; std::string 
flag2 = "flagnr2"; double val{12.7}; @@ -1692,18 +1690,18 @@ TEST_F(TApp, TomlOutputHiddenOptions) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("group1")); - EXPECT_THAT(str, HasSubstr("group2")); - EXPECT_THAT(str, HasSubstr("dval=12.7")); + CHECK_THAT(str, Contains("group1")); + CHECK_THAT(str, Contains("group2")); + CHECK_THAT(str, Contains("dval=12.7")); auto loc = str.find("dval=12.7"); auto locg1 = str.find("group1"); - EXPECT_GT(locg1, loc); + CHECK(loc < locg1); // make sure it doesn't come twice loc = str.find("dval=12.7", loc + 4); - EXPECT_EQ(loc, std::string::npos); + CHECK(std::string::npos == loc); } -TEST_F(TApp, TomlOutputMultiLineDescription) { +TEST_CASE_METHOD(TApp, "TomlOutputMultiLineDescription", "[config]") { std::string flag = "some_flag"; const std::string description = "Some short description.\nThat has lines."; app.add_flag("--" + flag, description); @@ -1711,12 +1709,12 @@ TEST_F(TApp, TomlOutputMultiLineDescription) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("# Some short description.\n")); - EXPECT_THAT(str, HasSubstr("# That has lines.\n")); - EXPECT_THAT(str, HasSubstr(flag + "=false\n")); + CHECK_THAT(str, Contains("# Some short description.\n")); + CHECK_THAT(str, Contains("# That has lines.\n")); + CHECK_THAT(str, Contains(flag + "=false\n")); } -TEST_F(TApp, TomlOutputOptionGroup) { +TEST_CASE_METHOD(TApp, "TomlOutputOptionGroup", "[config]") { std::string flag1 = "flagnr1"; std::string flag2 = "flagnr2"; double val{12.7}; @@ -1730,22 +1728,22 @@ TEST_F(TApp, TomlOutputOptionGroup) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("group1")); - EXPECT_THAT(str, HasSubstr("group2")); - EXPECT_THAT(str, HasSubstr("dval=12.7")); - EXPECT_THAT(str, HasSubstr("group3")); - EXPECT_THAT(str, HasSubstr("g3 desc")); + CHECK_THAT(str, Contains("group1")); + CHECK_THAT(str, Contains("group2")); + CHECK_THAT(str, 
Contains("dval=12.7")); + CHECK_THAT(str, Contains("group3")); + CHECK_THAT(str, Contains("g3 desc")); auto loc = str.find("dval=12.7"); auto locg1 = str.find("group1"); auto locg3 = str.find("group3"); - EXPECT_LT(locg1, loc); + CHECK(loc > locg1); // make sure it doesn't come twice loc = str.find("dval=12.7", loc + 4); - EXPECT_EQ(loc, std::string::npos); - EXPECT_GT(locg3, locg1); + CHECK(std::string::npos == loc); + CHECK(locg1 < locg3); } -TEST_F(TApp, TomlOutputVector) { +TEST_CASE_METHOD(TApp, "TomlOutputVector", "[config]") { std::vector<int> v; app.add_option("--vector", v); @@ -1755,10 +1753,10 @@ TEST_F(TApp, TomlOutputVector) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("vector=[1, 2, 3]\n", str); + CHECK(str == "vector=[1, 2, 3]\n"); } -TEST_F(TApp, ConfigOutputVectorCustom) { +TEST_CASE_METHOD(TApp, "ConfigOutputVectorCustom", "[config]") { std::vector<int> v; app.add_option("--vector", v); @@ -1770,10 +1768,10 @@ TEST_F(TApp, ConfigOutputVectorCustom) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("vector:{1; 2; 3}\n", str); + CHECK(str == "vector:{1; 2; 3}\n"); } -TEST_F(TApp, TomlOutputFlag) { +TEST_CASE_METHOD(TApp, "TomlOutputFlag", "[config]") { int v{0}, q{0}; app.add_option("--simple", v); @@ -1786,16 +1784,16 @@ TEST_F(TApp, TomlOutputFlag) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=3")); - EXPECT_THAT(str, Not(HasSubstr("nothing"))); - EXPECT_THAT(str, HasSubstr("onething=true")); - EXPECT_THAT(str, HasSubstr("something=[true, true]")); + CHECK_THAT(str, Contains("simple=3")); + CHECK_THAT(str, !Contains("nothing")); + CHECK_THAT(str, Contains("onething=true")); + CHECK_THAT(str, Contains("something=[true, true]")); str = app.config_to_str(true); - EXPECT_THAT(str, HasSubstr("nothing")); + CHECK_THAT(str, Contains("nothing")); } -TEST_F(TApp, TomlOutputSet) { +TEST_CASE_METHOD(TApp, "TomlOutputSet", "[config]") { int v{0}; app.add_option("--simple", 
v)->check(CLI::IsMember({1, 2, 3})); @@ -1805,10 +1803,10 @@ TEST_F(TApp, TomlOutputSet) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=2")); + CHECK_THAT(str, Contains("simple=2")); } -TEST_F(TApp, TomlOutputDefault) { +TEST_CASE_METHOD(TApp, "TomlOutputDefault", "[config]") { int v{7}; app.add_option("--simple", v, "", true); @@ -1816,13 +1814,13 @@ TEST_F(TApp, TomlOutputDefault) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, Not(HasSubstr("simple=7"))); + CHECK_THAT(str, !Contains("simple=7")); str = app.config_to_str(true); - EXPECT_THAT(str, HasSubstr("simple=7")); + CHECK_THAT(str, Contains("simple=7")); } -TEST_F(TApp, TomlOutputSubcom) { +TEST_CASE_METHOD(TApp, "TomlOutputSubcom", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other"); @@ -1832,11 +1830,11 @@ TEST_F(TApp, TomlOutputSubcom) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("other.newer=true")); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("other.newer=true")); } -TEST_F(TApp, TomlOutputSubcomConfigurable) { +TEST_CASE_METHOD(TApp, "TomlOutputSubcomConfigurable", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other")->configurable(); @@ -1846,13 +1844,13 @@ TEST_F(TApp, TomlOutputSubcomConfigurable) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("[other]")); - EXPECT_THAT(str, HasSubstr("newer=true")); - EXPECT_EQ(str.find("other.newer=true"), std::string::npos); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("[other]")); + CHECK_THAT(str, Contains("newer=true")); + CHECK(std::string::npos == str.find("other.newer=true")); } -TEST_F(TApp, TomlOutputSubsubcom) { +TEST_CASE_METHOD(TApp, "TomlOutputSubsubcom", "[config]") { app.add_flag("--simple"); auto subcom = 
app.add_subcommand("other"); @@ -1864,12 +1862,12 @@ TEST_F(TApp, TomlOutputSubsubcom) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("other.newer=true")); - EXPECT_THAT(str, HasSubstr("other.sub2.newest=true")); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("other.newer=true")); + CHECK_THAT(str, Contains("other.sub2.newest=true")); } -TEST_F(TApp, TomlOutputSubsubcomConfigurable) { +TEST_CASE_METHOD(TApp, "TomlOutputSubsubcomConfigurable", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other")->configurable(); @@ -1882,15 +1880,15 @@ TEST_F(TApp, TomlOutputSubsubcomConfigurable) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("[other]")); - EXPECT_THAT(str, HasSubstr("newer=true")); - EXPECT_THAT(str, HasSubstr("[other.sub2]")); - EXPECT_THAT(str, HasSubstr("newest=true")); - EXPECT_EQ(str.find("sub2.newest=true"), std::string::npos); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("[other]")); + CHECK_THAT(str, Contains("newer=true")); + CHECK_THAT(str, Contains("[other.sub2]")); + CHECK_THAT(str, Contains("newest=true")); + CHECK(std::string::npos == str.find("sub2.newest=true")); } -TEST_F(TApp, TomlOutputSubsubcomConfigurableDeep) { +TEST_CASE_METHOD(TApp, "TomlOutputSubsubcomConfigurableDeep", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other")->configurable(); @@ -1907,13 +1905,13 @@ TEST_F(TApp, TomlOutputSubsubcomConfigurableDeep) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("[other.sub2.sub-level2.sub-level3]")); - EXPECT_THAT(str, HasSubstr("absolute_newest=true")); - EXPECT_EQ(str.find(".absolute_newest=true"), std::string::npos); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, 
Contains("[other.sub2.sub-level2.sub-level3]")); + CHECK_THAT(str, Contains("absolute_newest=true")); + CHECK(std::string::npos == str.find(".absolute_newest=true")); } -TEST_F(TApp, TomlOutputQuoted) { +TEST_CASE_METHOD(TApp, "TomlOutputQuoted", "[config]") { std::string val1; app.add_option("--val1", val1); @@ -1925,15 +1923,15 @@ TEST_F(TApp, TomlOutputQuoted) { run(); - EXPECT_EQ("I am a string", val1); - EXPECT_EQ("I am a \"confusing\" string", val2); + CHECK(val1 == "I am a string"); + CHECK(val2 == "I am a \"confusing\" string"); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("val1=\"I am a string\"")); - EXPECT_THAT(str, HasSubstr("val2='I am a \"confusing\" string'")); + CHECK_THAT(str, Contains("val1=\"I am a string\"")); + CHECK_THAT(str, Contains("val2='I am a \"confusing\" string'")); } -TEST_F(TApp, DefaultsTomlOutputQuoted) { +TEST_CASE_METHOD(TApp, "DefaultsTomlOutputQuoted", "[config]") { std::string val1{"I am a string"}; app.add_option("--val1", val1, "", true); @@ -1944,18 +1942,18 @@ TEST_F(TApp, DefaultsTomlOutputQuoted) { run(); std::string str = app.config_to_str(true); - EXPECT_THAT(str, HasSubstr("val1=\"I am a string\"")); - EXPECT_THAT(str, HasSubstr("val2='I am a \"confusing\" string'")); + CHECK_THAT(str, Contains("val1=\"I am a string\"")); + CHECK_THAT(str, Contains("val2='I am a \"confusing\" string'")); } // #298 -TEST_F(TApp, StopReadingConfigOnClear) { +TEST_CASE_METHOD(TApp, "StopReadingConfigOnClear", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; app.set_config("--config", tmpini); auto ptr = app.set_config(); // Should *not* read config file - EXPECT_EQ(ptr, nullptr); + CHECK(nullptr == ptr); { std::ofstream out{tmpini}; @@ -1967,10 +1965,10 @@ TEST_F(TApp, StopReadingConfigOnClear) { run(); - EXPECT_EQ(volume, 0); + CHECK(0 == volume); } -TEST_F(TApp, ConfigWriteReadWrite) { +TEST_CASE_METHOD(TApp, "ConfigWriteReadWrite", "[config]") { TempFile tmpini{"TestIniTmp.ini"}; @@ -1989,12 +1987,12 @@ 
TEST_F(TApp, ConfigWriteReadWrite) { std::string config2 = app.config_to_str(true, true); - EXPECT_EQ(config1, config2); + CHECK(config2 == config1); } /////// INI output tests -TEST_F(TApp, IniOutputSimple) { +TEST_CASE_METHOD(TApp, "IniOutputSimple", "[config]") { int v{0}; app.add_option("--simple", v); @@ -2004,10 +2002,10 @@ TEST_F(TApp, IniOutputSimple) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("simple=3\n", str); + CHECK(str == "simple=3\n"); } -TEST_F(TApp, IniOutputNoConfigurable) { +TEST_CASE_METHOD(TApp, "IniOutputNoConfigurable", "[config]") { int v1{0}, v2{0}; app.add_option("--simple", v1); @@ -2018,10 +2016,10 @@ TEST_F(TApp, IniOutputNoConfigurable) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("simple=3\n", str); + CHECK(str == "simple=3\n"); } -TEST_F(TApp, IniOutputShortSingleDescription) { +TEST_CASE_METHOD(TApp, "IniOutputShortSingleDescription", "[config]") { std::string flag = "some_flag"; const std::string description = "Some short description."; app.add_flag("--" + flag, description); @@ -2029,10 +2027,10 @@ TEST_F(TApp, IniOutputShortSingleDescription) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("; " + description + "\n" + flag + "=false\n")); + CHECK_THAT(str, Contains("; " + description + "\n" + flag + "=false\n")); } -TEST_F(TApp, IniOutputShortDoubleDescription) { +TEST_CASE_METHOD(TApp, "IniOutputShortDoubleDescription", "[config]") { std::string flag1 = "flagnr1"; std::string flag2 = "flagnr2"; const std::string description1 = "First description."; @@ -2043,11 +2041,11 @@ TEST_F(TApp, IniOutputShortDoubleDescription) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT( - str, HasSubstr("; " + description1 + "\n" + flag1 + "=false\n\n; " + description2 + "\n" + flag2 + "=false\n")); + std::string ans = "; " + description1 + "\n" + flag1 + "=false\n\n; " + description2 + "\n" + flag2 + "=false\n"; + CHECK_THAT(str, Contains(ans)); } 
-TEST_F(TApp, IniOutputGroups) { +TEST_CASE_METHOD(TApp, "IniOutputGroups", "[config]") { std::string flag1 = "flagnr1"; std::string flag2 = "flagnr2"; const std::string description1 = "First description."; @@ -2058,11 +2056,11 @@ TEST_F(TApp, IniOutputGroups) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("group1")); - EXPECT_THAT(str, HasSubstr("group2")); + CHECK_THAT(str, Contains("group1")); + CHECK_THAT(str, Contains("group2")); } -TEST_F(TApp, IniOutputHiddenOptions) { +TEST_CASE_METHOD(TApp, "IniOutputHiddenOptions", "[config]") { std::string flag1 = "flagnr1"; std::string flag2 = "flagnr2"; double val{12.7}; @@ -2075,18 +2073,18 @@ TEST_F(TApp, IniOutputHiddenOptions) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("group1")); - EXPECT_THAT(str, HasSubstr("group2")); - EXPECT_THAT(str, HasSubstr("dval=12.7")); + CHECK_THAT(str, Contains("group1")); + CHECK_THAT(str, Contains("group2")); + CHECK_THAT(str, Contains("dval=12.7")); auto loc = str.find("dval=12.7"); auto locg1 = str.find("group1"); - EXPECT_GT(locg1, loc); + CHECK(loc < locg1); // make sure it doesn't come twice loc = str.find("dval=12.7", loc + 4); - EXPECT_EQ(loc, std::string::npos); + CHECK(std::string::npos == loc); } -TEST_F(TApp, IniOutputMultiLineDescription) { +TEST_CASE_METHOD(TApp, "IniOutputMultiLineDescription", "[config]") { std::string flag = "some_flag"; const std::string description = "Some short description.\nThat has lines."; app.add_flag("--" + flag, description); @@ -2094,12 +2092,12 @@ TEST_F(TApp, IniOutputMultiLineDescription) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("; Some short description.\n")); - EXPECT_THAT(str, HasSubstr("; That has lines.\n")); - EXPECT_THAT(str, HasSubstr(flag + "=false\n")); + CHECK_THAT(str, Contains("; Some short description.\n")); + CHECK_THAT(str, Contains("; That has lines.\n")); + CHECK_THAT(str, Contains(flag + 
"=false\n")); } -TEST_F(TApp, IniOutputOptionGroup) { +TEST_CASE_METHOD(TApp, "IniOutputOptionGroup", "[config]") { std::string flag1 = "flagnr1"; std::string flag2 = "flagnr2"; double val{12.7}; @@ -2113,22 +2111,22 @@ TEST_F(TApp, IniOutputOptionGroup) { run(); std::string str = app.config_to_str(true, true); - EXPECT_THAT(str, HasSubstr("group1")); - EXPECT_THAT(str, HasSubstr("group2")); - EXPECT_THAT(str, HasSubstr("dval=12.7")); - EXPECT_THAT(str, HasSubstr("group3")); - EXPECT_THAT(str, HasSubstr("g3 desc")); + CHECK_THAT(str, Contains("group1")); + CHECK_THAT(str, Contains("group2")); + CHECK_THAT(str, Contains("dval=12.7")); + CHECK_THAT(str, Contains("group3")); + CHECK_THAT(str, Contains("g3 desc")); auto loc = str.find("dval=12.7"); auto locg1 = str.find("group1"); auto locg3 = str.find("group3"); - EXPECT_LT(locg1, loc); + CHECK(loc > locg1); // make sure it doesn't come twice loc = str.find("dval=12.7", loc + 4); - EXPECT_EQ(loc, std::string::npos); - EXPECT_GT(locg3, locg1); + CHECK(std::string::npos == loc); + CHECK(locg1 < locg3); } -TEST_F(TApp, IniOutputVector) { +TEST_CASE_METHOD(TApp, "IniOutputVector", "[config]") { std::vector<int> v; app.add_option("--vector", v); @@ -2138,10 +2136,10 @@ TEST_F(TApp, IniOutputVector) { run(); std::string str = app.config_to_str(); - EXPECT_EQ("vector=1 2 3\n", str); + CHECK(str == "vector=1 2 3\n"); } -TEST_F(TApp, IniOutputFlag) { +TEST_CASE_METHOD(TApp, "IniOutputFlag", "[config]") { int v{0}, q{0}; app.add_option("--simple", v); @@ -2154,16 +2152,16 @@ TEST_F(TApp, IniOutputFlag) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=3")); - EXPECT_THAT(str, Not(HasSubstr("nothing"))); - EXPECT_THAT(str, HasSubstr("onething=true")); - EXPECT_THAT(str, HasSubstr("something=true true")); + CHECK_THAT(str, Contains("simple=3")); + CHECK_THAT(str, !Contains("nothing")); + CHECK_THAT(str, Contains("onething=true")); + CHECK_THAT(str, Contains("something=true true")); str = 
app.config_to_str(true); - EXPECT_THAT(str, HasSubstr("nothing")); + CHECK_THAT(str, Contains("nothing")); } -TEST_F(TApp, IniOutputSet) { +TEST_CASE_METHOD(TApp, "IniOutputSet", "[config]") { int v{0}; app.add_option("--simple", v)->check(CLI::IsMember({1, 2, 3})); @@ -2173,10 +2171,10 @@ TEST_F(TApp, IniOutputSet) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=2")); + CHECK_THAT(str, Contains("simple=2")); } -TEST_F(TApp, IniOutputDefault) { +TEST_CASE_METHOD(TApp, "IniOutputDefault", "[config]") { int v{7}; app.add_option("--simple", v, "", true); @@ -2184,13 +2182,13 @@ TEST_F(TApp, IniOutputDefault) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, Not(HasSubstr("simple=7"))); + CHECK_THAT(str, !Contains("simple=7")); str = app.config_to_str(true); - EXPECT_THAT(str, HasSubstr("simple=7")); + CHECK_THAT(str, Contains("simple=7")); } -TEST_F(TApp, IniOutputSubcom) { +TEST_CASE_METHOD(TApp, "IniOutputSubcom", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other"); @@ -2200,11 +2198,11 @@ TEST_F(TApp, IniOutputSubcom) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("other.newer=true")); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("other.newer=true")); } -TEST_F(TApp, IniOutputSubcomConfigurable) { +TEST_CASE_METHOD(TApp, "IniOutputSubcomConfigurable", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other")->configurable(); @@ -2214,13 +2212,13 @@ TEST_F(TApp, IniOutputSubcomConfigurable) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("[other]")); - EXPECT_THAT(str, HasSubstr("newer=true")); - EXPECT_EQ(str.find("other.newer=true"), std::string::npos); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("[other]")); + CHECK_THAT(str, Contains("newer=true")); + 
CHECK(std::string::npos == str.find("other.newer=true")); } -TEST_F(TApp, IniOutputSubsubcom) { +TEST_CASE_METHOD(TApp, "IniOutputSubsubcom", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other"); @@ -2232,12 +2230,12 @@ TEST_F(TApp, IniOutputSubsubcom) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("other.newer=true")); - EXPECT_THAT(str, HasSubstr("other.sub2.newest=true")); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("other.newer=true")); + CHECK_THAT(str, Contains("other.sub2.newest=true")); } -TEST_F(TApp, IniOutputSubsubcomConfigurable) { +TEST_CASE_METHOD(TApp, "IniOutputSubsubcomConfigurable", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other")->configurable(); @@ -2250,15 +2248,15 @@ TEST_F(TApp, IniOutputSubsubcomConfigurable) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("[other]")); - EXPECT_THAT(str, HasSubstr("newer=true")); - EXPECT_THAT(str, HasSubstr("[other.sub2]")); - EXPECT_THAT(str, HasSubstr("newest=true")); - EXPECT_EQ(str.find("sub2.newest=true"), std::string::npos); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("[other]")); + CHECK_THAT(str, Contains("newer=true")); + CHECK_THAT(str, Contains("[other.sub2]")); + CHECK_THAT(str, Contains("newest=true")); + CHECK(std::string::npos == str.find("sub2.newest=true")); } -TEST_F(TApp, IniOutputSubsubcomConfigurableDeep) { +TEST_CASE_METHOD(TApp, "IniOutputSubsubcomConfigurableDeep", "[config]") { app.add_flag("--simple"); auto subcom = app.add_subcommand("other")->configurable(); @@ -2275,13 +2273,13 @@ TEST_F(TApp, IniOutputSubsubcomConfigurableDeep) { run(); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("simple=true")); - EXPECT_THAT(str, HasSubstr("[other.sub2.sub-level2.sub-level3]")); - EXPECT_THAT(str, 
HasSubstr("absolute_newest=true")); - EXPECT_EQ(str.find(".absolute_newest=true"), std::string::npos); + CHECK_THAT(str, Contains("simple=true")); + CHECK_THAT(str, Contains("[other.sub2.sub-level2.sub-level3]")); + CHECK_THAT(str, Contains("absolute_newest=true")); + CHECK(std::string::npos == str.find(".absolute_newest=true")); } -TEST_F(TApp, IniOutputQuoted) { +TEST_CASE_METHOD(TApp, "IniOutputQuoted", "[config]") { std::string val1; app.add_option("--val1", val1); @@ -2293,15 +2291,15 @@ TEST_F(TApp, IniOutputQuoted) { run(); - EXPECT_EQ("I am a string", val1); - EXPECT_EQ("I am a \"confusing\" string", val2); + CHECK(val1 == "I am a string"); + CHECK(val2 == "I am a \"confusing\" string"); std::string str = app.config_to_str(); - EXPECT_THAT(str, HasSubstr("val1=\"I am a string\"")); - EXPECT_THAT(str, HasSubstr("val2='I am a \"confusing\" string'")); + CHECK_THAT(str, Contains("val1=\"I am a string\"")); + CHECK_THAT(str, Contains("val2='I am a \"confusing\" string'")); } -TEST_F(TApp, DefaultsIniOutputQuoted) { +TEST_CASE_METHOD(TApp, "DefaultsIniOutputQuoted", "[config]") { std::string val1{"I am a string"}; app.add_option("--val1", val1, "", true); @@ -2312,6 +2310,6 @@ TEST_F(TApp, DefaultsIniOutputQuoted) { run(); std::string str = app.config_to_str(true); - EXPECT_THAT(str, HasSubstr("val1=\"I am a string\"")); - EXPECT_THAT(str, HasSubstr("val2='I am a \"confusing\" string'")); + CHECK_THAT(str, Contains("val1=\"I am a string\"")); + CHECK_THAT(str, Contains("val2='I am a \"confusing\" string'")); } diff --git a/packages/CLI11/tests/CreationTest.cpp b/packages/CLI11/tests/CreationTest.cpp index 648c4aebba3de45e1e4e85232eafdd54e4c2ce0c..2a70f70d419552e96e13e0cf3a3c6f524a64ce68 100644 --- a/packages/CLI11/tests/CreationTest.cpp +++ b/packages/CLI11/tests/CreationTest.cpp @@ -7,221 +7,224 @@ #include "app_helper.hpp" #include <cstdlib> -TEST_F(TApp, AddingExistingShort) { +TEST_CASE_METHOD(TApp, "AddingExistingShort", "[creation]") { CLI::Option *opt = 
app.add_flag("-c,--count"); - EXPECT_EQ(opt->get_lnames(), std::vector<std::string>({"count"})); - EXPECT_EQ(opt->get_snames(), std::vector<std::string>({"c"})); + CHECK(std::vector<std::string>({"count"}) == opt->get_lnames()); + CHECK(std::vector<std::string>({"c"}) == opt->get_snames()); - EXPECT_THROW(app.add_flag("--cat,-c"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_flag("--cat,-c"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingExistingLong) { +TEST_CASE_METHOD(TApp, "AddingExistingLong", "[creation]") { app.add_flag("-q,--count"); - EXPECT_THROW(app.add_flag("--count,-c"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_flag("--count,-c"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingExistingShortNoCase) { +TEST_CASE_METHOD(TApp, "AddingExistingShortNoCase", "[creation]") { app.add_flag("-C,--count")->ignore_case(); - EXPECT_THROW(app.add_flag("--cat,-c"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_flag("--cat,-c"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingExistingLongNoCase) { +TEST_CASE_METHOD(TApp, "AddingExistingLongNoCase", "[creation]") { app.add_flag("-q,--count")->ignore_case(); - EXPECT_THROW(app.add_flag("--Count,-c"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_flag("--Count,-c"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingExistingNoCaseReversed) { +TEST_CASE_METHOD(TApp, "AddingExistingNoCaseReversed", "[creation]") { app.add_flag("-c,--count")->ignore_case(); - EXPECT_THROW(app.add_flag("--cat,-C"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_flag("--cat,-C"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingExistingWithCase) { +TEST_CASE_METHOD(TApp, "AddingExistingWithCase", "[creation]") { app.add_flag("-c,--count"); - EXPECT_NO_THROW(app.add_flag("--Cat,-C")); + CHECK_NOTHROW(app.add_flag("--Cat,-C")); } -TEST_F(TApp, AddingExistingWithCaseAfter) { +TEST_CASE_METHOD(TApp, "AddingExistingWithCaseAfter", "[creation]") { auto count = app.add_flag("-c,--count"); app.add_flag("--Cat,-C"); - 
EXPECT_THROW(count->ignore_case(), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(count->ignore_case(), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingExistingWithCaseAfter2) { +TEST_CASE_METHOD(TApp, "AddingExistingWithCaseAfter2", "[creation]") { app.add_flag("-c,--count"); auto cat = app.add_flag("--Cat,-C"); - EXPECT_THROW(cat->ignore_case(), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(cat->ignore_case(), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingExistingWithUnderscoreAfter) { +TEST_CASE_METHOD(TApp, "AddingExistingWithUnderscoreAfter", "[creation]") { auto count = app.add_flag("--underscore"); app.add_flag("--under_score"); - EXPECT_THROW(count->ignore_underscore(), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(count->ignore_underscore(), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingExistingWithUnderscoreAfter2) { +TEST_CASE_METHOD(TApp, "AddingExistingWithUnderscoreAfter2", "[creation]") { auto count = app.add_flag("--under_score"); app.add_flag("--underscore"); - EXPECT_THROW(count->ignore_underscore(), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(count->ignore_underscore(), CLI::OptionAlreadyAdded); } -TEST_F(TApp, AddingMultipleInfPositionals) { +TEST_CASE_METHOD(TApp, "AddingMultipleInfPositionals", "[creation]") { std::vector<std::string> one, two; app.add_option("one", one); app.add_option("two", two); - EXPECT_THROW(run(), CLI::InvalidError); + CHECK_THROWS_AS(run(), CLI::InvalidError); } -TEST_F(TApp, AddingMultipleInfPositionalsSubcom) { +TEST_CASE_METHOD(TApp, "AddingMultipleInfPositionalsSubcom", "[creation]") { std::vector<std::string> one, two; CLI::App *below = app.add_subcommand("below"); below->add_option("one", one); below->add_option("two", two); - EXPECT_THROW(run(), CLI::InvalidError); + CHECK_THROWS_AS(run(), CLI::InvalidError); } -TEST_F(TApp, MultipleSubcomMatching) { +TEST_CASE_METHOD(TApp, "MultipleSubcomMatching", "[creation]") { app.add_subcommand("first"); app.add_subcommand("second"); app.add_subcommand("Second"); - 
EXPECT_THROW(app.add_subcommand("first"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_subcommand("first"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, RecoverSubcommands) { +TEST_CASE_METHOD(TApp, "RecoverSubcommands", "[creation]") { CLI::App *app1 = app.add_subcommand("app1"); CLI::App *app2 = app.add_subcommand("app2"); CLI::App *app3 = app.add_subcommand("app3"); CLI::App *app4 = app.add_subcommand("app4"); - EXPECT_EQ(app.get_subcommands({}), std::vector<CLI::App *>({app1, app2, app3, app4})); + CHECK(std::vector<CLI::App *>({app1, app2, app3, app4}) == app.get_subcommands({})); } -TEST_F(TApp, MultipleSubcomMatchingWithCase) { +TEST_CASE_METHOD(TApp, "MultipleSubcomMatchingWithCase", "[creation]") { app.add_subcommand("first")->ignore_case(); - EXPECT_THROW(app.add_subcommand("fIrst"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_subcommand("fIrst"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, MultipleSubcomMatchingWithCaseFirst) { +TEST_CASE_METHOD(TApp, "MultipleSubcomMatchingWithCaseFirst", "[creation]") { app.ignore_case(); app.add_subcommand("first"); - EXPECT_THROW(app.add_subcommand("fIrst"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_subcommand("fIrst"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, MultipleSubcomMatchingWithUnderscore) { +TEST_CASE_METHOD(TApp, "MultipleSubcomMatchingWithUnderscore", "[creation]") { app.add_subcommand("first_option")->ignore_underscore(); - EXPECT_THROW(app.add_subcommand("firstoption"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_subcommand("firstoption"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, MultipleSubcomMatchingWithUnderscoreFirst) { +TEST_CASE_METHOD(TApp, "MultipleSubcomMatchingWithUnderscoreFirst", "[creation]") { app.ignore_underscore(); app.add_subcommand("first_option"); - EXPECT_THROW(app.add_subcommand("firstoption"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(app.add_subcommand("firstoption"), CLI::OptionAlreadyAdded); } -TEST_F(TApp, MultipleSubcomMatchingWithCaseInplace) { 
+TEST_CASE_METHOD(TApp, "MultipleSubcomMatchingWithCaseInplace", "[creation]") { app.add_subcommand("first"); auto first = app.add_subcommand("fIrst"); - EXPECT_THROW(first->ignore_case(), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(first->ignore_case(), CLI::OptionAlreadyAdded); } -TEST_F(TApp, MultipleSubcomMatchingWithCaseInplace2) { +TEST_CASE_METHOD(TApp, "MultipleSubcomMatchingWithCaseInplace2", "[creation]") { auto first = app.add_subcommand("first"); app.add_subcommand("fIrst"); - EXPECT_THROW(first->ignore_case(), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(first->ignore_case(), CLI::OptionAlreadyAdded); } -TEST_F(TApp, MultipleSubcomMatchingWithUnderscoreInplace) { +TEST_CASE_METHOD(TApp, "MultipleSubcomMatchingWithUnderscoreInplace", "[creation]") { app.add_subcommand("first_option"); auto first = app.add_subcommand("firstoption"); - EXPECT_THROW(first->ignore_underscore(), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(first->ignore_underscore(), CLI::OptionAlreadyAdded); } -TEST_F(TApp, MultipleSubcomMatchingWithUnderscoreInplace2) { +TEST_CASE_METHOD(TApp, "MultipleSubcomMatchingWithUnderscoreInplace2", "[creation]") { auto first = app.add_subcommand("firstoption"); app.add_subcommand("first_option"); - EXPECT_THROW(first->ignore_underscore(), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(first->ignore_underscore(), CLI::OptionAlreadyAdded); } -TEST_F(TApp, MultipleSubcomNoMatchingInplace2) { +TEST_CASE_METHOD(TApp, "MultipleSubcomNoMatchingInplace2", "[creation]") { auto first = app.add_subcommand("first"); auto second = app.add_subcommand("second"); - EXPECT_NO_THROW(first->ignore_case()); - EXPECT_NO_THROW(second->ignore_case()); + CHECK_NOTHROW(first->ignore_case()); + CHECK_NOTHROW(second->ignore_case()); } -TEST_F(TApp, MultipleSubcomNoMatchingInplaceUnderscore2) { +TEST_CASE_METHOD(TApp, "MultipleSubcomNoMatchingInplaceUnderscore2", "[creation]") { auto first = app.add_subcommand("first_option"); auto second = app.add_subcommand("second_option"); - 
EXPECT_NO_THROW(first->ignore_underscore()); - EXPECT_NO_THROW(second->ignore_underscore()); + CHECK_NOTHROW(first->ignore_underscore()); + CHECK_NOTHROW(second->ignore_underscore()); } -TEST_F(TApp, IncorrectConstructionFlagPositional1) { EXPECT_THROW(app.add_flag("cat"), CLI::IncorrectConstruction); } +TEST_CASE_METHOD(TApp, "IncorrectConstructionFlagPositional1", "[creation]") { + // This wants to be one line with clang-format + CHECK_THROWS_AS(app.add_flag("cat"), CLI::IncorrectConstruction); +} -TEST_F(TApp, IncorrectConstructionFlagPositional2) { +TEST_CASE_METHOD(TApp, "IncorrectConstructionFlagPositional2", "[creation]") { int x{0}; - EXPECT_THROW(app.add_flag("cat", x), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_flag("cat", x), CLI::IncorrectConstruction); } -TEST_F(TApp, IncorrectConstructionFlagPositional3) { +TEST_CASE_METHOD(TApp, "IncorrectConstructionFlagPositional3", "[creation]") { bool x{false}; - EXPECT_THROW(app.add_flag("cat", x), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_flag("cat", x), CLI::IncorrectConstruction); } -TEST_F(TApp, IncorrectConstructionNeedsCannotFind) { +TEST_CASE_METHOD(TApp, "IncorrectConstructionNeedsCannotFind", "[creation]") { auto cat = app.add_flag("--cat"); - EXPECT_THROW(cat->needs("--nothing"), CLI::IncorrectConstruction); + CHECK_THROWS_AS(cat->needs("--nothing"), CLI::IncorrectConstruction); } -TEST_F(TApp, IncorrectConstructionExcludesCannotFind) { +TEST_CASE_METHOD(TApp, "IncorrectConstructionExcludesCannotFind", "[creation]") { auto cat = app.add_flag("--cat"); - EXPECT_THROW(cat->excludes("--nothing"), CLI::IncorrectConstruction); + CHECK_THROWS_AS(cat->excludes("--nothing"), CLI::IncorrectConstruction); } -TEST_F(TApp, IncorrectConstructionDuplicateNeeds) { +TEST_CASE_METHOD(TApp, "IncorrectConstructionDuplicateNeeds", "[creation]") { auto cat = app.add_flag("--cat"); auto other = app.add_flag("--other"); - ASSERT_NO_THROW(cat->needs(other)); + REQUIRE_NOTHROW(cat->needs(other)); // 
duplicated needs is redundant but not an error - EXPECT_NO_THROW(cat->needs(other)); + CHECK_NOTHROW(cat->needs(other)); } -TEST_F(TApp, IncorrectConstructionDuplicateNeedsTxt) { +TEST_CASE_METHOD(TApp, "IncorrectConstructionDuplicateNeedsTxt", "[creation]") { auto cat = app.add_flag("--cat"); app.add_flag("--other"); - ASSERT_NO_THROW(cat->needs("--other")); + REQUIRE_NOTHROW(cat->needs("--other")); // duplicate needs is redundant but not an error - EXPECT_NO_THROW(cat->needs("--other")); + CHECK_NOTHROW(cat->needs("--other")); } // Now allowed -TEST_F(TApp, CorrectConstructionDuplicateExcludes) { +TEST_CASE_METHOD(TApp, "CorrectConstructionDuplicateExcludes", "[creation]") { auto cat = app.add_flag("--cat"); auto other = app.add_flag("--other"); - ASSERT_NO_THROW(cat->excludes(other)); - ASSERT_NO_THROW(other->excludes(cat)); + REQUIRE_NOTHROW(cat->excludes(other)); + REQUIRE_NOTHROW(other->excludes(cat)); } // Now allowed -TEST_F(TApp, CorrectConstructionDuplicateExcludesTxt) { +TEST_CASE_METHOD(TApp, "CorrectConstructionDuplicateExcludesTxt", "[creation]") { auto cat = app.add_flag("--cat"); auto other = app.add_flag("--other"); - ASSERT_NO_THROW(cat->excludes("--other")); - ASSERT_NO_THROW(other->excludes("--cat")); + REQUIRE_NOTHROW(cat->excludes("--other")); + REQUIRE_NOTHROW(other->excludes("--cat")); } -TEST_F(TApp, CheckName) { +TEST_CASE_METHOD(TApp, "CheckName", "[creation]") { auto long1 = app.add_flag("--long1"); auto long2 = app.add_flag("--Long2"); auto short1 = app.add_flag("-a"); @@ -230,26 +233,26 @@ TEST_F(TApp, CheckName) { auto pos1 = app.add_option("pos1", x); auto pos2 = app.add_option("pOs2", y); - EXPECT_TRUE(long1->check_name("--long1")); - EXPECT_FALSE(long1->check_name("--lonG1")); + CHECK(long1->check_name("--long1")); + CHECK(!long1->check_name("--lonG1")); - EXPECT_TRUE(long2->check_name("--Long2")); - EXPECT_FALSE(long2->check_name("--long2")); + CHECK(long2->check_name("--Long2")); + CHECK(!long2->check_name("--long2")); - 
EXPECT_TRUE(short1->check_name("-a")); - EXPECT_FALSE(short1->check_name("-A")); + CHECK(short1->check_name("-a")); + CHECK(!short1->check_name("-A")); - EXPECT_TRUE(short2->check_name("-B")); - EXPECT_FALSE(short2->check_name("-b")); + CHECK(short2->check_name("-B")); + CHECK(!short2->check_name("-b")); - EXPECT_TRUE(pos1->check_name("pos1")); - EXPECT_FALSE(pos1->check_name("poS1")); + CHECK(pos1->check_name("pos1")); + CHECK(!pos1->check_name("poS1")); - EXPECT_TRUE(pos2->check_name("pOs2")); - EXPECT_FALSE(pos2->check_name("pos2")); + CHECK(pos2->check_name("pOs2")); + CHECK(!pos2->check_name("pos2")); } -TEST_F(TApp, CheckNameNoCase) { +TEST_CASE_METHOD(TApp, "CheckNameNoCase", "[creation]") { auto long1 = app.add_flag("--long1")->ignore_case(); auto long2 = app.add_flag("--Long2")->ignore_case(); auto short1 = app.add_flag("-a")->ignore_case(); @@ -258,26 +261,26 @@ TEST_F(TApp, CheckNameNoCase) { auto pos1 = app.add_option("pos1", x)->ignore_case(); auto pos2 = app.add_option("pOs2", y)->ignore_case(); - EXPECT_TRUE(long1->check_name("--long1")); - EXPECT_TRUE(long1->check_name("--lonG1")); + CHECK(long1->check_name("--long1")); + CHECK(long1->check_name("--lonG1")); - EXPECT_TRUE(long2->check_name("--Long2")); - EXPECT_TRUE(long2->check_name("--long2")); + CHECK(long2->check_name("--Long2")); + CHECK(long2->check_name("--long2")); - EXPECT_TRUE(short1->check_name("-a")); - EXPECT_TRUE(short1->check_name("-A")); + CHECK(short1->check_name("-a")); + CHECK(short1->check_name("-A")); - EXPECT_TRUE(short2->check_name("-B")); - EXPECT_TRUE(short2->check_name("-b")); + CHECK(short2->check_name("-B")); + CHECK(short2->check_name("-b")); - EXPECT_TRUE(pos1->check_name("pos1")); - EXPECT_TRUE(pos1->check_name("poS1")); + CHECK(pos1->check_name("pos1")); + CHECK(pos1->check_name("poS1")); - EXPECT_TRUE(pos2->check_name("pOs2")); - EXPECT_TRUE(pos2->check_name("pos2")); + CHECK(pos2->check_name("pOs2")); + CHECK(pos2->check_name("pos2")); } -TEST_F(TApp, 
CheckNameNoUnderscore) { +TEST_CASE_METHOD(TApp, "CheckNameNoUnderscore", "[creation]") { auto long1 = app.add_flag("--longoption1")->ignore_underscore(); auto long2 = app.add_flag("--long_option2")->ignore_underscore(); @@ -285,30 +288,30 @@ TEST_F(TApp, CheckNameNoUnderscore) { auto pos1 = app.add_option("pos_option_1", x)->ignore_underscore(); auto pos2 = app.add_option("posoption2", y)->ignore_underscore(); - EXPECT_TRUE(long1->check_name("--long_option1")); - EXPECT_TRUE(long1->check_name("--longoption_1")); - EXPECT_TRUE(long1->check_name("--longoption1")); - EXPECT_TRUE(long1->check_name("--long__opt_ion__1")); - EXPECT_TRUE(long1->check_name("--__l_o_n_g_o_p_t_i_o_n_1")); + CHECK(long1->check_name("--long_option1")); + CHECK(long1->check_name("--longoption_1")); + CHECK(long1->check_name("--longoption1")); + CHECK(long1->check_name("--long__opt_ion__1")); + CHECK(long1->check_name("--__l_o_n_g_o_p_t_i_o_n_1")); - EXPECT_TRUE(long2->check_name("--long_option2")); - EXPECT_TRUE(long2->check_name("--longoption2")); - EXPECT_TRUE(long2->check_name("--longoption_2")); - EXPECT_TRUE(long2->check_name("--long__opt_ion__2")); - EXPECT_TRUE(long2->check_name("--__l_o_n_go_p_t_i_o_n_2__")); + CHECK(long2->check_name("--long_option2")); + CHECK(long2->check_name("--longoption2")); + CHECK(long2->check_name("--longoption_2")); + CHECK(long2->check_name("--long__opt_ion__2")); + CHECK(long2->check_name("--__l_o_n_go_p_t_i_o_n_2__")); - EXPECT_TRUE(pos1->check_name("pos_option1")); - EXPECT_TRUE(pos1->check_name("pos_option_1")); - EXPECT_TRUE(pos1->check_name("pos_o_p_t_i_on_1")); - EXPECT_TRUE(pos1->check_name("posoption1")); + CHECK(pos1->check_name("pos_option1")); + CHECK(pos1->check_name("pos_option_1")); + CHECK(pos1->check_name("pos_o_p_t_i_on_1")); + CHECK(pos1->check_name("posoption1")); - EXPECT_TRUE(pos2->check_name("pos_option2")); - EXPECT_TRUE(pos2->check_name("pos_option_2")); - EXPECT_TRUE(pos2->check_name("pos_o_p_t_i_on_2")); - 
EXPECT_TRUE(pos2->check_name("posoption2")); + CHECK(pos2->check_name("pos_option2")); + CHECK(pos2->check_name("pos_option_2")); + CHECK(pos2->check_name("pos_o_p_t_i_on_2")); + CHECK(pos2->check_name("posoption2")); } -TEST_F(TApp, CheckNameNoCaseNoUnderscore) { +TEST_CASE_METHOD(TApp, "CheckNameNoCaseNoUnderscore", "[creation]") { auto long1 = app.add_flag("--LongoptioN1")->ignore_underscore()->ignore_case(); auto long2 = app.add_flag("--long_Option2")->ignore_case()->ignore_underscore(); @@ -316,85 +319,85 @@ TEST_F(TApp, CheckNameNoCaseNoUnderscore) { auto pos1 = app.add_option("pos_Option_1", x)->ignore_underscore()->ignore_case(); auto pos2 = app.add_option("posOption2", y)->ignore_case()->ignore_underscore(); - EXPECT_TRUE(long1->check_name("--Long_Option1")); - EXPECT_TRUE(long1->check_name("--lONgoption_1")); - EXPECT_TRUE(long1->check_name("--LongOption1")); - EXPECT_TRUE(long1->check_name("--long__Opt_ion__1")); - EXPECT_TRUE(long1->check_name("--__l_o_N_g_o_P_t_i_O_n_1")); + CHECK(long1->check_name("--Long_Option1")); + CHECK(long1->check_name("--lONgoption_1")); + CHECK(long1->check_name("--LongOption1")); + CHECK(long1->check_name("--long__Opt_ion__1")); + CHECK(long1->check_name("--__l_o_N_g_o_P_t_i_O_n_1")); - EXPECT_TRUE(long2->check_name("--long_Option2")); - EXPECT_TRUE(long2->check_name("--LongOption2")); - EXPECT_TRUE(long2->check_name("--longOPTION_2")); - EXPECT_TRUE(long2->check_name("--long__OPT_ion__2")); - EXPECT_TRUE(long2->check_name("--__l_o_n_GO_p_t_i_o_n_2__")); + CHECK(long2->check_name("--long_Option2")); + CHECK(long2->check_name("--LongOption2")); + CHECK(long2->check_name("--longOPTION_2")); + CHECK(long2->check_name("--long__OPT_ion__2")); + CHECK(long2->check_name("--__l_o_n_GO_p_t_i_o_n_2__")); - EXPECT_TRUE(pos1->check_name("POS_Option1")); - EXPECT_TRUE(pos1->check_name("pos_option_1")); - EXPECT_TRUE(pos1->check_name("pos_o_p_t_i_on_1")); - EXPECT_TRUE(pos1->check_name("posoption1")); + 
CHECK(pos1->check_name("POS_Option1")); + CHECK(pos1->check_name("pos_option_1")); + CHECK(pos1->check_name("pos_o_p_t_i_on_1")); + CHECK(pos1->check_name("posoption1")); - EXPECT_TRUE(pos2->check_name("pos_option2")); - EXPECT_TRUE(pos2->check_name("pos_OPTION_2")); - EXPECT_TRUE(pos2->check_name("poS_o_p_T_I_on_2")); - EXPECT_TRUE(pos2->check_name("PosOption2")); + CHECK(pos2->check_name("pos_option2")); + CHECK(pos2->check_name("pos_OPTION_2")); + CHECK(pos2->check_name("poS_o_p_T_I_on_2")); + CHECK(pos2->check_name("PosOption2")); } -TEST_F(TApp, PreSpaces) { +TEST_CASE_METHOD(TApp, "PreSpaces", "[creation]") { int x{0}; auto myapp = app.add_option(" -a, --long, other", x); - EXPECT_TRUE(myapp->check_lname("long")); - EXPECT_TRUE(myapp->check_sname("a")); - EXPECT_TRUE(myapp->check_name("other")); + CHECK(myapp->check_lname("long")); + CHECK(myapp->check_sname("a")); + CHECK(myapp->check_name("other")); } -TEST_F(TApp, AllSpaces) { +TEST_CASE_METHOD(TApp, "AllSpaces", "[creation]") { int x{0}; auto myapp = app.add_option(" -a , --long , other ", x); - EXPECT_TRUE(myapp->check_lname("long")); - EXPECT_TRUE(myapp->check_sname("a")); - EXPECT_TRUE(myapp->check_name("other")); + CHECK(myapp->check_lname("long")); + CHECK(myapp->check_sname("a")); + CHECK(myapp->check_name("other")); } -TEST_F(TApp, OptionFromDefaults) { +TEST_CASE_METHOD(TApp, "OptionFromDefaults", "[creation]") { app.option_defaults()->required(); // Options should remember defaults int x{0}; auto opt = app.add_option("--simple", x); - EXPECT_TRUE(opt->get_required()); + CHECK(opt->get_required()); // Flags cannot be required auto flag = app.add_flag("--other"); - EXPECT_FALSE(flag->get_required()); + CHECK(!flag->get_required()); app.option_defaults()->required(false); auto opt2 = app.add_option("--simple2", x); - EXPECT_FALSE(opt2->get_required()); + CHECK(!opt2->get_required()); app.option_defaults()->required()->ignore_case(); auto opt3 = app.add_option("--simple3", x); - 
EXPECT_TRUE(opt3->get_required()); - EXPECT_TRUE(opt3->get_ignore_case()); + CHECK(opt3->get_required()); + CHECK(opt3->get_ignore_case()); app.option_defaults()->required()->ignore_underscore(); auto opt4 = app.add_option("--simple4", x); - EXPECT_TRUE(opt4->get_required()); - EXPECT_TRUE(opt4->get_ignore_underscore()); + CHECK(opt4->get_required()); + CHECK(opt4->get_ignore_underscore()); } -TEST_F(TApp, OptionFromDefaultsSubcommands) { +TEST_CASE_METHOD(TApp, "OptionFromDefaultsSubcommands", "[creation]") { // Initial defaults - EXPECT_FALSE(app.option_defaults()->get_required()); - EXPECT_EQ(app.option_defaults()->get_multi_option_policy(), CLI::MultiOptionPolicy::Throw); - EXPECT_FALSE(app.option_defaults()->get_ignore_case()); - EXPECT_FALSE(app.option_defaults()->get_ignore_underscore()); - EXPECT_FALSE(app.option_defaults()->get_disable_flag_override()); - EXPECT_TRUE(app.option_defaults()->get_configurable()); - EXPECT_EQ(app.option_defaults()->get_group(), "Options"); + CHECK(!app.option_defaults()->get_required()); + CHECK(CLI::MultiOptionPolicy::Throw == app.option_defaults()->get_multi_option_policy()); + CHECK(!app.option_defaults()->get_ignore_case()); + CHECK(!app.option_defaults()->get_ignore_underscore()); + CHECK(!app.option_defaults()->get_disable_flag_override()); + CHECK(app.option_defaults()->get_configurable()); + CHECK("Options" == app.option_defaults()->get_group()); app.option_defaults() ->required() @@ -407,55 +410,55 @@ TEST_F(TApp, OptionFromDefaultsSubcommands) { auto app2 = app.add_subcommand("app2"); - EXPECT_TRUE(app2->option_defaults()->get_required()); - EXPECT_EQ(app2->option_defaults()->get_multi_option_policy(), CLI::MultiOptionPolicy::TakeLast); - EXPECT_TRUE(app2->option_defaults()->get_ignore_case()); - EXPECT_TRUE(app2->option_defaults()->get_ignore_underscore()); - EXPECT_FALSE(app2->option_defaults()->get_configurable()); - EXPECT_TRUE(app.option_defaults()->get_disable_flag_override()); - 
EXPECT_EQ(app2->option_defaults()->get_group(), "Something"); + CHECK(app2->option_defaults()->get_required()); + CHECK(CLI::MultiOptionPolicy::TakeLast == app2->option_defaults()->get_multi_option_policy()); + CHECK(app2->option_defaults()->get_ignore_case()); + CHECK(app2->option_defaults()->get_ignore_underscore()); + CHECK(!app2->option_defaults()->get_configurable()); + CHECK(app.option_defaults()->get_disable_flag_override()); + CHECK("Something" == app2->option_defaults()->get_group()); } -TEST_F(TApp, GetNameCheck) { +TEST_CASE_METHOD(TApp, "GetNameCheck", "[creation]") { int x{0}; auto a = app.add_flag("--that"); auto b = app.add_flag("-x"); auto c = app.add_option("pos", x); auto d = app.add_option("one,-o,--other", x); - EXPECT_EQ(a->get_name(false, true), "--that"); - EXPECT_EQ(b->get_name(false, true), "-x"); - EXPECT_EQ(c->get_name(false, true), "pos"); + CHECK("--that" == a->get_name(false, true)); + CHECK("-x" == b->get_name(false, true)); + CHECK("pos" == c->get_name(false, true)); - EXPECT_EQ(d->get_name(), "--other"); - EXPECT_EQ(d->get_name(false, false), "--other"); - EXPECT_EQ(d->get_name(false, true), "-o,--other"); - EXPECT_EQ(d->get_name(true, true), "one,-o,--other"); - EXPECT_EQ(d->get_name(true, false), "one"); + CHECK("--other" == d->get_name()); + CHECK("--other" == d->get_name(false, false)); + CHECK("-o,--other" == d->get_name(false, true)); + CHECK("one,-o,--other" == d->get_name(true, true)); + CHECK("one" == d->get_name(true, false)); } -TEST_F(TApp, SubcommandDefaults) { +TEST_CASE_METHOD(TApp, "SubcommandDefaults", "[creation]") { // allow_extras, prefix_command, ignore_case, fallthrough, group, min/max subcommand, validate_positionals // Initial defaults - EXPECT_FALSE(app.get_allow_extras()); - EXPECT_FALSE(app.get_prefix_command()); - EXPECT_FALSE(app.get_immediate_callback()); - EXPECT_FALSE(app.get_ignore_case()); - EXPECT_FALSE(app.get_ignore_underscore()); + CHECK(!app.get_allow_extras()); + 
CHECK(!app.get_prefix_command()); + CHECK(!app.get_immediate_callback()); + CHECK(!app.get_ignore_case()); + CHECK(!app.get_ignore_underscore()); #ifdef _WIN32 - EXPECT_TRUE(app.get_allow_windows_style_options()); + CHECK(app.get_allow_windows_style_options()); #else - EXPECT_FALSE(app.get_allow_windows_style_options()); + CHECK(!app.get_allow_windows_style_options()); #endif - EXPECT_FALSE(app.get_fallthrough()); - EXPECT_FALSE(app.get_configurable()); - EXPECT_FALSE(app.get_validate_positionals()); + CHECK(!app.get_fallthrough()); + CHECK(!app.get_configurable()); + CHECK(!app.get_validate_positionals()); - EXPECT_EQ(app.get_footer(), ""); - EXPECT_EQ(app.get_group(), "Subcommands"); - EXPECT_EQ(app.get_require_subcommand_min(), 0u); - EXPECT_EQ(app.get_require_subcommand_max(), 0u); + CHECK("" == app.get_footer()); + CHECK("Subcommands" == app.get_group()); + CHECK(0u == app.get_require_subcommand_min()); + CHECK(0u == app.get_require_subcommand_max()); app.allow_extras(); app.prefix_command(); @@ -478,57 +481,57 @@ TEST_F(TApp, SubcommandDefaults) { auto app2 = app.add_subcommand("app2"); // Initial defaults - EXPECT_TRUE(app2->get_allow_extras()); - EXPECT_TRUE(app2->get_prefix_command()); - EXPECT_TRUE(app2->get_immediate_callback()); - EXPECT_TRUE(app2->get_ignore_case()); - EXPECT_TRUE(app2->get_ignore_underscore()); + CHECK(app2->get_allow_extras()); + CHECK(app2->get_prefix_command()); + CHECK(app2->get_immediate_callback()); + CHECK(app2->get_ignore_case()); + CHECK(app2->get_ignore_underscore()); #ifdef _WIN32 - EXPECT_FALSE(app2->get_allow_windows_style_options()); + CHECK(!app2->get_allow_windows_style_options()); #else - EXPECT_TRUE(app2->get_allow_windows_style_options()); + CHECK(app2->get_allow_windows_style_options()); #endif - EXPECT_TRUE(app2->get_fallthrough()); - EXPECT_TRUE(app2->get_validate_positionals()); - EXPECT_TRUE(app2->get_configurable()); - EXPECT_EQ(app2->get_footer(), "footy"); - EXPECT_EQ(app2->get_group(), "Stuff"); - 
EXPECT_EQ(app2->get_require_subcommand_min(), 0u); - EXPECT_EQ(app2->get_require_subcommand_max(), 3u); + CHECK(app2->get_fallthrough()); + CHECK(app2->get_validate_positionals()); + CHECK(app2->get_configurable()); + CHECK("footy" == app2->get_footer()); + CHECK("Stuff" == app2->get_group()); + CHECK(0u == app2->get_require_subcommand_min()); + CHECK(3u == app2->get_require_subcommand_max()); } -TEST_F(TApp, SubcommandMinMax) { +TEST_CASE_METHOD(TApp, "SubcommandMinMax", "[creation]") { - EXPECT_EQ(app.get_require_subcommand_min(), 0u); - EXPECT_EQ(app.get_require_subcommand_max(), 0u); + CHECK(0u == app.get_require_subcommand_min()); + CHECK(0u == app.get_require_subcommand_max()); app.require_subcommand(); - EXPECT_EQ(app.get_require_subcommand_min(), 1u); - EXPECT_EQ(app.get_require_subcommand_max(), 0u); + CHECK(1u == app.get_require_subcommand_min()); + CHECK(0u == app.get_require_subcommand_max()); app.require_subcommand(2); - EXPECT_EQ(app.get_require_subcommand_min(), 2u); - EXPECT_EQ(app.get_require_subcommand_max(), 2u); + CHECK(2u == app.get_require_subcommand_min()); + CHECK(2u == app.get_require_subcommand_max()); app.require_subcommand(0); - EXPECT_EQ(app.get_require_subcommand_min(), 0u); - EXPECT_EQ(app.get_require_subcommand_max(), 0u); + CHECK(0u == app.get_require_subcommand_min()); + CHECK(0u == app.get_require_subcommand_max()); app.require_subcommand(-2); - EXPECT_EQ(app.get_require_subcommand_min(), 0u); - EXPECT_EQ(app.get_require_subcommand_max(), 2u); + CHECK(0u == app.get_require_subcommand_min()); + CHECK(2u == app.get_require_subcommand_max()); app.require_subcommand(3, 7); - EXPECT_EQ(app.get_require_subcommand_min(), 3u); - EXPECT_EQ(app.get_require_subcommand_max(), 7u); + CHECK(3u == app.get_require_subcommand_min()); + CHECK(7u == app.get_require_subcommand_max()); } -TEST_F(TApp, GetOptionList) { +TEST_CASE_METHOD(TApp, "GetOptionList", "[creation]") { int two{0}; auto flag = app.add_flag("--one"); auto opt = 
app.add_option("--two", two); @@ -536,36 +539,36 @@ TEST_F(TApp, GetOptionList) { const CLI::App &const_app = app; // const alias to force use of const-methods std::vector<const CLI::Option *> opt_list = const_app.get_options(); - ASSERT_EQ(opt_list.size(), static_cast<std::size_t>(3)); - EXPECT_EQ(opt_list.at(1), flag); - EXPECT_EQ(opt_list.at(2), opt); + REQUIRE(static_cast<std::size_t>(3) == opt_list.size()); + CHECK(flag == opt_list.at(1)); + CHECK(opt == opt_list.at(2)); std::vector<CLI::Option *> nonconst_opt_list = app.get_options(); for(std::size_t i = 0; i < opt_list.size(); ++i) { - EXPECT_EQ(nonconst_opt_list.at(i), opt_list.at(i)); + CHECK(opt_list.at(i) == nonconst_opt_list.at(i)); } } -TEST(ValidatorTests, TestValidatorCreation) { +TEST_CASE("ValidatorTests: TestValidatorCreation", "[creation]") { std::function<std::string(std::string &)> op1 = [](std::string &val) { return (val.size() >= 5) ? std::string{} : val; }; CLI::Validator V(op1, "", "size"); - EXPECT_EQ(V.get_name(), "size"); + CHECK("size" == V.get_name()); V.name("harry"); - EXPECT_EQ(V.get_name(), "harry"); - EXPECT_TRUE(V.get_active()); + CHECK("harry" == V.get_name()); + CHECK(V.get_active()); - EXPECT_EQ(V("test"), "test"); - EXPECT_EQ(V("test5"), std::string{}); + CHECK("test" == V("test")); + CHECK(std::string{} == V("test5")); - EXPECT_EQ(V.get_description(), std::string{}); + CHECK(std::string{} == V.get_description()); V.description("this is a description"); - EXPECT_EQ(V.get_description(), "this is a description"); + CHECK("this is a description" == V.get_description()); } -TEST(ValidatorTests, TestValidatorOps) { +TEST_CASE("ValidatorTests: TestValidatorOps", "[creation]") { std::function<std::string(std::string &)> op1 = [](std::string &val) { return (val.size() >= 5) ? 
std::string{} : val; }; @@ -590,72 +593,72 @@ TEST(ValidatorTests, TestValidatorOps) { std::string eight(8, 'a'); std::string nine(9, 'a'); std::string ten(10, 'a'); - EXPECT_TRUE(V1(five).empty()); - EXPECT_FALSE(V1(four).empty()); + CHECK(V1(five).empty()); + CHECK(!V1(four).empty()); - EXPECT_TRUE(V2(nine).empty()); - EXPECT_FALSE(V2(eight).empty()); + CHECK(V2(nine).empty()); + CHECK(!V2(eight).empty()); - EXPECT_TRUE(V3(two).empty()); - EXPECT_FALSE(V3(four).empty()); + CHECK(V3(two).empty()); + CHECK(!V3(four).empty()); - EXPECT_TRUE(V4(eight).empty()); - EXPECT_FALSE(V4(ten).empty()); + CHECK(V4(eight).empty()); + CHECK(!V4(ten).empty()); auto V1a2 = V1 & V2; - EXPECT_EQ(V1a2.get_description(), "(SIZE >= 5) AND (SIZE >= 9)"); - EXPECT_FALSE(V1a2(five).empty()); - EXPECT_TRUE(V1a2(nine).empty()); + CHECK("(SIZE >= 5) AND (SIZE >= 9)" == V1a2.get_description()); + CHECK(!V1a2(five).empty()); + CHECK(V1a2(nine).empty()); auto V1a4 = V1 & V4; - EXPECT_EQ(V1a4.get_description(), "(SIZE >= 5) AND (SIZE <= 9)"); - EXPECT_TRUE(V1a4(five).empty()); - EXPECT_TRUE(V1a4(eight).empty()); - EXPECT_FALSE(V1a4(ten).empty()); - EXPECT_FALSE(V1a4(four).empty()); + CHECK("(SIZE >= 5) AND (SIZE <= 9)" == V1a4.get_description()); + CHECK(V1a4(five).empty()); + CHECK(V1a4(eight).empty()); + CHECK(!V1a4(ten).empty()); + CHECK(!V1a4(four).empty()); auto V1o3 = V1 | V3; - EXPECT_EQ(V1o3.get_description(), "(SIZE >= 5) OR (SIZE < 3)"); - EXPECT_TRUE(V1o3(two).empty()); - EXPECT_TRUE(V1o3(eight).empty()); - EXPECT_TRUE(V1o3(ten).empty()); - EXPECT_TRUE(V1o3(two).empty()); - EXPECT_FALSE(V1o3(four).empty()); + CHECK("(SIZE >= 5) OR (SIZE < 3)" == V1o3.get_description()); + CHECK(V1o3(two).empty()); + CHECK(V1o3(eight).empty()); + CHECK(V1o3(ten).empty()); + CHECK(V1o3(two).empty()); + CHECK(!V1o3(four).empty()); auto m1 = V1o3 & V4; - EXPECT_EQ(m1.get_description(), "((SIZE >= 5) OR (SIZE < 3)) AND (SIZE <= 9)"); - EXPECT_TRUE(m1(two).empty()); - EXPECT_TRUE(m1(eight).empty()); - 
EXPECT_FALSE(m1(ten).empty()); - EXPECT_TRUE(m1(two).empty()); - EXPECT_TRUE(m1(five).empty()); - EXPECT_FALSE(m1(four).empty()); + CHECK("((SIZE >= 5) OR (SIZE < 3)) AND (SIZE <= 9)" == m1.get_description()); + CHECK(m1(two).empty()); + CHECK(m1(eight).empty()); + CHECK(!m1(ten).empty()); + CHECK(m1(two).empty()); + CHECK(m1(five).empty()); + CHECK(!m1(four).empty()); auto m2 = m1 & V2; - EXPECT_EQ(m2.get_description(), "(((SIZE >= 5) OR (SIZE < 3)) AND (SIZE <= 9)) AND (SIZE >= 9)"); - EXPECT_FALSE(m2(two).empty()); - EXPECT_FALSE(m2(eight).empty()); - EXPECT_FALSE(m2(ten).empty()); - EXPECT_FALSE(m2(two).empty()); - EXPECT_TRUE(m2(nine).empty()); - EXPECT_FALSE(m2(four).empty()); + CHECK("(((SIZE >= 5) OR (SIZE < 3)) AND (SIZE <= 9)) AND (SIZE >= 9)" == m2.get_description()); + CHECK(!m2(two).empty()); + CHECK(!m2(eight).empty()); + CHECK(!m2(ten).empty()); + CHECK(!m2(two).empty()); + CHECK(m2(nine).empty()); + CHECK(!m2(four).empty()); auto m3 = m2 | V3; - EXPECT_EQ(m3.get_description(), "((((SIZE >= 5) OR (SIZE < 3)) AND (SIZE <= 9)) AND (SIZE >= 9)) OR (SIZE < 3)"); - EXPECT_TRUE(m3(two).empty()); - EXPECT_FALSE(m3(eight).empty()); - EXPECT_TRUE(m3(nine).empty()); - EXPECT_FALSE(m3(four).empty()); + CHECK("((((SIZE >= 5) OR (SIZE < 3)) AND (SIZE <= 9)) AND (SIZE >= 9)) OR (SIZE < 3)" == m3.get_description()); + CHECK(m3(two).empty()); + CHECK(!m3(eight).empty()); + CHECK(m3(nine).empty()); + CHECK(!m3(four).empty()); auto m4 = V3 | m2; - EXPECT_EQ(m4.get_description(), "(SIZE < 3) OR ((((SIZE >= 5) OR (SIZE < 3)) AND (SIZE <= 9)) AND (SIZE >= 9))"); - EXPECT_TRUE(m4(two).empty()); - EXPECT_FALSE(m4(eight).empty()); - EXPECT_TRUE(m4(nine).empty()); - EXPECT_FALSE(m4(four).empty()); + CHECK("(SIZE < 3) OR ((((SIZE >= 5) OR (SIZE < 3)) AND (SIZE <= 9)) AND (SIZE >= 9))" == m4.get_description()); + CHECK(m4(two).empty()); + CHECK(!m4(eight).empty()); + CHECK(m4(nine).empty()); + CHECK(!m4(four).empty()); } -TEST(ValidatorTests, TestValidatorNegation) { 
+TEST_CASE("ValidatorTests: TestValidatorNegation", "[creation]") { std::function<std::string(std::string &)> op1 = [](std::string &val) { return (val.size() >= 5) ? std::string{} : val; @@ -666,21 +669,21 @@ TEST(ValidatorTests, TestValidatorNegation) { std::string four(4, 'a'); std::string five(5, 'a'); - EXPECT_TRUE(V1(five).empty()); - EXPECT_FALSE(V1(four).empty()); + CHECK(V1(five).empty()); + CHECK(!V1(four).empty()); auto V2 = !V1; - EXPECT_FALSE(V2(five).empty()); - EXPECT_TRUE(V2(four).empty()); - EXPECT_EQ(V2.get_description(), "NOT SIZE >= 5"); + CHECK(!V2(five).empty()); + CHECK(V2(four).empty()); + CHECK("NOT SIZE >= 5" == V2.get_description()); V2.active(false); - EXPECT_TRUE(V2(five).empty()); - EXPECT_TRUE(V2(four).empty()); - EXPECT_TRUE(V2.get_description().empty()); + CHECK(V2(five).empty()); + CHECK(V2(four).empty()); + CHECK(V2.get_description().empty()); } -TEST(ValidatorTests, ValidatorDefaults) { +TEST_CASE("ValidatorTests: ValidatorDefaults", "[creation]") { CLI::Validator V1{}; @@ -688,23 +691,23 @@ TEST(ValidatorTests, ValidatorDefaults) { std::string five(5, 'a'); // make sure this doesn't generate a seg fault or something - EXPECT_TRUE(V1(five).empty()); - EXPECT_TRUE(V1(four).empty()); + CHECK(V1(five).empty()); + CHECK(V1(four).empty()); - EXPECT_TRUE(V1.get_name().empty()); - EXPECT_TRUE(V1.get_description().empty()); - EXPECT_TRUE(V1.get_active()); - EXPECT_TRUE(V1.get_modifying()); + CHECK(V1.get_name().empty()); + CHECK(V1.get_description().empty()); + CHECK(V1.get_active()); + CHECK(V1.get_modifying()); CLI::Validator V2{"check"}; // make sure this doesn't generate a seg fault or something - EXPECT_TRUE(V2(five).empty()); - EXPECT_TRUE(V2(four).empty()); + CHECK(V2(five).empty()); + CHECK(V2(four).empty()); - EXPECT_TRUE(V2.get_name().empty()); - EXPECT_EQ(V2.get_description(), "check"); - EXPECT_TRUE(V2.get_active()); - EXPECT_TRUE(V2.get_modifying()); + CHECK(V2.get_name().empty()); + CHECK("check" == V2.get_description()); + 
CHECK(V2.get_active()); + CHECK(V2.get_modifying()); // This class only support streaming in, not out } @@ -731,7 +734,7 @@ std::istream &operator>>(std::istream &in, Unstreamable &value) { static_assert(CLI::detail::is_istreamable<Unstreamable>::value, "Unstreamable type is still unstreamable and it should be"); -TEST_F(TApp, MakeUnstreamableOptions) { +TEST_CASE_METHOD(TApp, "MakeUnstreamableOptions", "[creation]") { Unstreamable value; app.add_option("--value", value); @@ -746,10 +749,10 @@ TEST_F(TApp, MakeUnstreamableOptions) { args = {"--value", "45"}; run(); - EXPECT_EQ(value.get_x(), 45); + CHECK(45 == value.get_x()); args = {"--values", "45", "27", "34"}; run(); - EXPECT_EQ(values.size(), 3u); - EXPECT_EQ(values[2].get_x(), 34); + CHECK(3u == values.size()); + CHECK(34 == values[2].get_x()); } diff --git a/packages/CLI11/tests/DeprecatedTest.cpp b/packages/CLI11/tests/DeprecatedTest.cpp index a8f41971270021c57f50ff64dcb5f97387175791..cf9987c6d97b6a8251630f60deff8e5466138e28 100644 --- a/packages/CLI11/tests/DeprecatedTest.cpp +++ b/packages/CLI11/tests/DeprecatedTest.cpp @@ -6,157 +6,16 @@ #include "app_helper.hpp" -#include "gmock/gmock.h" +using Catch::Matchers::Contains; -using ::testing::HasSubstr; -using ::testing::Not; - -TEST(Deprecated, Empty) { +TEST_CASE("Deprecated: Empty", "[deprecated]") { // No deprecated features at this time. 
- EXPECT_TRUE(true); + CHECK(true); } // Classic sets -TEST_F(TApp, SetWithDefaults) { - int someint = 2; - app.add_set("-a", someint, {1, 2, 3, 4}, "", true); - - args = {"-a1", "-a2"}; - - EXPECT_THROW(run(), CLI::ArgumentMismatch); -} - -TEST_F(TApp, SetWithDefaultsConversion) { - int someint = 2; - app.add_set("-a", someint, {1, 2, 3, 4}, "", true); - - args = {"-a", "hi"}; - - EXPECT_THROW(run(), CLI::ValidationError); -} - -TEST_F(TApp, InSet) { - - std::string choice; - app.add_set("-q,--quick", choice, {"one", "two", "three"}); - - args = {"--quick", "two"}; - - run(); - EXPECT_EQ("two", choice); - - args = {"--quick", "four"}; - EXPECT_THROW(run(), CLI::ValidationError); -} - -TEST_F(TApp, InSetWithDefault) { - - std::string choice = "one"; - app.add_set("-q,--quick", choice, {"one", "two", "three"}, "", true); - - run(); - EXPECT_EQ("one", choice); - - args = {"--quick", "two"}; - - run(); - EXPECT_EQ("two", choice); - - args = {"--quick", "four"}; - EXPECT_THROW(run(), CLI::ValidationError); -} - -TEST_F(TApp, InIntSet) { - - int choice; - app.add_set("-q,--quick", choice, {1, 2, 3}); - - args = {"--quick", "2"}; - - run(); - EXPECT_EQ(2, choice); - - args = {"--quick", "4"}; - EXPECT_THROW(run(), CLI::ValidationError); -} - -TEST_F(TApp, InIntSetWindows) { - - int choice; - app.add_set("-q,--quick", choice, {1, 2, 3}); - app.allow_windows_style_options(); - args = {"/q", "2"}; - - run(); - EXPECT_EQ(2, choice); - - args = {"/q", "4"}; - EXPECT_THROW(run(), CLI::ValidationError); - - args = {"/q4"}; - EXPECT_THROW(run(), CLI::ExtrasError); -} - -TEST_F(TApp, FailSet) { - - int choice; - app.add_set("-q,--quick", choice, {1, 2, 3}); - - args = {"--quick", "3", "--quick=2"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); - - args = {"--quick=hello"}; - EXPECT_THROW(run(), CLI::ValidationError); -} - -TEST_F(TApp, FailMutableSet) { - - int choice; - std::set<int> vals{1, 2, 3}; - app.add_mutable_set("-q,--quick", choice, vals); - 
app.add_mutable_set("-s,--slow", choice, vals, "", true); - - args = {"--quick=hello"}; - EXPECT_THROW(run(), CLI::ValidationError); - - args = {"--slow=hello"}; - EXPECT_THROW(run(), CLI::ValidationError); -} - -// #113 -TEST_F(TApp, AddRemoveSetItems) { - std::set<std::string> items{"TYPE1", "TYPE2", "TYPE3", "TYPE4", "TYPE5"}; - - std::string type1, type2; - app.add_mutable_set("--type1", type1, items); - app.add_mutable_set("--type2", type2, items, "", true); - - args = {"--type1", "TYPE1", "--type2", "TYPE2"}; - - run(); - EXPECT_EQ(type1, "TYPE1"); - EXPECT_EQ(type2, "TYPE2"); - - items.insert("TYPE6"); - items.insert("TYPE7"); - - items.erase("TYPE1"); - items.erase("TYPE2"); - - args = {"--type1", "TYPE6", "--type2", "TYPE7"}; - run(); - EXPECT_EQ(type1, "TYPE6"); - EXPECT_EQ(type2, "TYPE7"); - - args = {"--type1", "TYPE1"}; - EXPECT_THROW(run(), CLI::ValidationError); - - args = {"--type2", "TYPE2"}; - EXPECT_THROW(run(), CLI::ValidationError); -} - -TEST(THelp, Defaults) { +TEST_CASE("THelp: Defaults", "[deprecated]") { CLI::App app{"My prog"}; int one{1}, two{2}; @@ -165,24 +24,24 @@ TEST(THelp, Defaults) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("--one")); - EXPECT_THAT(help, HasSubstr("--set")); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, HasSubstr("=2")); - EXPECT_THAT(help, HasSubstr("2,3,4")); + CHECK_THAT(help, Contains("--one")); + CHECK_THAT(help, Contains("--set")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("=2")); + CHECK_THAT(help, Contains("2,3,4")); } -TEST(THelp, VectorOpts) { +TEST_CASE("THelp: VectorOpts", "[deprecated]") { CLI::App app{"My prog"}; std::vector<int> x = {1, 2}; app.add_option("-q,--quick", x, "", true); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("INT=[1,2] ...")); + CHECK_THAT(help, Contains("INT=[1,2] ...")); } -TEST(THelp, SetLower) { +TEST_CASE("THelp: SetLower", "[deprecated]") { CLI::App app{"My prog"}; std::string def{"One"}; @@ -190,14 +49,14 
@@ TEST(THelp, SetLower) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("--set")); - EXPECT_THAT(help, HasSubstr("=One")); - EXPECT_THAT(help, HasSubstr("oNe")); - EXPECT_THAT(help, HasSubstr("twO")); - EXPECT_THAT(help, HasSubstr("THREE")); + CHECK_THAT(help, Contains("--set")); + CHECK_THAT(help, Contains("=One")); + CHECK_THAT(help, Contains("oNe")); + CHECK_THAT(help, Contains("twO")); + CHECK_THAT(help, Contains("THREE")); } -TEST(THelp, ChangingSetDefaulted) { +TEST_CASE("THelp: ChangingSetDefaulted", "[deprecated]") { CLI::App app; std::set<int> vals{1, 2, 3}; @@ -206,19 +65,19 @@ TEST(THelp, ChangingSetDefaulted) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, Not(HasSubstr("4"))); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, !Contains("4")); vals.insert(4); vals.erase(1); help = app.help(); - EXPECT_THAT(help, Not(HasSubstr("1"))); - EXPECT_THAT(help, HasSubstr("4")); + CHECK_THAT(help, !Contains("1")); + CHECK_THAT(help, Contains("4")); } -TEST(THelp, ChangingCaselessSetDefaulted) { +TEST_CASE("THelp: ChangingCaselessSetDefaulted", "[deprecated]") { CLI::App app; std::set<std::string> vals{"1", "2", "3"}; @@ -227,19 +86,19 @@ TEST(THelp, ChangingCaselessSetDefaulted) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, Not(HasSubstr("4"))); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, !Contains("4")); vals.insert("4"); vals.erase("1"); help = app.help(); - EXPECT_THAT(help, Not(HasSubstr("1"))); - EXPECT_THAT(help, HasSubstr("4")); + CHECK_THAT(help, !Contains("1")); + CHECK_THAT(help, Contains("4")); } -TEST_F(TApp, DefaultOpts) { +TEST_CASE_METHOD(TApp, "DefaultOpts", "[deprecated]") { int i = 3; std::string s = "HI"; @@ -251,116 +110,116 @@ TEST_F(TApp, DefaultOpts) { run(); - EXPECT_EQ(1u, app.count("i")); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(2, i); - EXPECT_EQ("9", s); + CHECK(app.count("i") == 1u); + CHECK(app.count("-s") == 1u); 
+ CHECK(i == 2); + CHECK(s == "9"); } -TEST_F(TApp, VectorDefaultedFixedString) { +TEST_CASE_METHOD(TApp, "VectorDefaultedFixedString", "[deprecated]") { std::vector<std::string> strvec{"one"}; std::vector<std::string> answer{"mystring", "mystring2", "mystring3"}; CLI::Option *opt = app.add_option("-s,--string", strvec, "", true)->expected(3); - EXPECT_EQ(3, opt->get_expected()); + CHECK(opt->get_expected() == 3); args = {"--string", "mystring", "mystring2", "mystring3"}; run(); - EXPECT_EQ(3u, app.count("--string")); - EXPECT_EQ(answer, strvec); + CHECK(app.count("--string") == 3u); + CHECK(strvec == answer); } -TEST_F(TApp, DefaultedResult) { +TEST_CASE_METHOD(TApp, "DefaultedResult", "[deprecated]") { std::string sval = "NA"; int ival; auto opts = app.add_option("--string", sval, "", true); auto optv = app.add_option("--val", ival); args = {}; run(); - EXPECT_EQ(sval, "NA"); + CHECK("NA" == sval); std::string nString; opts->results(nString); - EXPECT_EQ(nString, "NA"); + CHECK("NA" == nString); int newIval; - // EXPECT_THROW(optv->results(newIval), CLI::ConversionError); + // CHECK_THROWS_AS (optv->results(newIval), CLI::ConversionError); optv->default_str("442"); optv->results(newIval); - EXPECT_EQ(newIval, 442); + CHECK(442 == newIval); } -TEST_F(TApp, OptionWithDefaults) { +TEST_CASE_METHOD(TApp, "OptionWithDefaults", "[deprecated]") { int someint = 2; app.add_option("-a", someint, "", true); args = {"-a1", "-a2"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } // #209 -TEST_F(TApp, CustomUserSepParse) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse", "[deprecated]") { std::vector<int> vals = {1, 2, 3}; args = {"--idx", "1,2,3"}; auto opt = app.add_option("--idx", vals)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2, 3})); + CHECK(std::vector<int>({1, 2, 3}) == vals); std::vector<int> vals2; // check that the results vector gets the results in the same way opt->results(vals2); - 
EXPECT_EQ(vals2, vals); + CHECK(vals == vals2); app.remove_option(opt); app.add_option("--idx", vals, "", true)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2, 3})); + CHECK(std::vector<int>({1, 2, 3}) == vals); } // #209 -TEST_F(TApp, CustomUserSepParse2) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse2", "[deprecated]") { std::vector<int> vals = {1, 2, 3}; args = {"--idx", "1,2,"}; auto opt = app.add_option("--idx", vals)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); app.remove_option(opt); app.add_option("--idx", vals, "", true)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); } // // #209 -TEST_F(TApp, CustomUserSepParse4) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse4", "[deprecated]") { std::vector<int> vals; args = {"--idx", "1, 2"}; auto opt = app.add_option("--idx", vals, "", true)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); app.remove_option(opt); app.add_option("--idx", vals)->delimiter(','); run(); - EXPECT_EQ(vals, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == vals); } // #218 -TEST_F(TApp, CustomUserSepParse5) { +TEST_CASE_METHOD(TApp, "CustomUserSepParse5", "[deprecated]") { std::vector<std::string> bar; args = {"this", "is", "a", "test"}; auto opt = app.add_option("bar", bar, "bar"); run(); - EXPECT_EQ(bar, std::vector<std::string>({"this", "is", "a", "test"})); + CHECK(std::vector<std::string>({"this", "is", "a", "test"}) == bar); app.remove_option(opt); args = {"this", "is", "a", "test"}; app.add_option("bar", bar, "bar", true); run(); - EXPECT_EQ(bar, std::vector<std::string>({"this", "is", "a", "test"})); + CHECK(std::vector<std::string>({"this", "is", "a", "test"}) == bar); } diff --git a/packages/CLI11/tests/FormatterTest.cpp b/packages/CLI11/tests/FormatterTest.cpp index 
513f956a03bc1d20e01b6fb6a05821903a28e6f0..22da56f0e543af444e9caadd0c60a2932fe35018 100644 --- a/packages/CLI11/tests/FormatterTest.cpp +++ b/packages/CLI11/tests/FormatterTest.cpp @@ -10,12 +10,10 @@ #include "CLI/CLI.hpp" #endif -#include "gmock/gmock.h" -#include "gtest/gtest.h" +#include "catch.hpp" #include <fstream> -using ::testing::HasSubstr; -using ::testing::Not; +using Catch::Matchers::Contains; class SimpleFormatter : public CLI::FormatterBase { public: @@ -26,17 +24,17 @@ class SimpleFormatter : public CLI::FormatterBase { } }; -TEST(Formatter, Nothing) { +TEST_CASE("Formatter: Nothing", "[formatter]") { CLI::App app{"My prog"}; app.formatter(std::make_shared<SimpleFormatter>()); std::string help = app.help(); - EXPECT_EQ(help, "This is really simple"); + CHECK("This is really simple" == help); } -TEST(Formatter, NothingLambda) { +TEST_CASE("Formatter: NothingLambda", "[formatter]") { CLI::App app{"My prog"}; app.formatter_fn( @@ -44,10 +42,10 @@ TEST(Formatter, NothingLambda) { std::string help = app.help(); - EXPECT_EQ(help, "This is really simple"); + CHECK("This is really simple" == help); } -TEST(Formatter, OptCustomize) { +TEST_CASE("Formatter: OptCustomize", "[formatter]") { CLI::App app{"My prog"}; auto optfmt = std::make_shared<CLI::Formatter>(); @@ -60,16 +58,15 @@ TEST(Formatter, OptCustomize) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("(MUST HAVE)")); - EXPECT_EQ(help, - "My prog\n" - "Usage: [OPTIONS]\n\n" - "Options:\n" - " -h,--help Print this help message and exit\n" - " --opt INT (MUST HAVE) Something\n\n"); + CHECK_THAT(help, Contains("(MUST HAVE)")); + CHECK(help == "My prog\n" + "Usage: [OPTIONS]\n\n" + "Options:\n" + " -h,--help Print this help message and exit\n" + " --opt INT (MUST HAVE) Something\n\n"); } -TEST(Formatter, OptCustomizeSimple) { +TEST_CASE("Formatter: OptCustomizeSimple", "[formatter]") { CLI::App app{"My prog"}; app.get_formatter()->column_width(25); @@ -80,16 +77,15 @@ TEST(Formatter, 
OptCustomizeSimple) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("(MUST HAVE)")); - EXPECT_EQ(help, - "My prog\n" - "Usage: [OPTIONS]\n\n" - "Options:\n" - " -h,--help Print this help message and exit\n" - " --opt INT (MUST HAVE) Something\n\n"); + CHECK_THAT(help, Contains("(MUST HAVE)")); + CHECK(help == "My prog\n" + "Usage: [OPTIONS]\n\n" + "Options:\n" + " -h,--help Print this help message and exit\n" + " --opt INT (MUST HAVE) Something\n\n"); } -TEST(Formatter, OptCustomizeOptionText) { +TEST_CASE("Formatter: OptCustomizeOptionText", "[formatter]") { CLI::App app{"My prog"}; app.get_formatter()->column_width(25); @@ -99,16 +95,15 @@ TEST(Formatter, OptCustomizeOptionText) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("(ARG)")); - EXPECT_EQ(help, - "My prog\n" - "Usage: [OPTIONS]\n\n" - "Options:\n" - " -h,--help Print this help message and exit\n" - " --opt (ARG) Something\n\n"); + CHECK_THAT(help, Contains("(ARG)")); + CHECK(help == "My prog\n" + "Usage: [OPTIONS]\n\n" + "Options:\n" + " -h,--help Print this help message and exit\n" + " --opt (ARG) Something\n\n"); } -TEST(Formatter, FalseFlagExample) { +TEST_CASE("Formatter: FalseFlagExample", "[formatter]") { CLI::App app{"My prog"}; app.get_formatter()->column_width(25); @@ -122,12 +117,12 @@ TEST(Formatter, FalseFlagExample) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("--no_opt{false}")); - EXPECT_THAT(help, HasSubstr("--no_opt2{false}")); - EXPECT_THAT(help, HasSubstr("-O{false}")); + CHECK_THAT(help, Contains("--no_opt{false}")); + CHECK_THAT(help, Contains("--no_opt2{false}")); + CHECK_THAT(help, Contains("-O{false}")); } -TEST(Formatter, AppCustomize) { +TEST_CASE("Formatter: AppCustomize", "[formatter]") { CLI::App app{"My prog"}; app.add_subcommand("subcom1", "This"); @@ -139,17 +134,16 @@ TEST(Formatter, AppCustomize) { app.add_subcommand("subcom2", "This"); std::string help = app.help(); - EXPECT_EQ(help, - "My prog\n" - "Run: [OPTIONS] 
[SUBCOMMAND]\n\n" - "Options:\n" - " -h,--help Print this help message and exit\n\n" - "Subcommands:\n" - " subcom1 This\n" - " subcom2 This\n\n"); + CHECK(help == "My prog\n" + "Run: [OPTIONS] [SUBCOMMAND]\n\n" + "Options:\n" + " -h,--help Print this help message and exit\n\n" + "Subcommands:\n" + " subcom1 This\n" + " subcom2 This\n\n"); } -TEST(Formatter, AppCustomizeSimple) { +TEST_CASE("Formatter: AppCustomizeSimple", "[formatter]") { CLI::App app{"My prog"}; app.add_subcommand("subcom1", "This"); @@ -159,48 +153,47 @@ TEST(Formatter, AppCustomizeSimple) { app.add_subcommand("subcom2", "This"); std::string help = app.help(); - EXPECT_EQ(help, - "My prog\n" - "Run: [OPTIONS] [SUBCOMMAND]\n\n" - "Options:\n" - " -h,--help Print this help message and exit\n\n" - "Subcommands:\n" - " subcom1 This\n" - " subcom2 This\n\n"); + CHECK(help == "My prog\n" + "Run: [OPTIONS] [SUBCOMMAND]\n\n" + "Options:\n" + " -h,--help Print this help message and exit\n\n" + "Subcommands:\n" + " subcom1 This\n" + " subcom2 This\n\n"); } -TEST(Formatter, AllSub) { +TEST_CASE("Formatter: AllSub", "[formatter]") { CLI::App app{"My prog"}; CLI::App *sub = app.add_subcommand("subcom", "This"); sub->add_flag("--insub", "MyFlag"); std::string help = app.help("", CLI::AppFormatMode::All); - EXPECT_THAT(help, HasSubstr("--insub")); - EXPECT_THAT(help, HasSubstr("subcom")); + CHECK_THAT(help, Contains("--insub")); + CHECK_THAT(help, Contains("subcom")); } -TEST(Formatter, AllSubRequired) { +TEST_CASE("Formatter: AllSubRequired", "[formatter]") { CLI::App app{"My prog"}; CLI::App *sub = app.add_subcommand("subcom", "This"); sub->add_flag("--insub", "MyFlag"); sub->required(); std::string help = app.help("", CLI::AppFormatMode::All); - EXPECT_THAT(help, HasSubstr("--insub")); - EXPECT_THAT(help, HasSubstr("subcom")); - EXPECT_THAT(help, HasSubstr("REQUIRED")); + CHECK_THAT(help, Contains("--insub")); + CHECK_THAT(help, Contains("subcom")); + CHECK_THAT(help, Contains("REQUIRED")); } 
-TEST(Formatter, NamelessSub) { +TEST_CASE("Formatter: NamelessSub", "[formatter]") { CLI::App app{"My prog"}; CLI::App *sub = app.add_subcommand("", "This subcommand"); sub->add_flag("--insub", "MyFlag"); std::string help = app.help("", CLI::AppFormatMode::Normal); - EXPECT_THAT(help, HasSubstr("--insub")); - EXPECT_THAT(help, HasSubstr("This subcommand")); + CHECK_THAT(help, Contains("--insub")); + CHECK_THAT(help, Contains("This subcommand")); } -TEST(Formatter, NamelessSubInGroup) { +TEST_CASE("Formatter: NamelessSubInGroup", "[formatter]") { CLI::App app{"My prog"}; CLI::App *sub = app.add_subcommand("", "This subcommand"); CLI::App *sub2 = app.add_subcommand("sub2", "subcommand2"); @@ -210,9 +203,9 @@ TEST(Formatter, NamelessSubInGroup) { sub->group("group1"); sub2->group("group1"); std::string help = app.help("", CLI::AppFormatMode::Normal); - EXPECT_THAT(help, HasSubstr("--insub")); - EXPECT_THAT(help, HasSubstr("This subcommand")); - EXPECT_THAT(help, HasSubstr("group1")); - EXPECT_THAT(help, HasSubstr("sub2")); - EXPECT_TRUE(help.find("pos") == std::string::npos); + CHECK_THAT(help, Contains("--insub")); + CHECK_THAT(help, Contains("This subcommand")); + CHECK_THAT(help, Contains("group1")); + CHECK_THAT(help, Contains("sub2")); + CHECK(help.find("pos") == std::string::npos); } diff --git a/packages/CLI11/tests/HelpTest.cpp b/packages/CLI11/tests/HelpTest.cpp index a6169dfebbdcaa907f2163b57610941e6338a810..48089f6fc76ea10a7446447ebf8114fdd9797553 100644 --- a/packages/CLI11/tests/HelpTest.cpp +++ b/packages/CLI11/tests/HelpTest.cpp @@ -10,65 +10,63 @@ #include "CLI/CLI.hpp" #endif -#include "gmock/gmock.h" -#include "gtest/gtest.h" +#include "catch.hpp" #include <fstream> -using ::testing::HasSubstr; -using ::testing::Not; +using Catch::Matchers::Contains; -TEST(THelp, Basic) { +TEST_CASE("THelp: Basic", "[help]") { CLI::App app{"My prog"}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, 
HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Usage:")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); } -TEST(THelp, Footer) { +TEST_CASE("THelp: Footer", "[help]") { CLI::App app{"My prog"}; app.footer("Report bugs to bugs@example.com"); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Usage:")); - EXPECT_THAT(help, HasSubstr("Report bugs to bugs@example.com")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); + CHECK_THAT(help, Contains("Report bugs to bugs@example.com")); } -TEST(THelp, FooterCallback) { +TEST_CASE("THelp: FooterCallback", "[help]") { CLI::App app{"My prog"}; app.footer([]() { return "Report bugs to bugs@example.com"; }); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Usage:")); - EXPECT_THAT(help, HasSubstr("Report bugs to bugs@example.com")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); + CHECK_THAT(help, Contains("Report bugs to bugs@example.com")); } -TEST(THelp, FooterCallbackBoth) { +TEST_CASE("THelp: FooterCallbackBoth", "[help]") { CLI::App app{"My prog"}; app.footer([]() { return "Report bugs to bugs@example.com"; }); app.footer(" foot!!!!"); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Usage:")); - 
EXPECT_THAT(help, HasSubstr("Report bugs to bugs@example.com")); - EXPECT_THAT(help, HasSubstr("foot!!!!")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); + CHECK_THAT(help, Contains("Report bugs to bugs@example.com")); + CHECK_THAT(help, Contains("foot!!!!")); } -TEST(THelp, OptionalPositional) { +TEST_CASE("THelp: OptionalPositional", "[help]") { CLI::App app{"My prog", "program"}; std::string x; @@ -76,16 +74,16 @@ TEST(THelp, OptionalPositional) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Positionals:")); - EXPECT_THAT(help, HasSubstr("something TEXT")); - EXPECT_THAT(help, HasSubstr("My option here")); - EXPECT_THAT(help, HasSubstr("Usage: program [OPTIONS] [something]")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Positionals:")); + CHECK_THAT(help, Contains("something TEXT")); + CHECK_THAT(help, Contains("My option here")); + CHECK_THAT(help, Contains("Usage: program [OPTIONS] [something]")); } -TEST(THelp, Hidden) { +TEST_CASE("THelp: Hidden", "[help]") { CLI::App app{"My prog"}; std::string x; @@ -95,15 +93,15 @@ TEST(THelp, Hidden) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, Not(HasSubstr("[something]"))); - EXPECT_THAT(help, Not(HasSubstr("something "))); - EXPECT_THAT(help, Not(HasSubstr("another"))); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, !Contains("[something]")); + CHECK_THAT(help, !Contains("something ")); + CHECK_THAT(help, 
!Contains("another")); } -TEST(THelp, deprecatedOptions) { +TEST_CASE("THelp: deprecatedOptions", "[help]") { CLI::App app{"My prog"}; std::string x; @@ -116,12 +114,12 @@ TEST(THelp, deprecatedOptions) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("DEPRECATED")); - EXPECT_THAT(help, HasSubstr("something")); - EXPECT_NO_THROW(app.parse("--something deprecated")); + CHECK_THAT(help, Contains("DEPRECATED")); + CHECK_THAT(help, Contains("something")); + CHECK_NOTHROW(app.parse("--something deprecated")); } -TEST(THelp, deprecatedOptions2) { +TEST_CASE("THelp: deprecatedOptions2", "[help]") { CLI::App app{"My prog"}; std::string x; @@ -134,12 +132,12 @@ TEST(THelp, deprecatedOptions2) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("DEPRECATED")); - EXPECT_THAT(help, HasSubstr("something")); - EXPECT_NO_THROW(app.parse("--something deprecated")); + CHECK_THAT(help, Contains("DEPRECATED")); + CHECK_THAT(help, Contains("something")); + CHECK_NOTHROW(app.parse("--something deprecated")); } -TEST(THelp, deprecatedOptions3) { +TEST_CASE("THelp: deprecatedOptions3", "[help]") { CLI::App app{"My prog"}; std::string x; @@ -152,12 +150,12 @@ TEST(THelp, deprecatedOptions3) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("DEPRECATED")); - EXPECT_THAT(help, HasSubstr("'--something_else' instead")); - EXPECT_NO_THROW(app.parse("--something deprecated")); + CHECK_THAT(help, Contains("DEPRECATED")); + CHECK_THAT(help, Contains("'--something_else' instead")); + CHECK_NOTHROW(app.parse("--something deprecated")); } -TEST(THelp, retiredOptions) { +TEST_CASE("THelp: retiredOptions", "[help]") { CLI::App app{"My prog"}; std::string x; @@ -170,13 +168,13 @@ TEST(THelp, retiredOptions) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("RETIRED")); - EXPECT_THAT(help, HasSubstr("something")); + CHECK_THAT(help, Contains("RETIRED")); + CHECK_THAT(help, Contains("something")); - EXPECT_NO_THROW(app.parse("--something old")); + 
CHECK_NOTHROW(app.parse("--something old")); } -TEST(THelp, retiredOptions2) { +TEST_CASE("THelp: retiredOptions2", "[help]") { CLI::App app{"My prog"}; std::string x; @@ -188,12 +186,12 @@ TEST(THelp, retiredOptions2) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("RETIRED")); - EXPECT_THAT(help, HasSubstr("something")); - EXPECT_NO_THROW(app.parse("--something old")); + CHECK_THAT(help, Contains("RETIRED")); + CHECK_THAT(help, Contains("something")); + CHECK_NOTHROW(app.parse("--something old")); } -TEST(THelp, retiredOptions3) { +TEST_CASE("THelp: retiredOptions3", "[help]") { CLI::App app{"My prog"}; std::string x; @@ -206,13 +204,13 @@ TEST(THelp, retiredOptions3) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("RETIRED")); - EXPECT_THAT(help, HasSubstr("something")); + CHECK_THAT(help, Contains("RETIRED")); + CHECK_THAT(help, Contains("something")); - EXPECT_NO_THROW(app.parse("--something old")); + CHECK_NOTHROW(app.parse("--something old")); } -TEST(THelp, HiddenGroup) { +TEST_CASE("THelp: HiddenGroup", "[help]") { CLI::App app{"My prog"}; // empty option group name should be hidden auto hgroup = app.add_option_group(""); @@ -223,22 +221,22 @@ TEST(THelp, HiddenGroup) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, Not(HasSubstr("[something]"))); - EXPECT_THAT(help, Not(HasSubstr("something "))); - EXPECT_THAT(help, Not(HasSubstr("another"))); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, !Contains("[something]")); + CHECK_THAT(help, !Contains("something ")); + CHECK_THAT(help, !Contains("another")); hgroup->group("ghidden"); help = app.help(); - EXPECT_THAT(help, HasSubstr("something ")); - EXPECT_THAT(help, HasSubstr("another")); + CHECK_THAT(help, Contains("something ")); + CHECK_THAT(help, 
Contains("another")); } -TEST(THelp, OptionalPositionalAndOptions) { +TEST_CASE("THelp: OptionalPositionalAndOptions", "[help]") { CLI::App app{"My prog", "AnotherProgram"}; app.add_flag("-q,--quick"); @@ -247,13 +245,13 @@ TEST(THelp, OptionalPositionalAndOptions) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Usage: AnotherProgram [OPTIONS] [something]")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Usage: AnotherProgram [OPTIONS] [something]")); } -TEST(THelp, RequiredPositionalAndOptions) { +TEST_CASE("THelp: RequiredPositionalAndOptions", "[help]") { CLI::App app{"My prog"}; app.add_flag("-q,--quick"); @@ -262,14 +260,14 @@ TEST(THelp, RequiredPositionalAndOptions) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("-h,--help")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Positionals:")); - EXPECT_THAT(help, HasSubstr("Usage: [OPTIONS] something")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("-h,--help")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Positionals:")); + CHECK_THAT(help, Contains("Usage: [OPTIONS] something")); } -TEST(THelp, MultiOpts) { +TEST_CASE("THelp: MultiOpts", "[help]") { CLI::App app{"My prog"}; std::vector<int> x, y; app.add_option("-q,--quick", x, "Disc")->expected(2); @@ -277,24 +275,24 @@ TEST(THelp, MultiOpts) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, Not(HasSubstr("Positionals:"))); - EXPECT_THAT(help, HasSubstr("Usage: [OPTIONS]")); - EXPECT_THAT(help, HasSubstr("INT x 2")); - EXPECT_THAT(help, HasSubstr("INT ...")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, 
!Contains("Positionals:")); + CHECK_THAT(help, Contains("Usage: [OPTIONS]")); + CHECK_THAT(help, Contains("INT x 2")); + CHECK_THAT(help, Contains("INT ...")); } -TEST(THelp, VectorOpts) { +TEST_CASE("THelp: VectorOpts", "[help]") { CLI::App app{"My prog"}; std::vector<int> x = {1, 2}; app.add_option("-q,--quick", x)->capture_default_str(); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("INT=[1,2] ...")); + CHECK_THAT(help, Contains("INT=[1,2] ...")); } -TEST(THelp, MultiPosOpts) { +TEST_CASE("THelp: MultiPosOpts", "[help]") { CLI::App app{"My prog"}; app.name("program"); std::vector<int> x, y; @@ -303,26 +301,26 @@ TEST(THelp, MultiPosOpts) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, HasSubstr("Positionals:")); - EXPECT_THAT(help, HasSubstr("Usage: program [OPTIONS]")); - EXPECT_THAT(help, HasSubstr("INT x 2")); - EXPECT_THAT(help, HasSubstr("INT ...")); - EXPECT_THAT(help, HasSubstr("[quick(2x)]")); - EXPECT_THAT(help, HasSubstr("[vals...]")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, Contains("Positionals:")); + CHECK_THAT(help, Contains("Usage: program [OPTIONS]")); + CHECK_THAT(help, Contains("INT x 2")); + CHECK_THAT(help, Contains("INT ...")); + CHECK_THAT(help, Contains("[quick(2x)]")); + CHECK_THAT(help, Contains("[vals...]")); } -TEST(THelp, EnvName) { +TEST_CASE("THelp: EnvName", "[help]") { CLI::App app{"My prog"}; std::string input; app.add_option("--something", input)->envname("SOME_ENV"); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("SOME_ENV")); + CHECK_THAT(help, Contains("SOME_ENV")); } -TEST(THelp, Needs) { +TEST_CASE("THelp: Needs", "[help]") { CLI::App app{"My prog"}; CLI::Option *op1 = app.add_flag("--op1"); @@ -330,10 +328,10 @@ TEST(THelp, Needs) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Needs: --op1")); + CHECK_THAT(help, Contains("Needs: --op1")); } -TEST(THelp, NeedsPositional) { +TEST_CASE("THelp: NeedsPositional", 
"[help]") { CLI::App app{"My prog"}; int x{0}, y{0}; @@ -343,11 +341,11 @@ TEST(THelp, NeedsPositional) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Positionals:")); - EXPECT_THAT(help, HasSubstr("Needs: op1")); + CHECK_THAT(help, Contains("Positionals:")); + CHECK_THAT(help, Contains("Needs: op1")); } -TEST(THelp, Excludes) { +TEST_CASE("THelp: Excludes", "[help]") { CLI::App app{"My prog"}; CLI::Option *op1 = app.add_flag("--op1"); @@ -355,10 +353,10 @@ TEST(THelp, Excludes) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Excludes: --op1")); + CHECK_THAT(help, Contains("Excludes: --op1")); } -TEST(THelp, ExcludesPositional) { +TEST_CASE("THelp: ExcludesPositional", "[help]") { CLI::App app{"My prog"}; int x{0}, y{0}; @@ -368,11 +366,11 @@ TEST(THelp, ExcludesPositional) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Positionals:")); - EXPECT_THAT(help, HasSubstr("Excludes: op1")); + CHECK_THAT(help, Contains("Positionals:")); + CHECK_THAT(help, Contains("Excludes: op1")); } -TEST(THelp, ExcludesSymmetric) { +TEST_CASE("THelp: ExcludesSymmetric", "[help]") { CLI::App app{"My prog"}; CLI::Option *op1 = app.add_flag("--op1"); @@ -380,10 +378,10 @@ TEST(THelp, ExcludesSymmetric) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Excludes: --op2")); + CHECK_THAT(help, Contains("Excludes: --op2")); } -TEST(THelp, ManualSetters) { +TEST_CASE("THelp: ManualSetters", "[help]") { CLI::App app{"My prog"}; @@ -392,35 +390,35 @@ TEST(THelp, ManualSetters) { CLI::Option *op1 = app.add_option("--op", x); op1->default_str("12"); op1->type_name("BIGGLES"); - EXPECT_EQ(x, 1); + CHECK(1 == x); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("=12")); - EXPECT_THAT(help, HasSubstr("BIGGLES")); + CHECK_THAT(help, Contains("=12")); + CHECK_THAT(help, Contains("BIGGLES")); op1->default_val("14"); - EXPECT_EQ(x, 14); + CHECK(14 == x); help = app.help(); - EXPECT_THAT(help, HasSubstr("=14")); + CHECK_THAT(help, 
Contains("=14")); op1->default_val(12); - EXPECT_EQ(x, 12); + CHECK(12 == x); help = app.help(); - EXPECT_THAT(help, HasSubstr("=12")); + CHECK_THAT(help, Contains("=12")); - EXPECT_TRUE(op1->get_run_callback_for_default()); + CHECK(op1->get_run_callback_for_default()); op1->run_callback_for_default(false); - EXPECT_FALSE(op1->get_run_callback_for_default()); + CHECK(!op1->get_run_callback_for_default()); op1->default_val(18); // x should not be modified in this case - EXPECT_EQ(x, 12); + CHECK(12 == x); help = app.help(); - EXPECT_THAT(help, HasSubstr("=18")); + CHECK_THAT(help, Contains("=18")); } -TEST(THelp, ManualSetterOverFunction) { +TEST_CASE("THelp: ManualSetterOverFunction", "[help]") { CLI::App app{"My prog"}; @@ -431,31 +429,31 @@ TEST(THelp, ManualSetterOverFunction) { op1->default_str("12"); op1->type_name("BIGGLES"); op2->type_name("QUIGGLES"); - EXPECT_EQ(x, 1); + CHECK(1 == x); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("=12")); - EXPECT_THAT(help, HasSubstr("BIGGLES")); - EXPECT_THAT(help, HasSubstr("QUIGGLES")); - EXPECT_THAT(help, HasSubstr("{1,2}")); + CHECK_THAT(help, Contains("=12")); + CHECK_THAT(help, Contains("BIGGLES")); + CHECK_THAT(help, Contains("QUIGGLES")); + CHECK_THAT(help, Contains("{1,2}")); } -TEST(THelp, Subcom) { +TEST_CASE("THelp: Subcom", "[help]") { CLI::App app{"My prog"}; auto sub1 = app.add_subcommand("sub1"); app.add_subcommand("sub2"); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Usage: [OPTIONS] [SUBCOMMAND]")); + CHECK_THAT(help, Contains("Usage: [OPTIONS] [SUBCOMMAND]")); app.require_subcommand(); help = app.help(); - EXPECT_THAT(help, HasSubstr("Usage: [OPTIONS] SUBCOMMAND")); + CHECK_THAT(help, Contains("Usage: [OPTIONS] SUBCOMMAND")); help = sub1->help(); - EXPECT_THAT(help, HasSubstr("Usage: sub1")); + CHECK_THAT(help, Contains("Usage: sub1")); char x[] = "./myprogram"; char y[] = "sub2"; @@ -464,10 +462,10 @@ TEST(THelp, Subcom) { app.parse(static_cast<int>(args.size()), 
args.data()); help = app.help(); - EXPECT_THAT(help, HasSubstr("Usage: ./myprogram sub2")); + CHECK_THAT(help, Contains("Usage: ./myprogram sub2")); } -TEST(THelp, Subcom_alias) { +TEST_CASE("THelp: Subcom_alias", "[help]") { CLI::App app{"My prog"}; auto sub1 = app.add_subcommand("sub1", "Subcommand1 description test"); @@ -477,12 +475,12 @@ TEST(THelp, Subcom_alias) { app.add_subcommand("sub2", "Subcommand2 description test"); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Usage: [OPTIONS] [SUBCOMMAND]")); - EXPECT_THAT(help, HasSubstr("sub_alias1")); - EXPECT_THAT(help, HasSubstr("sub_alias2")); + CHECK_THAT(help, Contains("Usage: [OPTIONS] [SUBCOMMAND]")); + CHECK_THAT(help, Contains("sub_alias1")); + CHECK_THAT(help, Contains("sub_alias2")); } -TEST(THelp, Subcom_alias_group) { +TEST_CASE("THelp: Subcom_alias_group", "[help]") { CLI::App app{"My prog"}; auto sub1 = app.add_subcommand("", "Subcommand1 description test"); @@ -492,12 +490,12 @@ TEST(THelp, Subcom_alias_group) { app.add_subcommand("sub2", "Subcommand2 description test"); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Usage: [OPTIONS] [SUBCOMMAND]")); - EXPECT_THAT(help, HasSubstr("sub_alias1")); - EXPECT_THAT(help, HasSubstr("sub_alias2")); + CHECK_THAT(help, Contains("Usage: [OPTIONS] [SUBCOMMAND]")); + CHECK_THAT(help, Contains("sub_alias1")); + CHECK_THAT(help, Contains("sub_alias2")); } -TEST(THelp, MasterName) { +TEST_CASE("THelp: MasterName", "[help]") { CLI::App app{"My prog", "MyRealName"}; char x[] = "./myprogram"; @@ -505,10 +503,10 @@ TEST(THelp, MasterName) { std::vector<char *> args = {x}; app.parse(static_cast<int>(args.size()), args.data()); - EXPECT_THAT(app.help(), HasSubstr("Usage: MyRealName")); + CHECK_THAT(app.help(), Contains("Usage: MyRealName")); } -TEST(THelp, IntDefaults) { +TEST_CASE("THelp: IntDefaults", "[help]") { CLI::App app{"My prog"}; int one{1}, two{2}; @@ -517,14 +515,14 @@ TEST(THelp, IntDefaults) { std::string help = app.help(); - 
EXPECT_THAT(help, HasSubstr("--one")); - EXPECT_THAT(help, HasSubstr("--set")); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, HasSubstr("=2")); - EXPECT_THAT(help, HasSubstr("2,3,4")); + CHECK_THAT(help, Contains("--one")); + CHECK_THAT(help, Contains("--set")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("=2")); + CHECK_THAT(help, Contains("2,3,4")); } -TEST(THelp, SetLower) { +TEST_CASE("THelp: SetLower", "[help]") { CLI::App app{"My prog"}; app.option_defaults()->always_capture_default(); @@ -533,14 +531,14 @@ TEST(THelp, SetLower) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("--set")); - EXPECT_THAT(help, HasSubstr("=One")); - EXPECT_THAT(help, HasSubstr("oNe")); - EXPECT_THAT(help, HasSubstr("twO")); - EXPECT_THAT(help, HasSubstr("THREE")); + CHECK_THAT(help, Contains("--set")); + CHECK_THAT(help, Contains("=One")); + CHECK_THAT(help, Contains("oNe")); + CHECK_THAT(help, Contains("twO")); + CHECK_THAT(help, Contains("THREE")); } -TEST(THelp, OnlyOneHelp) { +TEST_CASE("THelp: OnlyOneHelp", "[help]") { CLI::App app{"My prog"}; // It is not supported to have more than one help flag, last one wins @@ -548,10 +546,10 @@ TEST(THelp, OnlyOneHelp) { app.set_help_flag("--yelp", "Alias for help"); std::vector<std::string> input{"--help"}; - EXPECT_THROW(app.parse(input), CLI::ExtrasError); + CHECK_THROWS_AS(app.parse(input), CLI::ExtrasError); } -TEST(THelp, MultiHelp) { +TEST_CASE("THelp: MultiHelp", "[help]") { CLI::App app{"My prog"}; // It is not supported to have more than one help flag, last one wins @@ -559,10 +557,10 @@ TEST(THelp, MultiHelp) { app.allow_windows_style_options(); std::vector<std::string> input{"/?"}; - EXPECT_THROW(app.parse(input), CLI::CallForHelp); + CHECK_THROWS_AS(app.parse(input), CLI::CallForHelp); } -TEST(THelp, OnlyOneAllHelp) { +TEST_CASE("THelp: OnlyOneAllHelp", "[help]") { CLI::App app{"My prog"}; // It is not supported to have more than one help flag, last one wins @@ -570,37 +568,37 @@ 
TEST(THelp, OnlyOneAllHelp) { app.set_help_all_flag("--yelp", "Alias for help"); std::vector<std::string> input{"--help-all"}; - EXPECT_THROW(app.parse(input), CLI::ExtrasError); + CHECK_THROWS_AS(app.parse(input), CLI::ExtrasError); std::vector<std::string> input2{"--yelp"}; - EXPECT_THROW(app.parse(input2), CLI::CallForAllHelp); + CHECK_THROWS_AS(app.parse(input2), CLI::CallForAllHelp); // Remove the flag app.set_help_all_flag(); std::vector<std::string> input3{"--yelp"}; - EXPECT_THROW(app.parse(input3), CLI::ExtrasError); + CHECK_THROWS_AS(app.parse(input3), CLI::ExtrasError); } -TEST(THelp, RemoveHelp) { +TEST_CASE("THelp: RemoveHelp", "[help]") { CLI::App app{"My prog"}; app.set_help_flag(); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, Not(HasSubstr("-h,--help"))); - EXPECT_THAT(help, Not(HasSubstr("Options:"))); - EXPECT_THAT(help, HasSubstr("Usage:")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, !Contains("-h,--help")); + CHECK_THAT(help, !Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); std::vector<std::string> input{"--help"}; try { app.parse(input); } catch(const CLI::ParseError &e) { - EXPECT_EQ(static_cast<int>(CLI::ExitCodes::ExtrasError), e.get_exit_code()); + CHECK(e.get_exit_code() == static_cast<int>(CLI::ExitCodes::ExtrasError)); } } -TEST(THelp, RemoveOtherMethodHelp) { +TEST_CASE("THelp: RemoveOtherMethodHelp", "[help]") { CLI::App app{"My prog"}; // Don't do this. Just in case, let's make sure it works. 
@@ -608,20 +606,20 @@ TEST(THelp, RemoveOtherMethodHelp) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, Not(HasSubstr("-h,--help"))); - EXPECT_THAT(help, Not(HasSubstr("Options:"))); - EXPECT_THAT(help, HasSubstr("Usage:")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, !Contains("-h,--help")); + CHECK_THAT(help, !Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); std::vector<std::string> input{"--help"}; try { app.parse(input); } catch(const CLI::ParseError &e) { - EXPECT_EQ(static_cast<int>(CLI::ExitCodes::ExtrasError), e.get_exit_code()); + CHECK(e.get_exit_code() == static_cast<int>(CLI::ExitCodes::ExtrasError)); } } -TEST(THelp, RemoveOtherMethodHelpAll) { +TEST_CASE("THelp: RemoveOtherMethodHelpAll", "[help]") { CLI::App app{"My prog"}; app.set_help_all_flag("--help-all"); @@ -630,61 +628,61 @@ TEST(THelp, RemoveOtherMethodHelpAll) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, Not(HasSubstr("--help-all"))); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Usage:")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, !Contains("--help-all")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); std::vector<std::string> input{"--help-all"}; try { app.parse(input); } catch(const CLI::ParseError &e) { - EXPECT_EQ(static_cast<int>(CLI::ExitCodes::ExtrasError), e.get_exit_code()); + CHECK(e.get_exit_code() == static_cast<int>(CLI::ExitCodes::ExtrasError)); } } -TEST(THelp, NoHelp) { +TEST_CASE("THelp: NoHelp", "[help]") { CLI::App app{"My prog"}; app.set_help_flag(); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, Not(HasSubstr("-h,--help"))); - EXPECT_THAT(help, Not(HasSubstr("Options:"))); - EXPECT_THAT(help, HasSubstr("Usage:")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, !Contains("-h,--help")); + CHECK_THAT(help, 
!Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); std::vector<std::string> input{"--help"}; try { app.parse(input); } catch(const CLI::ParseError &e) { - EXPECT_EQ(static_cast<int>(CLI::ExitCodes::ExtrasError), e.get_exit_code()); + CHECK(e.get_exit_code() == static_cast<int>(CLI::ExitCodes::ExtrasError)); } } -TEST(THelp, CustomHelp) { +TEST_CASE("THelp: CustomHelp", "[help]") { CLI::App app{"My prog"}; CLI::Option *help_option = app.set_help_flag("--yelp", "display help and exit"); - EXPECT_EQ(app.get_help_ptr(), help_option); + CHECK(help_option == app.get_help_ptr()); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("My prog")); - EXPECT_THAT(help, Not(HasSubstr("-h,--help"))); - EXPECT_THAT(help, HasSubstr("--yelp")); - EXPECT_THAT(help, HasSubstr("Options:")); - EXPECT_THAT(help, HasSubstr("Usage:")); + CHECK_THAT(help, Contains("My prog")); + CHECK_THAT(help, !Contains("-h,--help")); + CHECK_THAT(help, Contains("--yelp")); + CHECK_THAT(help, Contains("Options:")); + CHECK_THAT(help, Contains("Usage:")); std::vector<std::string> input{"--yelp"}; try { app.parse(input); } catch(const CLI::CallForHelp &e) { - EXPECT_EQ(static_cast<int>(CLI::ExitCodes::Success), e.get_exit_code()); + CHECK(e.get_exit_code() == static_cast<int>(CLI::ExitCodes::Success)); } } -TEST(THelp, NextLineShouldBeAlignmentInMultilineDescription) { +TEST_CASE("THelp: NextLineShouldBeAlignmentInMultilineDescription", "[help]") { CLI::App app; int i{0}; const std::string first{"first line"}; @@ -693,10 +691,10 @@ TEST(THelp, NextLineShouldBeAlignmentInMultilineDescription) { const std::string help = app.help(); const auto width = app.get_formatter()->get_column_width(); - EXPECT_THAT(help, HasSubstr(first + "\n" + std::string(width, ' ') + second)); + CHECK_THAT(help, Contains(first + "\n" + std::string(width, ' ') + second)); } -TEST(THelp, NiceName) { +TEST_CASE("THelp: NiceName", "[help]") { CLI::App app; int x{0}; @@ -704,23 +702,23 @@ TEST(THelp, NiceName) { 
auto short_name = app.add_option("more,-x,-y", x); auto positional = app.add_option("posit", x); - EXPECT_EQ(long_name->get_name(), "--long"); - EXPECT_EQ(short_name->get_name(), "-x"); - EXPECT_EQ(positional->get_name(), "posit"); + CHECK("--long" == long_name->get_name()); + CHECK("-x" == short_name->get_name()); + CHECK("posit" == positional->get_name()); } -TEST(Exit, ErrorWithHelp) { +TEST_CASE("Exit: ErrorWithHelp", "[help]") { CLI::App app{"My prog"}; std::vector<std::string> input{"-h"}; try { app.parse(input); } catch(const CLI::CallForHelp &e) { - EXPECT_EQ(static_cast<int>(CLI::ExitCodes::Success), e.get_exit_code()); + CHECK(e.get_exit_code() == static_cast<int>(CLI::ExitCodes::Success)); } } -TEST(Exit, ErrorWithAllHelp) { +TEST_CASE("Exit: ErrorWithAllHelp", "[help]") { CLI::App app{"My prog"}; app.set_help_all_flag("--help-all", "All help"); @@ -728,33 +726,33 @@ TEST(Exit, ErrorWithAllHelp) { try { app.parse(input); } catch(const CLI::CallForAllHelp &e) { - EXPECT_EQ(static_cast<int>(CLI::ExitCodes::Success), e.get_exit_code()); + CHECK(e.get_exit_code() == static_cast<int>(CLI::ExitCodes::Success)); } } -TEST(Exit, ErrorWithoutHelp) { +TEST_CASE("Exit: ErrorWithoutHelp", "[help]") { CLI::App app{"My prog"}; std::vector<std::string> input{"--none"}; try { app.parse(input); } catch(const CLI::ParseError &e) { - EXPECT_EQ(static_cast<int>(CLI::ExitCodes::ExtrasError), e.get_exit_code()); + CHECK(e.get_exit_code() == static_cast<int>(CLI::ExitCodes::ExtrasError)); } } -TEST(Exit, ExitCodes) { +TEST_CASE("Exit: ExitCodes", "[help]") { CLI::App app; auto i = static_cast<int>(CLI::ExitCodes::ExtrasError); - EXPECT_EQ(0, app.exit(CLI::Success())); - EXPECT_EQ(0, app.exit(CLI::CallForHelp())); - EXPECT_EQ(i, app.exit(CLI::ExtrasError({"Thing"}))); - EXPECT_EQ(42, app.exit(CLI::RuntimeError(42))); - EXPECT_EQ(1, app.exit(CLI::RuntimeError())); // Not sure if a default here is a good thing + CHECK(app.exit(CLI::Success()) == 0); + 
CHECK(app.exit(CLI::CallForHelp()) == 0); + CHECK(app.exit(CLI::ExtrasError({"Thing"})) == i); + CHECK(app.exit(CLI::RuntimeError(42)) == 42); + CHECK(app.exit(CLI::RuntimeError()) == 1); } -struct CapturedHelp : public ::testing::Test { +struct CapturedHelp { CLI::App app{"My Test Program"}; std::stringstream out{}; std::stringstream err{}; @@ -767,113 +765,112 @@ struct CapturedHelp : public ::testing::Test { } }; -TEST_F(CapturedHelp, Successful) { - EXPECT_EQ(run(CLI::Success()), 0); - EXPECT_EQ(out.str(), ""); - EXPECT_EQ(err.str(), ""); +TEST_CASE_METHOD(CapturedHelp, "Successful", "[help]") { + CHECK(0 == run(CLI::Success())); + CHECK("" == out.str()); + CHECK("" == err.str()); } -TEST_F(CapturedHelp, JustAnError) { - EXPECT_EQ(run(CLI::RuntimeError(42)), 42); - EXPECT_EQ(out.str(), ""); - EXPECT_EQ(err.str(), ""); +TEST_CASE_METHOD(CapturedHelp, "JustAnError", "[help]") { + CHECK(42 == run(CLI::RuntimeError(42))); + CHECK("" == out.str()); + CHECK("" == err.str()); } -TEST_F(CapturedHelp, CallForHelp) { - EXPECT_EQ(run(CLI::CallForHelp()), 0); - EXPECT_EQ(out.str(), app.help()); - EXPECT_EQ(err.str(), ""); +TEST_CASE_METHOD(CapturedHelp, "CallForHelp", "[help]") { + CHECK(0 == run(CLI::CallForHelp())); + CHECK(app.help() == out.str()); + CHECK("" == err.str()); } -TEST_F(CapturedHelp, CallForAllHelp) { - EXPECT_EQ(run(CLI::CallForAllHelp()), 0); - EXPECT_EQ(out.str(), app.help("", CLI::AppFormatMode::All)); - EXPECT_EQ(err.str(), ""); +TEST_CASE_METHOD(CapturedHelp, "CallForAllHelp", "[help]") { + CHECK(0 == run(CLI::CallForAllHelp())); + CHECK(app.help("", CLI::AppFormatMode::All) == out.str()); + CHECK("" == err.str()); } -TEST_F(CapturedHelp, CallForAllHelpOutput) { +TEST_CASE_METHOD(CapturedHelp, "CallForAllHelpOutput", "[help]") { app.set_help_all_flag("--help-all", "Help all"); app.add_subcommand("one", "One description"); CLI::App *sub = app.add_subcommand("two"); sub->add_flag("--three"); - EXPECT_EQ(run(CLI::CallForAllHelp()), 0); - 
EXPECT_EQ(out.str(), app.help("", CLI::AppFormatMode::All)); - EXPECT_EQ(err.str(), ""); - EXPECT_THAT(out.str(), HasSubstr("one")); - EXPECT_THAT(out.str(), HasSubstr("two")); - EXPECT_THAT(out.str(), HasSubstr("--three")); - - EXPECT_EQ(out.str(), - "My Test Program\n" - "Usage: [OPTIONS] [SUBCOMMAND]\n" - "\n" - "Options:\n" - " -h,--help Print this help message and exit\n" - " --help-all Help all\n" - "\n" - "Subcommands:\n" - "one\n" - " One description\n\n" - "two\n" - " Options:\n" - " --three \n\n\n"); -} -TEST_F(CapturedHelp, NewFormattedHelp) { + CHECK(0 == run(CLI::CallForAllHelp())); + CHECK(app.help("", CLI::AppFormatMode::All) == out.str()); + CHECK("" == err.str()); + CHECK_THAT(out.str(), Contains("one")); + CHECK_THAT(out.str(), Contains("two")); + CHECK_THAT(out.str(), Contains("--three")); + + CHECK(out.str() == "My Test Program\n" + "Usage: [OPTIONS] [SUBCOMMAND]\n" + "\n" + "Options:\n" + " -h,--help Print this help message and exit\n" + " --help-all Help all\n" + "\n" + "Subcommands:\n" + "one\n" + " One description\n\n" + "two\n" + " Options:\n" + " --three \n\n\n"); +} +TEST_CASE_METHOD(CapturedHelp, "NewFormattedHelp", "[help]") { app.formatter_fn([](const CLI::App *, std::string, CLI::AppFormatMode) { return "New Help"; }); - EXPECT_EQ(run(CLI::CallForHelp()), 0); - EXPECT_EQ(out.str(), "New Help"); - EXPECT_EQ(err.str(), ""); + CHECK(0 == run(CLI::CallForHelp())); + CHECK("New Help" == out.str()); + CHECK("" == err.str()); } -TEST_F(CapturedHelp, NormalError) { - EXPECT_EQ(run(CLI::ExtrasError({"Thing"})), static_cast<int>(CLI::ExitCodes::ExtrasError)); - EXPECT_EQ(out.str(), ""); - EXPECT_THAT(err.str(), HasSubstr("for more information")); - EXPECT_THAT(err.str(), Not(HasSubstr("ExtrasError"))); - EXPECT_THAT(err.str(), HasSubstr("Thing")); - EXPECT_THAT(err.str(), Not(HasSubstr(" or "))); - EXPECT_THAT(err.str(), Not(HasSubstr("Usage"))); +TEST_CASE_METHOD(CapturedHelp, "NormalError", "[help]") { + 
CHECK(static_cast<int>(CLI::ExitCodes::ExtrasError) == run(CLI::ExtrasError({"Thing"}))); + CHECK("" == out.str()); + CHECK_THAT(err.str(), Contains("for more information")); + CHECK_THAT(err.str(), !Contains("ExtrasError")); + CHECK_THAT(err.str(), Contains("Thing")); + CHECK_THAT(err.str(), !Contains(" or ")); + CHECK_THAT(err.str(), !Contains("Usage")); } -TEST_F(CapturedHelp, DoubleError) { +TEST_CASE_METHOD(CapturedHelp, "DoubleError", "[help]") { app.set_help_all_flag("--help-all"); - EXPECT_EQ(run(CLI::ExtrasError({"Thing"})), static_cast<int>(CLI::ExitCodes::ExtrasError)); - EXPECT_EQ(out.str(), ""); - EXPECT_THAT(err.str(), HasSubstr("for more information")); - EXPECT_THAT(err.str(), HasSubstr(" --help ")); - EXPECT_THAT(err.str(), HasSubstr(" --help-all ")); - EXPECT_THAT(err.str(), HasSubstr(" or ")); - EXPECT_THAT(err.str(), Not(HasSubstr("ExtrasError"))); - EXPECT_THAT(err.str(), HasSubstr("Thing")); - EXPECT_THAT(err.str(), Not(HasSubstr("Usage"))); -} - -TEST_F(CapturedHelp, AllOnlyError) { + CHECK(static_cast<int>(CLI::ExitCodes::ExtrasError) == run(CLI::ExtrasError({"Thing"}))); + CHECK("" == out.str()); + CHECK_THAT(err.str(), Contains("for more information")); + CHECK_THAT(err.str(), Contains(" --help ")); + CHECK_THAT(err.str(), Contains(" --help-all ")); + CHECK_THAT(err.str(), Contains(" or ")); + CHECK_THAT(err.str(), !Contains("ExtrasError")); + CHECK_THAT(err.str(), Contains("Thing")); + CHECK_THAT(err.str(), !Contains("Usage")); +} + +TEST_CASE_METHOD(CapturedHelp, "AllOnlyError", "[help]") { app.set_help_all_flag("--help-all"); app.set_help_flag(); - EXPECT_EQ(run(CLI::ExtrasError({"Thing"})), static_cast<int>(CLI::ExitCodes::ExtrasError)); - EXPECT_EQ(out.str(), ""); - EXPECT_THAT(err.str(), HasSubstr("for more information")); - EXPECT_THAT(err.str(), Not(HasSubstr(" --help "))); - EXPECT_THAT(err.str(), HasSubstr(" --help-all ")); - EXPECT_THAT(err.str(), Not(HasSubstr(" or "))); - EXPECT_THAT(err.str(), Not(HasSubstr("ExtrasError"))); 
- EXPECT_THAT(err.str(), HasSubstr("Thing")); - EXPECT_THAT(err.str(), Not(HasSubstr("Usage"))); -} - -TEST_F(CapturedHelp, ReplacedError) { + CHECK(static_cast<int>(CLI::ExitCodes::ExtrasError) == run(CLI::ExtrasError({"Thing"}))); + CHECK("" == out.str()); + CHECK_THAT(err.str(), Contains("for more information")); + CHECK_THAT(err.str(), !Contains(" --help ")); + CHECK_THAT(err.str(), Contains(" --help-all ")); + CHECK_THAT(err.str(), !Contains(" or ")); + CHECK_THAT(err.str(), !Contains("ExtrasError")); + CHECK_THAT(err.str(), Contains("Thing")); + CHECK_THAT(err.str(), !Contains("Usage")); +} + +TEST_CASE_METHOD(CapturedHelp, "ReplacedError", "[help]") { app.failure_message(CLI::FailureMessage::help); - EXPECT_EQ(run(CLI::ExtrasError({"Thing"})), static_cast<int>(CLI::ExitCodes::ExtrasError)); - EXPECT_EQ(out.str(), ""); - EXPECT_THAT(err.str(), Not(HasSubstr("for more information"))); - EXPECT_THAT(err.str(), HasSubstr("ERROR: ExtrasError")); - EXPECT_THAT(err.str(), HasSubstr("Thing")); - EXPECT_THAT(err.str(), HasSubstr("Usage")); + CHECK(static_cast<int>(CLI::ExitCodes::ExtrasError) == run(CLI::ExtrasError({"Thing"}))); + CHECK("" == out.str()); + CHECK_THAT(err.str(), !Contains("for more information")); + CHECK_THAT(err.str(), Contains("ERROR: ExtrasError")); + CHECK_THAT(err.str(), Contains("Thing")); + CHECK_THAT(err.str(), Contains("Usage")); } // #87 -TEST(THelp, CustomDoubleOption) { +TEST_CASE("THelp: CustomDoubleOption", "[help]") { std::pair<int, double> custom_opt; @@ -885,74 +882,74 @@ TEST(THelp, CustomDoubleOption) { }); opt->type_name("INT FLOAT")->type_size(2); - EXPECT_THAT(app.help(), Not(HasSubstr("x 2"))); + CHECK_THAT(app.help(), !Contains("x 2")); } -TEST(THelp, CheckEmptyTypeName) { +TEST_CASE("THelp: CheckEmptyTypeName", "[help]") { CLI::App app; auto opt = app.add_flag("-f,--flag"); std::string name = opt->get_type_name(); - EXPECT_TRUE(name.empty()); + CHECK(name.empty()); } -TEST(THelp, AccessDescription) { +TEST_CASE("THelp: 
AccessDescription", "[help]") { CLI::App app{"My description goes here"}; - EXPECT_EQ(app.get_description(), "My description goes here"); + CHECK("My description goes here" == app.get_description()); } -TEST(THelp, SetDescriptionAfterCreation) { +TEST_CASE("THelp: SetDescriptionAfterCreation", "[help]") { CLI::App app{""}; app.description("My description goes here"); - EXPECT_EQ(app.get_description(), "My description goes here"); - EXPECT_THAT(app.help(), HasSubstr("My description goes here")); + CHECK("My description goes here" == app.get_description()); + CHECK_THAT(app.help(), Contains("My description goes here")); } -TEST(THelp, AccessOptionDescription) { +TEST_CASE("THelp: AccessOptionDescription", "[help]") { CLI::App app{}; int x{0}; auto opt = app.add_option("-a,--alpha", x, "My description goes here"); - EXPECT_EQ(opt->get_description(), "My description goes here"); + CHECK("My description goes here" == opt->get_description()); } -TEST(THelp, SetOptionDescriptionAfterCreation) { +TEST_CASE("THelp: SetOptionDescriptionAfterCreation", "[help]") { CLI::App app{}; int x{0}; auto opt = app.add_option("-a,--alpha", x); opt->description("My description goes here"); - EXPECT_EQ(opt->get_description(), "My description goes here"); - EXPECT_THAT(app.help(), HasSubstr("My description goes here")); + CHECK("My description goes here" == opt->get_description()); + CHECK_THAT(app.help(), Contains("My description goes here")); } -TEST(THelp, CleanNeeds) { +TEST_CASE("THelp: CleanNeeds", "[help]") { CLI::App app; int x{0}; auto a_name = app.add_option("-a,--alpha", x); app.add_option("-b,--boo", x)->needs(a_name); - EXPECT_THAT(app.help(), Not(HasSubstr("Requires"))); - EXPECT_THAT(app.help(), Not(HasSubstr("Needs: -a,--alpha"))); - EXPECT_THAT(app.help(), HasSubstr("Needs: --alpha")); + CHECK_THAT(app.help(), !Contains("Requires")); + CHECK_THAT(app.help(), !Contains("Needs: -a,--alpha")); + CHECK_THAT(app.help(), Contains("Needs: --alpha")); } -TEST(THelp, 
RequiredPrintout) { +TEST_CASE("THelp: RequiredPrintout", "[help]") { CLI::App app; int x{0}; app.add_option("-a,--alpha", x)->required(); - EXPECT_THAT(app.help(), HasSubstr(" REQUIRED")); + CHECK_THAT(app.help(), Contains(" REQUIRED")); } -TEST(THelp, GroupOrder) { +TEST_CASE("THelp: GroupOrder", "[help]") { CLI::App app; app.add_flag("--one")->group("zee"); @@ -963,12 +960,12 @@ TEST(THelp, GroupOrder) { auto zee_loc = help.find("zee"); auto aee_loc = help.find("aee"); - EXPECT_NE(zee_loc, std::string::npos); - EXPECT_NE(aee_loc, std::string::npos); - EXPECT_LT(zee_loc, aee_loc); + CHECK(std::string::npos != zee_loc); + CHECK(std::string::npos != aee_loc); + CHECK(aee_loc > zee_loc); } -TEST(THelp, ValidatorsText) { +TEST_CASE("THelp: ValidatorsText", "[help]") { CLI::App app; std::string filename; @@ -979,52 +976,52 @@ TEST(THelp, ValidatorsText) { app.add_option("--f4", y)->check(CLI::Range(12)); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("TEXT:FILE")); - EXPECT_THAT(help, HasSubstr("INT in [1 - 4]")); - EXPECT_THAT(help, HasSubstr("UINT:INT in [0 - 12]")); // Loses UINT + CHECK_THAT(help, Contains("TEXT:FILE")); + CHECK_THAT(help, Contains("INT in [1 - 4]")); + CHECK_THAT(help, Contains("UINT:INT in [0 - 12]")); } -TEST(THelp, ValidatorsTextCustom) { +TEST_CASE("THelp: ValidatorsTextCustom", "[help]") { CLI::App app; std::string filename; app.add_option("--f1", filename)->check(CLI::ExistingFile.description("Existing file")); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("Existing file")); + CHECK_THAT(help, Contains("Existing file")); } -TEST(THelp, ValidatorsNonPathText) { +TEST_CASE("THelp: ValidatorsNonPathText", "[help]") { CLI::App app; std::string filename; app.add_option("--f2", filename)->check(CLI::NonexistentPath); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("TEXT:PATH")); + CHECK_THAT(help, Contains("TEXT:PATH")); } -TEST(THelp, ValidatorsDirText) { +TEST_CASE("THelp: ValidatorsDirText", "[help]") 
{ CLI::App app; std::string filename; app.add_option("--f2", filename)->check(CLI::ExistingDirectory); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("TEXT:DIR")); + CHECK_THAT(help, Contains("TEXT:DIR")); } -TEST(THelp, ValidatorsPathText) { +TEST_CASE("THelp: ValidatorsPathText", "[help]") { CLI::App app; std::string filename; app.add_option("--f2", filename)->check(CLI::ExistingPath); std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("TEXT:PATH")); + CHECK_THAT(help, Contains("TEXT:PATH")); } -TEST(THelp, CombinedValidatorsText) { +TEST_CASE("THelp: CombinedValidatorsText", "[help]") { CLI::App app; std::string filename; @@ -1034,12 +1031,12 @@ TEST(THelp, CombinedValidatorsText) { // Can't programmatically tell! // (Users can use ExistingPath, by the way) std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("TEXT:(FILE) OR (DIR)")); - EXPECT_THAT(help, Not(HasSubstr("PATH"))); + CHECK_THAT(help, Contains("TEXT:(FILE) OR (DIR)")); + CHECK_THAT(help, !Contains("PATH")); } // Don't do this in real life, please -TEST(THelp, CombinedValidatorsPathyText) { +TEST_CASE("THelp: CombinedValidatorsPathyText", "[help]") { CLI::App app; std::string filename; @@ -1047,12 +1044,12 @@ TEST(THelp, CombinedValidatorsPathyText) { // Combining validators with the same type string is OK std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("TEXT:")); - EXPECT_THAT(help, HasSubstr("PATH")); + CHECK_THAT(help, Contains("TEXT:")); + CHECK_THAT(help, Contains("PATH")); } // Don't do this in real life, please (and transform does nothing here) -TEST(THelp, CombinedValidatorsPathyTextAsTransform) { +TEST_CASE("THelp: CombinedValidatorsPathyTextAsTransform", "[help]") { CLI::App app; std::string filename; @@ -1060,11 +1057,11 @@ TEST(THelp, CombinedValidatorsPathyTextAsTransform) { // Combining validators with the same type string is OK std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("TEXT:(PATH(existing)) OR (PATH")); + CHECK_THAT(help, 
Contains("TEXT:(PATH(existing)) OR (PATH")); } // #113 Part 2 -TEST(THelp, ChangingSet) { +TEST_CASE("THelp: ChangingSet", "[help]") { CLI::App app; std::set<int> vals{1, 2, 3}; @@ -1073,19 +1070,19 @@ TEST(THelp, ChangingSet) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, Not(HasSubstr("4"))); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, !Contains("4")); vals.insert(4); vals.erase(1); help = app.help(); - EXPECT_THAT(help, Not(HasSubstr("1"))); - EXPECT_THAT(help, HasSubstr("4")); + CHECK_THAT(help, !Contains("1")); + CHECK_THAT(help, Contains("4")); } -TEST(THelp, ChangingSetDefaulted) { +TEST_CASE("THelp: ChangingSetDefaulted", "[help]") { CLI::App app; std::set<int> vals{1, 2, 3}; @@ -1094,19 +1091,19 @@ TEST(THelp, ChangingSetDefaulted) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, Not(HasSubstr("4"))); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, !Contains("4")); vals.insert(4); vals.erase(1); help = app.help(); - EXPECT_THAT(help, Not(HasSubstr("1"))); - EXPECT_THAT(help, HasSubstr("4")); + CHECK_THAT(help, !Contains("1")); + CHECK_THAT(help, Contains("4")); } -TEST(THelp, ChangingCaselessSet) { +TEST_CASE("THelp: ChangingCaselessSet", "[help]") { CLI::App app; std::set<std::string> vals{"1", "2", "3"}; @@ -1115,19 +1112,19 @@ TEST(THelp, ChangingCaselessSet) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, Not(HasSubstr("4"))); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, !Contains("4")); vals.insert("4"); vals.erase("1"); help = app.help(); - EXPECT_THAT(help, Not(HasSubstr("1"))); - EXPECT_THAT(help, HasSubstr("4")); + CHECK_THAT(help, !Contains("1")); + CHECK_THAT(help, Contains("4")); } -TEST(THelp, ChangingCaselessSetDefaulted) { +TEST_CASE("THelp: ChangingCaselessSetDefaulted", "[help]") { CLI::App app; app.option_defaults()->always_capture_default(); @@ -1137,21 +1134,21 @@ TEST(THelp, 
ChangingCaselessSetDefaulted) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, Not(HasSubstr("4"))); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, !Contains("4")); vals.insert("4"); vals.erase("1"); help = app.help(); - EXPECT_THAT(help, Not(HasSubstr("1"))); - EXPECT_THAT(help, HasSubstr("4")); + CHECK_THAT(help, !Contains("1")); + CHECK_THAT(help, Contains("4")); } // New defaults tests (1.8) -TEST(THelp, ChangingDefaults) { +TEST_CASE("THelp: ChangingDefaults", "[help]") { CLI::App app; @@ -1164,10 +1161,10 @@ TEST(THelp, ChangingDefaults) { x = {5, 6}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("INT=[3,4] ...")); + CHECK_THAT(help, Contains("INT=[3,4] ...")); } -TEST(THelp, ChangingDefaultsWithAutoCapture) { +TEST_CASE("THelp: ChangingDefaultsWithAutoCapture", "[help]") { CLI::App app; app.option_defaults()->always_capture_default(); @@ -1178,10 +1175,10 @@ TEST(THelp, ChangingDefaultsWithAutoCapture) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("INT=[1,2] ...")); + CHECK_THAT(help, Contains("INT=[1,2] ...")); } -TEST(THelp, FunctionDefaultString) { +TEST_CASE("THelp: FunctionDefaultString", "[help]") { CLI::App app; @@ -1193,44 +1190,44 @@ TEST(THelp, FunctionDefaultString) { std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("INT=Powerful")); + CHECK_THAT(help, Contains("INT=Powerful")); } -TEST(TVersion, simple_flag) { +TEST_CASE("TVersion: simple_flag", "[help]") { CLI::App app; app.set_version_flag("-v,--version", "VERSION " CLI11_VERSION); auto vers = app.version(); - EXPECT_THAT(vers, HasSubstr("VERSION")); + CHECK_THAT(vers, Contains("VERSION")); app.set_version_flag(); - EXPECT_TRUE(app.version().empty()); + CHECK(app.version().empty()); } -TEST(TVersion, callback_flag) { +TEST_CASE("TVersion: callback_flag", "[help]") { CLI::App app; app.set_version_flag("-v,--version", []() { return std::string("VERSION " CLI11_VERSION); }); auto vers = app.version(); - 
EXPECT_THAT(vers, HasSubstr("VERSION")); + CHECK_THAT(vers, Contains("VERSION")); app.set_version_flag("-v", []() { return std::string("VERSION2 " CLI11_VERSION); }); vers = app.version(); - EXPECT_THAT(vers, HasSubstr("VERSION")); + CHECK_THAT(vers, Contains("VERSION")); } -TEST(TVersion, parse_throw) { +TEST_CASE("TVersion: parse_throw", "[help]") { CLI::App app; app.set_version_flag("--version", CLI11_VERSION); - EXPECT_THROW(app.parse("--version"), CLI::CallForVersion); - EXPECT_THROW(app.parse("--version --arg2 5"), CLI::CallForVersion); + CHECK_THROWS_AS(app.parse("--version"), CLI::CallForVersion); + CHECK_THROWS_AS(app.parse("--version --arg2 5"), CLI::CallForVersion); auto ptr = app.get_version_ptr(); @@ -1238,10 +1235,10 @@ TEST(TVersion, parse_throw) { try { app.parse("--Version"); } catch(const CLI::CallForVersion &v) { - EXPECT_STREQ(v.what(), CLI11_VERSION); - EXPECT_EQ(v.get_exit_code(), 0); + CHECK_THAT(CLI11_VERSION, Catch::Equals(v.what())); + CHECK(0 == v.get_exit_code()); const auto &appc = app; auto cptr = appc.get_version_ptr(); - EXPECT_EQ(cptr->count(), 1U); + CHECK(1U == cptr->count()); } } diff --git a/packages/CLI11/tests/HelpersTest.cpp b/packages/CLI11/tests/HelpersTest.cpp index 993b8a9f436e3916c3d37df22bd43bd904eedac9..0c032fbce389899324025f4c8759e328e5ffaea2 100644 --- a/packages/CLI11/tests/HelpersTest.cpp +++ b/packages/CLI11/tests/HelpersTest.cpp @@ -8,11 +8,11 @@ #include <array> #include <atomic> -#include <climits> #include <complex> #include <cstdint> #include <cstdio> #include <fstream> +#include <limits> #include <map> #include <string> #include <tuple> @@ -25,146 +25,146 @@ class Streamable {}; std::ostream &operator<<(std::ostream &out, const Streamable &) { return out << "Streamable"; } -TEST(TypeTools, Streaming) { +TEST_CASE("TypeTools: Streaming", "[helpers]") { - EXPECT_EQ(CLI::detail::to_string(NotStreamable{}), ""); + CHECK("" == CLI::detail::to_string(NotStreamable{})); - 
EXPECT_EQ(CLI::detail::to_string(Streamable{}), "Streamable"); + CHECK("Streamable" == CLI::detail::to_string(Streamable{})); - EXPECT_EQ(CLI::detail::to_string(5), "5"); + CHECK("5" == CLI::detail::to_string(5)); - EXPECT_EQ(CLI::detail::to_string("string"), std::string("string")); - EXPECT_EQ(CLI::detail::to_string(std::string("string")), std::string("string")); + CHECK(std::string("string") == CLI::detail::to_string("string")); + CHECK(std::string("string") == CLI::detail::to_string(std::string("string"))); } -TEST(TypeTools, tuple) { - EXPECT_FALSE(CLI::detail::is_tuple_like<int>::value); - EXPECT_FALSE(CLI::detail::is_tuple_like<std::vector<double>>::value); +TEST_CASE("TypeTools: tuple", "[helpers]") { + CHECK_FALSE(CLI::detail::is_tuple_like<int>::value); + CHECK_FALSE(CLI::detail::is_tuple_like<std::vector<double>>::value); auto v = CLI::detail::is_tuple_like<std::tuple<double, int>>::value; - EXPECT_TRUE(v); + CHECK(v); v = CLI::detail::is_tuple_like<std::tuple<double, double, double>>::value; - EXPECT_TRUE(v); + CHECK(v); } -TEST(TypeTools, type_size) { +TEST_CASE("TypeTools: type_size", "[helpers]") { auto V = CLI::detail::type_count<int>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::type_count<void>::value; - EXPECT_EQ(V, 0); + CHECK(0 == V); V = CLI::detail::type_count<std::vector<double>>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::type_count<std::tuple<double, int>>::value; - EXPECT_EQ(V, 2); + CHECK(2 == V); V = CLI::detail::type_count<std::tuple<std::string, double, int>>::value; - EXPECT_EQ(V, 3); + CHECK(3 == V); V = CLI::detail::type_count<std::array<std::string, 5>>::value; - EXPECT_EQ(V, 5); + CHECK(5 == V); V = CLI::detail::type_count<std::vector<std::pair<std::string, double>>>::value; - EXPECT_EQ(V, 2); + CHECK(2 == V); V = CLI::detail::type_count<std::tuple<std::pair<std::string, double>>>::value; - EXPECT_EQ(V, 2); + CHECK(2 == V); V = CLI::detail::type_count<std::tuple<int, std::pair<std::string, 
double>>>::value; - EXPECT_EQ(V, 3); + CHECK(3 == V); V = CLI::detail::type_count<std::tuple<std::pair<int, double>, std::pair<std::string, double>>>::value; - EXPECT_EQ(V, 4); + CHECK(4 == V); // maps V = CLI::detail::type_count<std::map<int, std::pair<int, double>>>::value; - EXPECT_EQ(V, 3); + CHECK(3 == V); // three level tuples V = CLI::detail::type_count<std::tuple<int, std::pair<int, std::tuple<int, double, std::string>>>>::value; - EXPECT_EQ(V, 5); + CHECK(5 == V); V = CLI::detail::type_count<std::pair<int, std::vector<int>>>::value; - EXPECT_GE(V, CLI::detail::expected_max_vector_size); + CHECK(CLI::detail::expected_max_vector_size <= V); V = CLI::detail::type_count<std::vector<std::vector<int>>>::value; - EXPECT_EQ(V, CLI::detail::expected_max_vector_size); + CHECK(CLI::detail::expected_max_vector_size == V); } -TEST(TypeTools, type_size_min) { +TEST_CASE("TypeTools: type_size_min", "[helpers]") { auto V = CLI::detail::type_count_min<int>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::type_count_min<void>::value; - EXPECT_EQ(V, 0); + CHECK(0 == V); V = CLI::detail::type_count_min<std::vector<double>>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::type_count_min<std::tuple<double, int>>::value; - EXPECT_EQ(V, 2); + CHECK(2 == V); V = CLI::detail::type_count_min<std::tuple<std::string, double, int>>::value; - EXPECT_EQ(V, 3); + CHECK(3 == V); V = CLI::detail::type_count_min<std::array<std::string, 5>>::value; - EXPECT_EQ(V, 5); + CHECK(5 == V); V = CLI::detail::type_count_min<std::vector<std::pair<std::string, double>>>::value; - EXPECT_EQ(V, 2); + CHECK(2 == V); V = CLI::detail::type_count_min<std::tuple<std::pair<std::string, double>>>::value; - EXPECT_EQ(V, 2); + CHECK(2 == V); V = CLI::detail::type_count_min<std::tuple<int, std::pair<std::string, double>>>::value; - EXPECT_EQ(V, 3); + CHECK(3 == V); V = CLI::detail::type_count_min<std::tuple<std::pair<int, double>, std::pair<std::string, double>>>::value; - EXPECT_EQ(V, 4); + 
CHECK(4 == V); // maps V = CLI::detail::type_count_min<std::map<int, std::pair<int, double>>>::value; - EXPECT_EQ(V, 3); + CHECK(3 == V); // three level tuples V = CLI::detail::type_count_min<std::tuple<int, std::pair<int, std::tuple<int, double, std::string>>>>::value; - EXPECT_EQ(V, 5); + CHECK(5 == V); V = CLI::detail::type_count_min<std::pair<int, std::vector<int>>>::value; - EXPECT_EQ(V, 2); + CHECK(2 == V); V = CLI::detail::type_count_min<std::vector<std::vector<int>>>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::type_count_min<std::vector<std::vector<std::pair<int, int>>>>::value; - EXPECT_EQ(V, 2); + CHECK(2 == V); } -TEST(TypeTools, expected_count) { +TEST_CASE("TypeTools: expected_count", "[helpers]") { auto V = CLI::detail::expected_count<int>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::expected_count<void>::value; - EXPECT_EQ(V, 0); + CHECK(0 == V); V = CLI::detail::expected_count<std::vector<double>>::value; - EXPECT_EQ(V, CLI::detail::expected_max_vector_size); + CHECK(CLI::detail::expected_max_vector_size == V); V = CLI::detail::expected_count<std::tuple<double, int>>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::expected_count<std::tuple<std::string, double, int>>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::expected_count<std::array<std::string, 5>>::value; - EXPECT_EQ(V, 1); + CHECK(1 == V); V = CLI::detail::expected_count<std::vector<std::pair<std::string, double>>>::value; - EXPECT_EQ(V, CLI::detail::expected_max_vector_size); + CHECK(CLI::detail::expected_max_vector_size == V); } -TEST(Split, SimpleByToken) { +TEST_CASE("Split: SimpleByToken", "[helpers]") { auto out = CLI::detail::split("one.two.three", '.'); - ASSERT_EQ(3u, out.size()); - EXPECT_EQ("one", out.at(0)); - EXPECT_EQ("two", out.at(1)); - EXPECT_EQ("three", out.at(2)); + REQUIRE(out.size() == 3u); + CHECK(out.at(0) == "one"); + CHECK(out.at(1) == "two"); + CHECK(out.at(2) == "three"); } -TEST(Split, Single) { 
+TEST_CASE("Split: Single", "[helpers]") { auto out = CLI::detail::split("one", '.'); - ASSERT_EQ(1u, out.size()); - EXPECT_EQ("one", out.at(0)); + REQUIRE(out.size() == 1u); + CHECK(out.at(0) == "one"); } -TEST(Split, Empty) { +TEST_CASE("Split: Empty", "[helpers]") { auto out = CLI::detail::split("", '.'); - ASSERT_EQ(1u, out.size()); - EXPECT_EQ("", out.at(0)); + REQUIRE(out.size() == 1u); + CHECK(out.at(0) == ""); } -TEST(String, InvalidName) { - EXPECT_TRUE(CLI::detail::valid_name_string("valid")); - EXPECT_FALSE(CLI::detail::valid_name_string("-invalid")); - EXPECT_TRUE(CLI::detail::valid_name_string("va-li-d")); - EXPECT_FALSE(CLI::detail::valid_name_string("vali&d")); - EXPECT_TRUE(CLI::detail::valid_name_string("_valid")); - EXPECT_FALSE(CLI::detail::valid_name_string("/valid")); - EXPECT_TRUE(CLI::detail::valid_name_string("vali?d")); - EXPECT_TRUE(CLI::detail::valid_name_string("@@@@")); - EXPECT_TRUE(CLI::detail::valid_name_string("b@d2?")); - EXPECT_TRUE(CLI::detail::valid_name_string("2vali?d")); +TEST_CASE("String: InvalidName", "[helpers]") { + CHECK(CLI::detail::valid_name_string("valid")); + CHECK_FALSE(CLI::detail::valid_name_string("-invalid")); + CHECK(CLI::detail::valid_name_string("va-li-d")); + CHECK_FALSE(CLI::detail::valid_name_string("vali&d")); + CHECK(CLI::detail::valid_name_string("_valid")); + CHECK_FALSE(CLI::detail::valid_name_string("/valid")); + CHECK(CLI::detail::valid_name_string("vali?d")); + CHECK(CLI::detail::valid_name_string("@@@@")); + CHECK(CLI::detail::valid_name_string("b@d2?")); + CHECK(CLI::detail::valid_name_string("2vali?d")); } -TEST(StringTools, Modify) { +TEST_CASE("StringTools: Modify", "[helpers]") { int cnt{0}; std::string newString = CLI::detail::find_and_modify("======", "=", [&cnt](std::string &str, std::size_t index) { if((++cnt) % 2 == 0) { @@ -172,10 +172,10 @@ TEST(StringTools, Modify) { } return index + 1; }); - EXPECT_EQ(newString, "=:=:=:"); + CHECK("=:=:=:" == newString); } -TEST(StringTools, 
Modify2) { +TEST_CASE("StringTools: Modify2", "[helpers]") { std::string newString = CLI::detail::find_and_modify("this is a string test", "is", [](std::string &str, std::size_t index) { if((index > 1) && (str[index - 1] != ' ')) { @@ -184,296 +184,296 @@ TEST(StringTools, Modify2) { } return index + 1; }); - EXPECT_EQ(newString, "that is a string test"); + CHECK("that is a string test" == newString); } -TEST(StringTools, Modify3) { +TEST_CASE("StringTools: Modify3", "[helpers]") { // this picks up 3 sets of 3 after the 'b' then collapses the new first set std::string newString = CLI::detail::find_and_modify("baaaaaaaaaa", "aaa", [](std::string &str, std::size_t index) { str.erase(index, 3); str.insert(str.begin(), 'a'); return 0u; }); - EXPECT_EQ(newString, "aba"); -} - -TEST(StringTools, flagValues) { - EXPECT_EQ(CLI::detail::to_flag_value("0"), -1); - EXPECT_EQ(CLI::detail::to_flag_value("t"), 1); - EXPECT_EQ(CLI::detail::to_flag_value("1"), 1); - EXPECT_EQ(CLI::detail::to_flag_value("6"), 6); - EXPECT_EQ(CLI::detail::to_flag_value("-6"), -6); - EXPECT_EQ(CLI::detail::to_flag_value("false"), -1); - EXPECT_EQ(CLI::detail::to_flag_value("YES"), 1); - EXPECT_THROW(CLI::detail::to_flag_value("frog"), std::invalid_argument); - EXPECT_THROW(CLI::detail::to_flag_value("q"), std::invalid_argument); - EXPECT_EQ(CLI::detail::to_flag_value("NO"), -1); - EXPECT_EQ(CLI::detail::to_flag_value("475555233"), 475555233); -} - -TEST(StringTools, Validation) { - EXPECT_TRUE(CLI::detail::isalpha("")); - EXPECT_TRUE(CLI::detail::isalpha("a")); - EXPECT_TRUE(CLI::detail::isalpha("abcd")); - EXPECT_FALSE(CLI::detail::isalpha("_")); - EXPECT_FALSE(CLI::detail::isalpha("2")); - EXPECT_FALSE(CLI::detail::isalpha("test test")); - EXPECT_FALSE(CLI::detail::isalpha("test ")); - EXPECT_FALSE(CLI::detail::isalpha(" test")); - EXPECT_FALSE(CLI::detail::isalpha("test2")); -} - -TEST(Trim, Various) { + CHECK("aba" == newString); +} + +TEST_CASE("StringTools: flagValues", "[helpers]") { + 
CHECK(-1 == CLI::detail::to_flag_value("0")); + CHECK(1 == CLI::detail::to_flag_value("t")); + CHECK(1 == CLI::detail::to_flag_value("1")); + CHECK(6 == CLI::detail::to_flag_value("6")); + CHECK(-6 == CLI::detail::to_flag_value("-6")); + CHECK(-1 == CLI::detail::to_flag_value("false")); + CHECK(1 == CLI::detail::to_flag_value("YES")); + CHECK_THROWS_AS(CLI::detail::to_flag_value("frog"), std::invalid_argument); + CHECK_THROWS_AS(CLI::detail::to_flag_value("q"), std::invalid_argument); + CHECK(-1 == CLI::detail::to_flag_value("NO")); + CHECK(475555233 == CLI::detail::to_flag_value("475555233")); +} + +TEST_CASE("StringTools: Validation", "[helpers]") { + CHECK(CLI::detail::isalpha("")); + CHECK(CLI::detail::isalpha("a")); + CHECK(CLI::detail::isalpha("abcd")); + CHECK_FALSE(CLI::detail::isalpha("_")); + CHECK_FALSE(CLI::detail::isalpha("2")); + CHECK_FALSE(CLI::detail::isalpha("test test")); + CHECK_FALSE(CLI::detail::isalpha("test ")); + CHECK_FALSE(CLI::detail::isalpha(" test")); + CHECK_FALSE(CLI::detail::isalpha("test2")); +} + +TEST_CASE("Trim: Various", "[helpers]") { std::string s1{" sdlfkj sdflk sd s "}; std::string a1{"sdlfkj sdflk sd s"}; CLI::detail::trim(s1); - EXPECT_EQ(a1, s1); + CHECK(s1 == a1); std::string s2{" a \t"}; CLI::detail::trim(s2); - EXPECT_EQ("a", s2); + CHECK(s2 == "a"); std::string s3{" a \n"}; CLI::detail::trim(s3); - EXPECT_EQ("a", s3); + CHECK(s3 == "a"); std::string s4{" a b "}; - EXPECT_EQ("a b", CLI::detail::trim(s4)); + CHECK(CLI::detail::trim(s4) == "a b"); } -TEST(Trim, VariousFilters) { +TEST_CASE("Trim: VariousFilters", "[helpers]") { std::string s1{" sdlfkj sdflk sd s "}; std::string a1{"sdlfkj sdflk sd s"}; CLI::detail::trim(s1, " "); - EXPECT_EQ(a1, s1); + CHECK(s1 == a1); std::string s2{" a \t"}; CLI::detail::trim(s2, " "); - EXPECT_EQ("a \t", s2); + CHECK(s2 == "a \t"); std::string s3{"abdavda"}; CLI::detail::trim(s3, "a"); - EXPECT_EQ("bdavd", s3); + CHECK(s3 == "bdavd"); std::string s4{"abcabcabc"}; - 
EXPECT_EQ("cabcabc", CLI::detail::trim(s4, "ab")); + CHECK(CLI::detail::trim(s4, "ab") == "cabcabc"); } -TEST(Trim, TrimCopy) { +TEST_CASE("Trim: TrimCopy", "[helpers]") { std::string orig{" cabc "}; std::string trimmed = CLI::detail::trim_copy(orig); - EXPECT_EQ("cabc", trimmed); - EXPECT_NE(orig, trimmed); + CHECK(trimmed == "cabc"); + CHECK(trimmed != orig); CLI::detail::trim(orig); - EXPECT_EQ(trimmed, orig); + CHECK(orig == trimmed); orig = "abcabcabc"; trimmed = CLI::detail::trim_copy(orig, "ab"); - EXPECT_EQ("cabcabc", trimmed); - EXPECT_NE(orig, trimmed); + CHECK(trimmed == "cabcabc"); + CHECK(trimmed != orig); CLI::detail::trim(orig, "ab"); - EXPECT_EQ(trimmed, orig); + CHECK(orig == trimmed); } -TEST(Validators, FileExists) { +TEST_CASE("Validators: FileExists", "[helpers]") { std::string myfile{"TestFileNotUsed.txt"}; - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK_FALSE(CLI::ExistingFile(myfile).empty()); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); - EXPECT_TRUE(CLI::ExistingFile(myfile).empty()); + CHECK(ok); + CHECK(CLI::ExistingFile(myfile).empty()); std::remove(myfile.c_str()); - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK_FALSE(CLI::ExistingFile(myfile).empty()); } -TEST(Validators, FileNotExists) { +TEST_CASE("Validators: FileNotExists", "[helpers]") { std::string myfile{"TestFileNotUsed.txt"}; - EXPECT_TRUE(CLI::NonexistentPath(myfile).empty()); + CHECK(CLI::NonexistentPath(myfile).empty()); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); - EXPECT_FALSE(CLI::NonexistentPath(myfile).empty()); + CHECK(ok); + CHECK_FALSE(CLI::NonexistentPath(myfile).empty()); std::remove(myfile.c_str()); - EXPECT_TRUE(CLI::NonexistentPath(myfile).empty()); + CHECK(CLI::NonexistentPath(myfile).empty()); } -TEST(Validators, FileIsDir) { +TEST_CASE("Validators: FileIsDir", "[helpers]") { std::string mydir{"../tests"}; - 
EXPECT_NE(CLI::ExistingFile(mydir), ""); + CHECK("" != CLI::ExistingFile(mydir)); } -TEST(Validators, DirectoryExists) { +TEST_CASE("Validators: DirectoryExists", "[helpers]") { std::string mydir{"../tests"}; - EXPECT_EQ(CLI::ExistingDirectory(mydir), ""); + CHECK("" == CLI::ExistingDirectory(mydir)); } -TEST(Validators, DirectoryNotExists) { +TEST_CASE("Validators: DirectoryNotExists", "[helpers]") { std::string mydir{"nondirectory"}; - EXPECT_NE(CLI::ExistingDirectory(mydir), ""); + CHECK("" != CLI::ExistingDirectory(mydir)); } -TEST(Validators, DirectoryIsFile) { +TEST_CASE("Validators: DirectoryIsFile", "[helpers]") { std::string myfile{"TestFileNotUsed.txt"}; - EXPECT_TRUE(CLI::NonexistentPath(myfile).empty()); + CHECK(CLI::NonexistentPath(myfile).empty()); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); - EXPECT_FALSE(CLI::ExistingDirectory(myfile).empty()); + CHECK(ok); + CHECK_FALSE(CLI::ExistingDirectory(myfile).empty()); std::remove(myfile.c_str()); - EXPECT_TRUE(CLI::NonexistentPath(myfile).empty()); + CHECK(CLI::NonexistentPath(myfile).empty()); } -TEST(Validators, PathExistsDir) { +TEST_CASE("Validators: PathExistsDir", "[helpers]") { std::string mydir{"../tests"}; - EXPECT_EQ(CLI::ExistingPath(mydir), ""); + CHECK("" == CLI::ExistingPath(mydir)); } -TEST(Validators, PathExistsFile) { +TEST_CASE("Validators: PathExistsFile", "[helpers]") { std::string myfile{"TestFileNotUsed.txt"}; - EXPECT_FALSE(CLI::ExistingPath(myfile).empty()); + CHECK_FALSE(CLI::ExistingPath(myfile).empty()); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); - EXPECT_TRUE(CLI::ExistingPath(myfile).empty()); + CHECK(ok); + CHECK(CLI::ExistingPath(myfile).empty()); std::remove(myfile.c_str()); - EXPECT_FALSE(CLI::ExistingPath(myfile).empty()); + CHECK_FALSE(CLI::ExistingPath(myfile).empty()); } -TEST(Validators, PathNotExistsDir) { +TEST_CASE("Validators: PathNotExistsDir", 
"[helpers]") { std::string mydir{"nonpath"}; - EXPECT_NE(CLI::ExistingPath(mydir), ""); + CHECK("" != CLI::ExistingPath(mydir)); } -TEST(Validators, IPValidate1) { +TEST_CASE("Validators: IPValidate1", "[helpers]") { std::string ip = "1.1.1.1"; - EXPECT_TRUE(CLI::ValidIPV4(ip).empty()); + CHECK(CLI::ValidIPV4(ip).empty()); ip = "224.255.0.1"; - EXPECT_TRUE(CLI::ValidIPV4(ip).empty()); + CHECK(CLI::ValidIPV4(ip).empty()); ip = "-1.255.0.1"; - EXPECT_FALSE(CLI::ValidIPV4(ip).empty()); + CHECK_FALSE(CLI::ValidIPV4(ip).empty()); ip = "1.256.0.1"; - EXPECT_FALSE(CLI::ValidIPV4(ip).empty()); + CHECK_FALSE(CLI::ValidIPV4(ip).empty()); ip = "1.256.0.1"; - EXPECT_FALSE(CLI::ValidIPV4(ip).empty()); + CHECK_FALSE(CLI::ValidIPV4(ip).empty()); ip = "aaa"; - EXPECT_FALSE(CLI::ValidIPV4(ip).empty()); + CHECK_FALSE(CLI::ValidIPV4(ip).empty()); ip = "1.2.3.abc"; - EXPECT_FALSE(CLI::ValidIPV4(ip).empty()); + CHECK_FALSE(CLI::ValidIPV4(ip).empty()); ip = "11.22"; - EXPECT_FALSE(CLI::ValidIPV4(ip).empty()); + CHECK_FALSE(CLI::ValidIPV4(ip).empty()); } -TEST(Validators, PositiveValidator) { +TEST_CASE("Validators: PositiveValidator", "[helpers]") { std::string num = "1.1.1.1"; - EXPECT_FALSE(CLI::PositiveNumber(num).empty()); + CHECK_FALSE(CLI::PositiveNumber(num).empty()); num = "1"; - EXPECT_TRUE(CLI::PositiveNumber(num).empty()); + CHECK(CLI::PositiveNumber(num).empty()); num = "10000"; - EXPECT_TRUE(CLI::PositiveNumber(num).empty()); + CHECK(CLI::PositiveNumber(num).empty()); num = "0"; - EXPECT_FALSE(CLI::PositiveNumber(num).empty()); + CHECK_FALSE(CLI::PositiveNumber(num).empty()); num = "+0.5"; - EXPECT_TRUE(CLI::PositiveNumber(num).empty()); + CHECK(CLI::PositiveNumber(num).empty()); num = "-1"; - EXPECT_FALSE(CLI::PositiveNumber(num).empty()); + CHECK_FALSE(CLI::PositiveNumber(num).empty()); num = "-1.5"; - EXPECT_FALSE(CLI::PositiveNumber(num).empty()); + CHECK_FALSE(CLI::PositiveNumber(num).empty()); num = "a"; - EXPECT_FALSE(CLI::PositiveNumber(num).empty()); + 
CHECK_FALSE(CLI::PositiveNumber(num).empty()); } -TEST(Validators, NonNegativeValidator) { +TEST_CASE("Validators: NonNegativeValidator", "[helpers]") { std::string num = "1.1.1.1"; - EXPECT_FALSE(CLI::NonNegativeNumber(num).empty()); + CHECK_FALSE(CLI::NonNegativeNumber(num).empty()); num = "1"; - EXPECT_TRUE(CLI::NonNegativeNumber(num).empty()); + CHECK(CLI::NonNegativeNumber(num).empty()); num = "10000"; - EXPECT_TRUE(CLI::NonNegativeNumber(num).empty()); + CHECK(CLI::NonNegativeNumber(num).empty()); num = "0"; - EXPECT_TRUE(CLI::NonNegativeNumber(num).empty()); + CHECK(CLI::NonNegativeNumber(num).empty()); num = "+0.5"; - EXPECT_TRUE(CLI::NonNegativeNumber(num).empty()); + CHECK(CLI::NonNegativeNumber(num).empty()); num = "-1"; - EXPECT_FALSE(CLI::NonNegativeNumber(num).empty()); + CHECK_FALSE(CLI::NonNegativeNumber(num).empty()); num = "-1.5"; - EXPECT_FALSE(CLI::NonNegativeNumber(num).empty()); + CHECK_FALSE(CLI::NonNegativeNumber(num).empty()); num = "a"; - EXPECT_FALSE(CLI::NonNegativeNumber(num).empty()); + CHECK_FALSE(CLI::NonNegativeNumber(num).empty()); } -TEST(Validators, NumberValidator) { +TEST_CASE("Validators: NumberValidator", "[helpers]") { std::string num = "1.1.1.1"; - EXPECT_FALSE(CLI::Number(num).empty()); + CHECK_FALSE(CLI::Number(num).empty()); num = "1.7"; - EXPECT_TRUE(CLI::Number(num).empty()); + CHECK(CLI::Number(num).empty()); num = "10000"; - EXPECT_TRUE(CLI::Number(num).empty()); + CHECK(CLI::Number(num).empty()); num = "-0.000"; - EXPECT_TRUE(CLI::Number(num).empty()); + CHECK(CLI::Number(num).empty()); num = "+1.55"; - EXPECT_TRUE(CLI::Number(num).empty()); + CHECK(CLI::Number(num).empty()); num = "a"; - EXPECT_FALSE(CLI::Number(num).empty()); + CHECK_FALSE(CLI::Number(num).empty()); } -TEST(Validators, CombinedAndRange) { +TEST_CASE("Validators: CombinedAndRange", "[helpers]") { auto crange = CLI::Range(0, 12) & CLI::Range(4, 16); - EXPECT_TRUE(crange("4").empty()); - EXPECT_TRUE(crange("12").empty()); - 
EXPECT_TRUE(crange("7").empty()); + CHECK(crange("4").empty()); + CHECK(crange("12").empty()); + CHECK(crange("7").empty()); - EXPECT_FALSE(crange("-2").empty()); - EXPECT_FALSE(crange("2").empty()); - EXPECT_FALSE(crange("15").empty()); - EXPECT_FALSE(crange("16").empty()); - EXPECT_FALSE(crange("18").empty()); + CHECK_FALSE(crange("-2").empty()); + CHECK_FALSE(crange("2").empty()); + CHECK_FALSE(crange("15").empty()); + CHECK_FALSE(crange("16").empty()); + CHECK_FALSE(crange("18").empty()); } -TEST(Validators, CombinedOrRange) { +TEST_CASE("Validators: CombinedOrRange", "[helpers]") { auto crange = CLI::Range(0, 4) | CLI::Range(8, 12); - EXPECT_FALSE(crange("-2").empty()); - EXPECT_TRUE(crange("2").empty()); - EXPECT_FALSE(crange("5").empty()); - EXPECT_TRUE(crange("8").empty()); - EXPECT_TRUE(crange("12").empty()); - EXPECT_FALSE(crange("16").empty()); + CHECK_FALSE(crange("-2").empty()); + CHECK(crange("2").empty()); + CHECK_FALSE(crange("5").empty()); + CHECK(crange("8").empty()); + CHECK(crange("12").empty()); + CHECK_FALSE(crange("16").empty()); } -TEST(Validators, CombinedPaths) { +TEST_CASE("Validators: CombinedPaths", "[helpers]") { std::string myfile{"TestFileNotUsed.txt"}; - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK_FALSE(CLI::ExistingFile(myfile).empty()); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); + CHECK(ok); std::string dir{"../tests"}; std::string notpath{"nondirectory"}; auto path_or_dir = CLI::ExistingPath | CLI::ExistingDirectory; - EXPECT_TRUE(path_or_dir(dir).empty()); - EXPECT_TRUE(path_or_dir(myfile).empty()); - EXPECT_FALSE(path_or_dir(notpath).empty()); + CHECK(path_or_dir(dir).empty()); + CHECK(path_or_dir(myfile).empty()); + CHECK_FALSE(path_or_dir(notpath).empty()); auto file_or_dir = CLI::ExistingFile | CLI::ExistingDirectory; - EXPECT_TRUE(file_or_dir(dir).empty()); - EXPECT_TRUE(file_or_dir(myfile).empty()); - EXPECT_FALSE(file_or_dir(notpath).empty()); + 
CHECK(file_or_dir(dir).empty()); + CHECK(file_or_dir(myfile).empty()); + CHECK_FALSE(file_or_dir(notpath).empty()); auto path_and_dir = CLI::ExistingPath & CLI::ExistingDirectory; - EXPECT_TRUE(path_and_dir(dir).empty()); - EXPECT_FALSE(path_and_dir(myfile).empty()); - EXPECT_FALSE(path_and_dir(notpath).empty()); + CHECK(path_and_dir(dir).empty()); + CHECK_FALSE(path_and_dir(myfile).empty()); + CHECK_FALSE(path_and_dir(notpath).empty()); auto path_and_file = CLI::ExistingFile & CLI::ExistingDirectory; - EXPECT_FALSE(path_and_file(dir).empty()); - EXPECT_FALSE(path_and_file(myfile).empty()); - EXPECT_FALSE(path_and_file(notpath).empty()); + CHECK_FALSE(path_and_file(dir).empty()); + CHECK_FALSE(path_and_file(myfile).empty()); + CHECK_FALSE(path_and_file(notpath).empty()); std::remove(myfile.c_str()); - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK_FALSE(CLI::ExistingFile(myfile).empty()); } -TEST(Validators, ProgramNameSplit) { +TEST_CASE("Validators: ProgramNameSplit", "[helpers]") { TempFile myfile{"program_name1.exe"}; { std::ofstream out{myfile}; @@ -481,8 +481,8 @@ TEST(Validators, ProgramNameSplit) { } auto res = CLI::detail::split_program_name(std::string("./") + std::string(myfile) + " this is a bunch of extra stuff "); - EXPECT_EQ(res.first, std::string("./") + std::string(myfile)); - EXPECT_EQ(res.second, "this is a bunch of extra stuff"); + CHECK(std::string("./") + std::string(myfile) == res.first); + CHECK("this is a bunch of extra stuff" == res.second); TempFile myfile2{"program name1.exe"}; { @@ -491,257 +491,257 @@ TEST(Validators, ProgramNameSplit) { } res = CLI::detail::split_program_name(std::string(" ") + std::string("./") + std::string(myfile2) + " this is a bunch of extra stuff "); - EXPECT_EQ(res.first, std::string("./") + std::string(myfile2)); - EXPECT_EQ(res.second, "this is a bunch of extra stuff"); + CHECK(std::string("./") + std::string(myfile2) == res.first); + CHECK("this is a bunch of extra stuff" == res.second); res = 
CLI::detail::split_program_name("./program_name this is a bunch of extra stuff "); - EXPECT_EQ(res.first, "./program_name"); // test sectioning of first argument even if it can't detect the file - EXPECT_EQ(res.second, "this is a bunch of extra stuff"); + CHECK("./program_name" == res.first); + CHECK("this is a bunch of extra stuff" == res.second); res = CLI::detail::split_program_name(std::string(" ./") + std::string(myfile) + " "); - EXPECT_EQ(res.first, std::string("./") + std::string(myfile)); - EXPECT_TRUE(res.second.empty()); + CHECK(std::string("./") + std::string(myfile) == res.first); + CHECK(res.second.empty()); } -TEST(CheckedMultiply, Int) { +TEST_CASE("CheckedMultiply: Int", "[helpers]") { int a{10}; int b{-20}; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, -200); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(-200 == a); a = 0; b = -20; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, 0); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(0 == a); a = 20; b = 0; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, 0); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(0 == a); a = std::numeric_limits<int>::max(); b = 1; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<int>::max()); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<int>::max() == a); a = std::numeric_limits<int>::max(); b = 2; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<int>::max()); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<int>::max() == a); a = std::numeric_limits<int>::max(); b = -1; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, -std::numeric_limits<int>::max()); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(-std::numeric_limits<int>::max() == a); a = std::numeric_limits<int>::max(); b = std::numeric_limits<int>::max(); - 
ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<int>::max()); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<int>::max() == a); a = std::numeric_limits<int>::min(); b = std::numeric_limits<int>::max(); - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<int>::min()); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<int>::min() == a); a = std::numeric_limits<int>::min(); b = 1; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<int>::min()); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<int>::min() == a); a = std::numeric_limits<int>::min(); b = -1; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<int>::min()); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<int>::min() == a); b = std::numeric_limits<int>::min(); a = -1; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, -1); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(-1 == a); a = std::numeric_limits<int>::min() / 100; b = 99; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<int>::min() / 100 * 99); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<int>::min() / 100 * 99 == a); a = std::numeric_limits<int>::min() / 100; b = -101; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<int>::min() / 100); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<int>::min() / 100 == a); a = 2; b = std::numeric_limits<int>::min() / 2; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); + REQUIRE(CLI::detail::checked_multiply(a, b)); a = std::numeric_limits<int>::min() / 2; b = 2; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); + REQUIRE(CLI::detail::checked_multiply(a, b)); a = 4; b = 
std::numeric_limits<int>::min() / 4; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); + REQUIRE(CLI::detail::checked_multiply(a, b)); a = 48; b = std::numeric_limits<int>::min() / 48; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); + REQUIRE(CLI::detail::checked_multiply(a, b)); } -TEST(CheckedMultiply, SizeT) { +TEST_CASE("CheckedMultiply: SizeT", "[helpers]") { std::size_t a = 10; std::size_t b = 20; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, 200u); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(200u == a); a = 0u; b = 20u; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, 0u); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(0u == a); a = 20u; b = 0u; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, 0u); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(0u == a); a = std::numeric_limits<std::size_t>::max(); b = 1u; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<std::size_t>::max()); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<std::size_t>::max() == a); a = std::numeric_limits<std::size_t>::max(); b = 2u; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<std::size_t>::max()); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<std::size_t>::max() == a); a = std::numeric_limits<std::size_t>::max(); b = std::numeric_limits<std::size_t>::max(); - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<std::size_t>::max()); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<std::size_t>::max() == a); a = std::numeric_limits<std::size_t>::max() / 100; b = 99u; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_EQ(a, std::numeric_limits<std::size_t>::max() / 100u * 99u); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<std::size_t>::max() / 100u 
* 99u == a); } -TEST(CheckedMultiply, Float) { +TEST_CASE("CheckedMultiply: Float", "[helpers]") { float a{10.0F}; float b{20.0F}; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, 200); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(200 == Approx(a)); a = 0.0F; b = 20.0F; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, 0); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(0 == Approx(a)); a = INFINITY; b = 20.0F; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, INFINITY); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(INFINITY == Approx(a)); a = 2.0F; b = -INFINITY; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, -INFINITY); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(-INFINITY == Approx(a)); a = std::numeric_limits<float>::max() / 100.0F; b = 1.0F; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, std::numeric_limits<float>::max() / 100.0F); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<float>::max() / 100.0F == Approx(a)); a = std::numeric_limits<float>::max() / 100.0F; b = 99.0F; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, std::numeric_limits<float>::max() / 100.0F * 99.0F); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<float>::max() / 100.0F * 99.0F == Approx(a)); a = std::numeric_limits<float>::max() / 100.0F; b = 101; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, std::numeric_limits<float>::max() / 100.0F); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<float>::max() / 100.0F == Approx(a)); a = std::numeric_limits<float>::max() / 100.0F; b = -99; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, std::numeric_limits<float>::max() / 100.0F * -99.0F); + REQUIRE(CLI::detail::checked_multiply(a, b)); + 
REQUIRE(std::numeric_limits<float>::max() / 100.0F * -99.0F == Approx(a)); a = std::numeric_limits<float>::max() / 100.0F; b = -101; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_FLOAT_EQ(a, std::numeric_limits<float>::max() / 100.0F); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<float>::max() / 100.0F == Approx(a)); } -TEST(CheckedMultiply, Double) { +TEST_CASE("CheckedMultiply: Double", "[helpers]") { double a{10.0F}; double b{20.0F}; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, 200); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(200 == Approx(a)); a = 0; b = 20; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, 0); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(0 == Approx(a)); a = INFINITY; b = 20; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, INFINITY); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(INFINITY == Approx(a)); a = 2; b = -INFINITY; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, -INFINITY); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(-INFINITY == Approx(a)); a = std::numeric_limits<double>::max() / 100; b = 1; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, std::numeric_limits<double>::max() / 100); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<double>::max() / 100 == Approx(a)); a = std::numeric_limits<double>::max() / 100; b = 99; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, std::numeric_limits<double>::max() / 100 * 99); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<double>::max() / 100 * 99 == Approx(a)); a = std::numeric_limits<double>::max() / 100; b = 101; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, std::numeric_limits<double>::max() / 100); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + 
REQUIRE(std::numeric_limits<double>::max() / 100 == Approx(a)); a = std::numeric_limits<double>::max() / 100; b = -99; - ASSERT_TRUE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, std::numeric_limits<double>::max() / 100 * -99); + REQUIRE(CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<double>::max() / 100 * -99 == Approx(a)); a = std::numeric_limits<double>::max() / 100; b = -101; - ASSERT_FALSE(CLI::detail::checked_multiply(a, b)); - ASSERT_DOUBLE_EQ(a, std::numeric_limits<double>::max() / 100); + REQUIRE(!CLI::detail::checked_multiply(a, b)); + REQUIRE(std::numeric_limits<double>::max() / 100 == Approx(a)); } // Yes, this is testing an app_helper :) -TEST(AppHelper, TempfileCreated) { +TEST_CASE("AppHelper: TempfileCreated", "[helpers]") { std::string name = "TestFileNotUsed.txt"; { TempFile myfile{name}; - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK_FALSE(CLI::ExistingFile(myfile).empty()); bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); - EXPECT_TRUE(CLI::ExistingFile(name).empty()); - EXPECT_THROW({ TempFile otherfile(name); }, std::runtime_error); + CHECK(ok); + CHECK(CLI::ExistingFile(name).empty()); + CHECK_THROWS_AS([&]() { TempFile otherfile(name); }(), std::runtime_error); } - EXPECT_FALSE(CLI::ExistingFile(name).empty()); + CHECK_FALSE(CLI::ExistingFile(name).empty()); } -TEST(AppHelper, TempfileNotCreated) { +TEST_CASE("AppHelper: TempfileNotCreated", "[helpers]") { std::string name = "TestFileNotUsed.txt"; { TempFile myfile{name}; - EXPECT_FALSE(CLI::ExistingFile(myfile).empty()); + CHECK_FALSE(CLI::ExistingFile(myfile).empty()); } - EXPECT_FALSE(CLI::ExistingFile(name).empty()); + CHECK_FALSE(CLI::ExistingFile(name).empty()); } -TEST(AppHelper, Ofstream) { +TEST_CASE("AppHelper: Ofstream", "[helpers]") { std::string name = "TestFileNotUsed.txt"; { @@ -752,174 +752,178 @@ TEST(AppHelper, Ofstream) { out << "this is output" << std::endl; } - 
EXPECT_TRUE(CLI::ExistingFile(myfile).empty()); + CHECK(CLI::ExistingFile(myfile).empty()); } - EXPECT_FALSE(CLI::ExistingFile(name).empty()); + CHECK_FALSE(CLI::ExistingFile(name).empty()); } -TEST(Split, StringList) { +TEST_CASE("Split: StringList", "[helpers]") { std::vector<std::string> results{"a", "long", "--lone", "-q"}; - EXPECT_EQ(results, CLI::detail::split_names("a,long,--lone,-q")); - EXPECT_EQ(results, CLI::detail::split_names(" a, long, --lone, -q")); - EXPECT_EQ(results, CLI::detail::split_names(" a , long , --lone , -q ")); - EXPECT_EQ(results, CLI::detail::split_names(" a , long , --lone , -q ")); + CHECK(CLI::detail::split_names("a,long,--lone,-q") == results); + CHECK(CLI::detail::split_names(" a, long, --lone, -q") == results); + CHECK(CLI::detail::split_names(" a , long , --lone , -q ") == results); + CHECK(CLI::detail::split_names(" a , long , --lone , -q ") == results); - EXPECT_EQ(std::vector<std::string>({"one"}), CLI::detail::split_names("one")); + CHECK(CLI::detail::split_names("one") == std::vector<std::string>({"one"})); } -TEST(RegEx, Shorts) { +TEST_CASE("RegEx: Shorts", "[helpers]") { std::string name, value; - EXPECT_TRUE(CLI::detail::split_short("-a", name, value)); - EXPECT_EQ("a", name); - EXPECT_EQ("", value); + CHECK(CLI::detail::split_short("-a", name, value)); + CHECK(name == "a"); + CHECK(value == ""); - EXPECT_TRUE(CLI::detail::split_short("-B", name, value)); - EXPECT_EQ("B", name); - EXPECT_EQ("", value); + CHECK(CLI::detail::split_short("-B", name, value)); + CHECK(name == "B"); + CHECK(value == ""); - EXPECT_TRUE(CLI::detail::split_short("-cc", name, value)); - EXPECT_EQ("c", name); - EXPECT_EQ("c", value); + CHECK(CLI::detail::split_short("-cc", name, value)); + CHECK(name == "c"); + CHECK(value == "c"); - EXPECT_TRUE(CLI::detail::split_short("-simple", name, value)); - EXPECT_EQ("s", name); - EXPECT_EQ("imple", value); + CHECK(CLI::detail::split_short("-simple", name, value)); + CHECK(name == "s"); + CHECK(value == 
"imple"); - EXPECT_FALSE(CLI::detail::split_short("--a", name, value)); - EXPECT_FALSE(CLI::detail::split_short("--thing", name, value)); - EXPECT_FALSE(CLI::detail::split_short("--", name, value)); - EXPECT_FALSE(CLI::detail::split_short("something", name, value)); - EXPECT_FALSE(CLI::detail::split_short("s", name, value)); + CHECK_FALSE(CLI::detail::split_short("--a", name, value)); + CHECK_FALSE(CLI::detail::split_short("--thing", name, value)); + CHECK_FALSE(CLI::detail::split_short("--", name, value)); + CHECK_FALSE(CLI::detail::split_short("something", name, value)); + CHECK_FALSE(CLI::detail::split_short("s", name, value)); } -TEST(RegEx, Longs) { +TEST_CASE("RegEx: Longs", "[helpers]") { std::string name, value; - EXPECT_TRUE(CLI::detail::split_long("--a", name, value)); - EXPECT_EQ("a", name); - EXPECT_EQ("", value); + CHECK(CLI::detail::split_long("--a", name, value)); + CHECK(name == "a"); + CHECK(value == ""); - EXPECT_TRUE(CLI::detail::split_long("--thing", name, value)); - EXPECT_EQ("thing", name); - EXPECT_EQ("", value); + CHECK(CLI::detail::split_long("--thing", name, value)); + CHECK(name == "thing"); + CHECK(value == ""); - EXPECT_TRUE(CLI::detail::split_long("--some=thing", name, value)); - EXPECT_EQ("some", name); - EXPECT_EQ("thing", value); + CHECK(CLI::detail::split_long("--some=thing", name, value)); + CHECK(name == "some"); + CHECK(value == "thing"); - EXPECT_FALSE(CLI::detail::split_long("-a", name, value)); - EXPECT_FALSE(CLI::detail::split_long("-things", name, value)); - EXPECT_FALSE(CLI::detail::split_long("Q", name, value)); - EXPECT_FALSE(CLI::detail::split_long("--", name, value)); + CHECK_FALSE(CLI::detail::split_long("-a", name, value)); + CHECK_FALSE(CLI::detail::split_long("-things", name, value)); + CHECK_FALSE(CLI::detail::split_long("Q", name, value)); + CHECK_FALSE(CLI::detail::split_long("--", name, value)); } -TEST(RegEx, SplittingNew) { +TEST_CASE("RegEx: SplittingNew", "[helpers]") { std::vector<std::string> shorts; 
std::vector<std::string> longs; std::string pname; - EXPECT_NO_THROW(std::tie(shorts, longs, pname) = CLI::detail::get_names({"--long", "-s", "-q", "--also-long"})); - EXPECT_EQ(std::vector<std::string>({"long", "also-long"}), longs); - EXPECT_EQ(std::vector<std::string>({"s", "q"}), shorts); - EXPECT_EQ("", pname); + CHECK_NOTHROW(std::tie(shorts, longs, pname) = CLI::detail::get_names({"--long", "-s", "-q", "--also-long"})); + CHECK(longs == std::vector<std::string>({"long", "also-long"})); + CHECK(shorts == std::vector<std::string>({"s", "q"})); + CHECK(pname == ""); - EXPECT_NO_THROW(std::tie(shorts, longs, pname) = - CLI::detail::get_names({"--long", "", "-s", "-q", "", "--also-long"})); - EXPECT_EQ(std::vector<std::string>({"long", "also-long"}), longs); - EXPECT_EQ(std::vector<std::string>({"s", "q"}), shorts); + std::tie(shorts, longs, pname) = CLI::detail::get_names({"--long", "", "-s", "-q", "", "--also-long"}); + CHECK(longs == std::vector<std::string>({"long", "also-long"})); + CHECK(shorts == std::vector<std::string>({"s", "q"})); - EXPECT_THROW(std::tie(shorts, longs, pname) = CLI::detail::get_names({"-"}), CLI::BadNameString); - EXPECT_THROW(std::tie(shorts, longs, pname) = CLI::detail::get_names({"--"}), CLI::BadNameString); - EXPECT_THROW(std::tie(shorts, longs, pname) = CLI::detail::get_names({"-hi"}), CLI::BadNameString); - EXPECT_THROW(std::tie(shorts, longs, pname) = CLI::detail::get_names({"---hi"}), CLI::BadNameString); - EXPECT_THROW(std::tie(shorts, longs, pname) = CLI::detail::get_names({"one", "two"}), CLI::BadNameString); + CHECK_THROWS_AS([&]() { std::tie(shorts, longs, pname) = CLI::detail::get_names({"-"}); }(), CLI::BadNameString); + CHECK_THROWS_AS([&]() { std::tie(shorts, longs, pname) = CLI::detail::get_names({"--"}); }(), CLI::BadNameString); + CHECK_THROWS_AS([&]() { std::tie(shorts, longs, pname) = CLI::detail::get_names({"-hi"}); }(), CLI::BadNameString); + CHECK_THROWS_AS([&]() { std::tie(shorts, longs, pname) = 
CLI::detail::get_names({"---hi"}); }(), + CLI::BadNameString); + CHECK_THROWS_AS( + [&]() { + std::tie(shorts, longs, pname) = CLI::detail::get_names({"one", "two"}); + }(), + CLI::BadNameString); } -TEST(String, ToLower) { EXPECT_EQ("one and two", CLI::detail::to_lower("one And TWO")); } +TEST_CASE("String: ToLower", "[helpers]") { CHECK("one and two" == CLI::detail::to_lower("one And TWO")); } -TEST(Join, Forward) { +TEST_CASE("Join: Forward", "[helpers]") { std::vector<std::string> val{{"one", "two", "three"}}; - EXPECT_EQ("one,two,three", CLI::detail::join(val)); - EXPECT_EQ("one;two;three", CLI::detail::join(val, ";")); + CHECK(CLI::detail::join(val) == "one,two,three"); + CHECK(CLI::detail::join(val, ";") == "one;two;three"); } -TEST(Join, Backward) { +TEST_CASE("Join: Backward", "[helpers]") { std::vector<std::string> val{{"three", "two", "one"}}; - EXPECT_EQ("one,two,three", CLI::detail::rjoin(val)); - EXPECT_EQ("one;two;three", CLI::detail::rjoin(val, ";")); + CHECK(CLI::detail::rjoin(val) == "one,two,three"); + CHECK(CLI::detail::rjoin(val, ";") == "one;two;three"); } -TEST(SplitUp, Simple) { +TEST_CASE("SplitUp: Simple", "[helpers]") { std::vector<std::string> oput = {"one", "two three"}; std::string orig{R"(one "two three")"}; std::vector<std::string> result = CLI::detail::split_up(orig); - EXPECT_EQ(oput, result); + CHECK(result == oput); } -TEST(SplitUp, SimpleDifferentQuotes) { +TEST_CASE("SplitUp: SimpleDifferentQuotes", "[helpers]") { std::vector<std::string> oput = {"one", "two three"}; std::string orig{R"(one `two three`)"}; std::vector<std::string> result = CLI::detail::split_up(orig); - EXPECT_EQ(oput, result); + CHECK(result == oput); } -TEST(SplitUp, SimpleDifferentQuotes2) { +TEST_CASE("SplitUp: SimpleDifferentQuotes2", "[helpers]") { std::vector<std::string> oput = {"one", "two three"}; std::string orig{R"(one 'two three')"}; std::vector<std::string> result = CLI::detail::split_up(orig); - EXPECT_EQ(oput, result); + CHECK(result == oput); } 
-TEST(SplitUp, Layered) { +TEST_CASE("SplitUp: Layered", "[helpers]") { std::vector<std::string> output = {R"(one 'two three')"}; std::string orig{R"("one 'two three'")"}; std::vector<std::string> result = CLI::detail::split_up(orig); - EXPECT_EQ(output, result); + CHECK(result == output); } -TEST(SplitUp, Spaces) { +TEST_CASE("SplitUp: Spaces", "[helpers]") { std::vector<std::string> oput = {"one", " two three"}; std::string orig{R"( one " two three" )"}; std::vector<std::string> result = CLI::detail::split_up(orig); - EXPECT_EQ(oput, result); + CHECK(result == oput); } -TEST(SplitUp, BadStrings) { +TEST_CASE("SplitUp: BadStrings", "[helpers]") { std::vector<std::string> oput = {"one", " two three"}; std::string orig{R"( one " two three )"}; std::vector<std::string> result = CLI::detail::split_up(orig); - EXPECT_EQ(oput, result); + CHECK(result == oput); oput = {"one", " two three"}; orig = R"( one ' two three )"; result = CLI::detail::split_up(orig); - EXPECT_EQ(oput, result); + CHECK(result == oput); } -TEST(Types, TypeName) { +TEST_CASE("Types: TypeName", "[helpers]") { std::string int_name = CLI::detail::type_name<int>(); - EXPECT_EQ("INT", int_name); + CHECK(int_name == "INT"); std::string int2_name = CLI::detail::type_name<std::int16_t>(); - EXPECT_EQ("INT", int2_name); + CHECK(int2_name == "INT"); std::string uint_name = CLI::detail::type_name<unsigned char>(); - EXPECT_EQ("UINT", uint_name); + CHECK(uint_name == "UINT"); std::string float_name = CLI::detail::type_name<double>(); - EXPECT_EQ("FLOAT", float_name); + CHECK(float_name == "FLOAT"); std::string char_name = CLI::detail::type_name<char>(); - EXPECT_EQ("CHAR", char_name); + CHECK(char_name == "CHAR"); std::string vector_name = CLI::detail::type_name<std::vector<int>>(); - EXPECT_EQ("INT", vector_name); + CHECK(vector_name == "INT"); vector_name = CLI::detail::type_name<std::vector<double>>(); - EXPECT_EQ("FLOAT", vector_name); + CHECK(vector_name == "FLOAT"); 
static_assert(CLI::detail::classify_object<std::pair<int, std::string>>::value == CLI::detail::object_category::tuple_value, @@ -930,228 +934,228 @@ TEST(Types, TypeName) { "tuple<string,double> does not read like a tuple"); std::string pair_name = CLI::detail::type_name<std::vector<std::pair<int, std::string>>>(); - EXPECT_EQ("[INT,TEXT]", pair_name); + CHECK(pair_name == "[INT,TEXT]"); vector_name = CLI::detail::type_name<std::vector<std::vector<unsigned char>>>(); - EXPECT_EQ("UINT", vector_name); + CHECK(vector_name == "UINT"); auto vclass = CLI::detail::classify_object<std::vector<std::vector<unsigned char>>>::value; - EXPECT_EQ(vclass, CLI::detail::object_category::container_value); + CHECK(CLI::detail::object_category::container_value == vclass); auto tclass = CLI::detail::classify_object<std::tuple<double>>::value; - EXPECT_EQ(tclass, CLI::detail::object_category::number_constructible); + CHECK(CLI::detail::object_category::number_constructible == tclass); std::string tuple_name = CLI::detail::type_name<std::tuple<double>>(); - EXPECT_EQ("FLOAT", tuple_name); + CHECK(tuple_name == "FLOAT"); static_assert(CLI::detail::classify_object<std::tuple<int, std::string>>::value == CLI::detail::object_category::tuple_value, "tuple<int,string> does not read like a tuple"); tuple_name = CLI::detail::type_name<std::tuple<int, std::string>>(); - EXPECT_EQ("[INT,TEXT]", tuple_name); + CHECK(tuple_name == "[INT,TEXT]"); tuple_name = CLI::detail::type_name<std::tuple<const int, std::string>>(); - EXPECT_EQ("[INT,TEXT]", tuple_name); + CHECK(tuple_name == "[INT,TEXT]"); tuple_name = CLI::detail::type_name<const std::tuple<int, std::string>>(); - EXPECT_EQ("[INT,TEXT]", tuple_name); + CHECK(tuple_name == "[INT,TEXT]"); tuple_name = CLI::detail::type_name<std::tuple<std::string, double>>(); - EXPECT_EQ("[TEXT,FLOAT]", tuple_name); + CHECK(tuple_name == "[TEXT,FLOAT]"); tuple_name = CLI::detail::type_name<const std::tuple<std::string, double>>(); - EXPECT_EQ("[TEXT,FLOAT]", 
tuple_name); + CHECK(tuple_name == "[TEXT,FLOAT]"); tuple_name = CLI::detail::type_name<std::tuple<int, std::string, double>>(); - EXPECT_EQ("[INT,TEXT,FLOAT]", tuple_name); + CHECK(tuple_name == "[INT,TEXT,FLOAT]"); tuple_name = CLI::detail::type_name<std::tuple<int, std::string, double, unsigned int>>(); - EXPECT_EQ("[INT,TEXT,FLOAT,UINT]", tuple_name); + CHECK(tuple_name == "[INT,TEXT,FLOAT,UINT]"); tuple_name = CLI::detail::type_name<std::tuple<int, std::string, double, unsigned int, std::string>>(); - EXPECT_EQ("[INT,TEXT,FLOAT,UINT,TEXT]", tuple_name); + CHECK(tuple_name == "[INT,TEXT,FLOAT,UINT,TEXT]"); tuple_name = CLI::detail::type_name<std::array<int, 10>>(); - EXPECT_EQ("[INT,INT,INT,INT,INT,INT,INT,INT,INT,INT]", tuple_name); + CHECK(tuple_name == "[INT,INT,INT,INT,INT,INT,INT,INT,INT,INT]"); std::string text_name = CLI::detail::type_name<std::string>(); - EXPECT_EQ("TEXT", text_name); + CHECK(text_name == "TEXT"); std::string text2_name = CLI::detail::type_name<char *>(); - EXPECT_EQ("TEXT", text2_name); + CHECK(text2_name == "TEXT"); enum class test { test1, test2, test3 }; std::string enum_name = CLI::detail::type_name<test>(); - EXPECT_EQ("ENUM", enum_name); + CHECK(enum_name == "ENUM"); vclass = CLI::detail::classify_object<std::tuple<test>>::value; - EXPECT_EQ(vclass, CLI::detail::object_category::tuple_value); + CHECK(CLI::detail::object_category::tuple_value == vclass); static_assert(CLI::detail::classify_object<std::tuple<test>>::value == CLI::detail::object_category::tuple_value, "tuple<test> does not classify as a tuple"); std::string enum_name2 = CLI::detail::type_name<std::tuple<test>>(); - EXPECT_EQ("ENUM", enum_name2); + CHECK(enum_name2 == "ENUM"); std::string umapName = CLI::detail::type_name<std::unordered_map<int, std::tuple<std::string, double>>>(); - EXPECT_EQ("[INT,[TEXT,FLOAT]]", umapName); + CHECK(umapName == "[INT,[TEXT,FLOAT]]"); vclass = CLI::detail::classify_object<std::atomic<int>>::value; } -TEST(Types, OverflowSmall) { 
+TEST_CASE("Types: OverflowSmall", "[helpers]") { signed char x; - auto strmax = std::to_string(SCHAR_MAX + 1); - EXPECT_FALSE(CLI::detail::lexical_cast(strmax, x)); + auto strmax = std::to_string(std::numeric_limits<signed char>::max() + 1); + CHECK_FALSE(CLI::detail::lexical_cast(strmax, x)); unsigned char y; - strmax = std::to_string(UINT8_MAX + 1); - EXPECT_FALSE(CLI::detail::lexical_cast(strmax, y)); + strmax = std::to_string(std::numeric_limits<unsigned char>::max() + 1); + CHECK_FALSE(CLI::detail::lexical_cast(strmax, y)); } -TEST(Types, LexicalCastInt) { +TEST_CASE("Types: LexicalCastInt", "[helpers]") { std::string signed_input = "-912"; int x_signed; - EXPECT_TRUE(CLI::detail::lexical_cast(signed_input, x_signed)); - EXPECT_EQ(-912, x_signed); + CHECK(CLI::detail::lexical_cast(signed_input, x_signed)); + CHECK(x_signed == -912); std::string unsigned_input = "912"; unsigned int x_unsigned; - EXPECT_TRUE(CLI::detail::lexical_cast(unsigned_input, x_unsigned)); - EXPECT_EQ((unsigned int)912, x_unsigned); + CHECK(CLI::detail::lexical_cast(unsigned_input, x_unsigned)); + CHECK(x_unsigned == (unsigned int)912); - EXPECT_FALSE(CLI::detail::lexical_cast(signed_input, x_unsigned)); + CHECK_FALSE(CLI::detail::lexical_cast(signed_input, x_unsigned)); unsigned char y; - std::string overflow_input = std::to_string(UINT64_MAX) + "0"; - EXPECT_FALSE(CLI::detail::lexical_cast(overflow_input, y)); + std::string overflow_input = std::to_string(std::numeric_limits<uint64_t>::max()) + "0"; + CHECK_FALSE(CLI::detail::lexical_cast(overflow_input, y)); char y_signed; - EXPECT_FALSE(CLI::detail::lexical_cast(overflow_input, y_signed)); + CHECK_FALSE(CLI::detail::lexical_cast(overflow_input, y_signed)); std::string bad_input = "hello"; - EXPECT_FALSE(CLI::detail::lexical_cast(bad_input, y)); + CHECK_FALSE(CLI::detail::lexical_cast(bad_input, y)); std::string extra_input = "912i"; - EXPECT_FALSE(CLI::detail::lexical_cast(extra_input, y)); + 
CHECK_FALSE(CLI::detail::lexical_cast(extra_input, y)); std::string empty_input{}; - EXPECT_FALSE(CLI::detail::lexical_cast(empty_input, x_signed)); - EXPECT_FALSE(CLI::detail::lexical_cast(empty_input, x_unsigned)); - EXPECT_FALSE(CLI::detail::lexical_cast(empty_input, y_signed)); + CHECK_FALSE(CLI::detail::lexical_cast(empty_input, x_signed)); + CHECK_FALSE(CLI::detail::lexical_cast(empty_input, x_unsigned)); + CHECK_FALSE(CLI::detail::lexical_cast(empty_input, y_signed)); } -TEST(Types, LexicalCastDouble) { +TEST_CASE("Types: LexicalCastDouble", "[helpers]") { std::string input = "9.12"; long double x; - EXPECT_TRUE(CLI::detail::lexical_cast(input, x)); - EXPECT_FLOAT_EQ((float)9.12, (float)x); + CHECK(CLI::detail::lexical_cast(input, x)); + CHECK((float)x == Approx((float)9.12)); std::string bad_input = "hello"; - EXPECT_FALSE(CLI::detail::lexical_cast(bad_input, x)); + CHECK_FALSE(CLI::detail::lexical_cast(bad_input, x)); - std::string overflow_input = "1" + std::to_string(LDBL_MAX); - EXPECT_TRUE(CLI::detail::lexical_cast(overflow_input, x)); - EXPECT_FALSE(std::isfinite(x)); + std::string overflow_input = "1" + std::to_string(std::numeric_limits<long double>::max()); + CHECK(CLI::detail::lexical_cast(overflow_input, x)); + CHECK_FALSE(std::isfinite(x)); std::string extra_input = "9.12i"; - EXPECT_FALSE(CLI::detail::lexical_cast(extra_input, x)); + CHECK_FALSE(CLI::detail::lexical_cast(extra_input, x)); std::string empty_input{}; - EXPECT_FALSE(CLI::detail::lexical_cast(empty_input, x)); + CHECK_FALSE(CLI::detail::lexical_cast(empty_input, x)); } -TEST(Types, LexicalCastBool) { +TEST_CASE("Types: LexicalCastBool", "[helpers]") { std::string input = "false"; bool x; - EXPECT_TRUE(CLI::detail::lexical_cast(input, x)); - EXPECT_FALSE(x); + CHECK(CLI::detail::lexical_cast(input, x)); + CHECK_FALSE(x); std::string bad_input = "happy"; - EXPECT_FALSE(CLI::detail::lexical_cast(bad_input, x)); + CHECK_FALSE(CLI::detail::lexical_cast(bad_input, x)); std::string 
input_true = "EnaBLE"; - EXPECT_TRUE(CLI::detail::lexical_cast(input_true, x)); - EXPECT_TRUE(x); + CHECK(CLI::detail::lexical_cast(input_true, x)); + CHECK(x); } -TEST(Types, LexicalCastString) { +TEST_CASE("Types: LexicalCastString", "[helpers]") { std::string input = "one"; std::string output; CLI::detail::lexical_cast(input, output); - EXPECT_EQ(input, output); + CHECK(output == input); } -TEST(Types, LexicalCastParsable) { +TEST_CASE("Types: LexicalCastParsable", "[helpers]") { std::string input = "(4.2,7.3)"; std::string fail_input = "4.2,7.3"; std::string extra_input = "(4.2,7.3)e"; std::complex<double> output; - EXPECT_TRUE(CLI::detail::lexical_cast(input, output)); - EXPECT_DOUBLE_EQ(output.real(), 4.2); // Doing this in one go sometimes has trouble - EXPECT_DOUBLE_EQ(output.imag(), 7.3); // on clang + gcc 4.8 due to missing const + CHECK(CLI::detail::lexical_cast(input, output)); + CHECK(4.2 == Approx(output.real())); + CHECK(7.3 == Approx(output.imag())); - EXPECT_TRUE(CLI::detail::lexical_cast("2.456", output)); - EXPECT_DOUBLE_EQ(output.real(), 2.456); // Doing this in one go sometimes has trouble - EXPECT_DOUBLE_EQ(output.imag(), 0.0); // on clang + gcc 4.8 due to missing const + CHECK(CLI::detail::lexical_cast("2.456", output)); + CHECK(2.456 == Approx(output.real())); + CHECK(0.0 == Approx(output.imag())); - EXPECT_FALSE(CLI::detail::lexical_cast(fail_input, output)); - EXPECT_FALSE(CLI::detail::lexical_cast(extra_input, output)); + CHECK_FALSE(CLI::detail::lexical_cast(fail_input, output)); + CHECK_FALSE(CLI::detail::lexical_cast(extra_input, output)); } -TEST(Types, LexicalCastEnum) { +TEST_CASE("Types: LexicalCastEnum", "[helpers]") { enum t1 : signed char { v1 = 5, v3 = 7, v5 = -9 }; t1 output; - EXPECT_TRUE(CLI::detail::lexical_cast("-9", output)); - EXPECT_EQ(output, v5); + CHECK(CLI::detail::lexical_cast("-9", output)); + CHECK(v5 == output); - EXPECT_FALSE(CLI::detail::lexical_cast("invalid", output)); + 
CHECK_FALSE(CLI::detail::lexical_cast("invalid", output)); enum class t2 : std::uint64_t { enum1 = 65, enum2 = 45667, enum3 = 9999999999999 }; t2 output2{t2::enum2}; - EXPECT_TRUE(CLI::detail::lexical_cast("65", output2)); - EXPECT_EQ(output2, t2::enum1); + CHECK(CLI::detail::lexical_cast("65", output2)); + CHECK(t2::enum1 == output2); - EXPECT_FALSE(CLI::detail::lexical_cast("invalid", output2)); + CHECK_FALSE(CLI::detail::lexical_cast("invalid", output2)); - EXPECT_TRUE(CLI::detail::lexical_cast("9999999999999", output2)); - EXPECT_EQ(output2, t2::enum3); + CHECK(CLI::detail::lexical_cast("9999999999999", output2)); + CHECK(t2::enum3 == output2); } -TEST(Types, LexicalConversionDouble) { +TEST_CASE("Types: LexicalConversionDouble", "[helpers]") { CLI::results_t input = {"9.12"}; long double x{0.0}; bool res = CLI::detail::lexical_conversion<long double, double>(input, x); - EXPECT_TRUE(res); - EXPECT_FLOAT_EQ((float)9.12, (float)x); + CHECK(res); + CHECK((float)x == Approx((float)9.12)); CLI::results_t bad_input = {"hello"}; res = CLI::detail::lexical_conversion<long double, double>(input, x); - EXPECT_TRUE(res); + CHECK(res); } -TEST(Types, LexicalConversionDoubleTuple) { +TEST_CASE("Types: LexicalConversionDoubleTuple", "[helpers]") { CLI::results_t input = {"9.12"}; std::tuple<double> x{0.0}; bool res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_TRUE(res); - EXPECT_DOUBLE_EQ(9.12, std::get<0>(x)); + CHECK(res); + CHECK(std::get<0>(x) == Approx(9.12)); CLI::results_t bad_input = {"hello"}; res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_TRUE(res); + CHECK(res); } -TEST(Types, LexicalConversionVectorDouble) { +TEST_CASE("Types: LexicalConversionVectorDouble", "[helpers]") { CLI::results_t input = {"9.12", "10.79", "-3.54"}; std::vector<double> x; bool res = CLI::detail::lexical_conversion<std::vector<double>, double>(input, x); - EXPECT_TRUE(res); - EXPECT_EQ(x.size(), 3u); - 
EXPECT_DOUBLE_EQ(x[2], -3.54); + CHECK(res); + CHECK(3u == x.size()); + CHECK(-3.54 == Approx(x[2])); res = CLI::detail::lexical_conversion<std::vector<double>, std::vector<double>>(input, x); - EXPECT_TRUE(res); - EXPECT_EQ(x.size(), 3u); - EXPECT_DOUBLE_EQ(x[2], -3.54); + CHECK(res); + CHECK(3u == x.size()); + CHECK(-3.54 == Approx(x[2])); } static_assert(!CLI::detail::is_tuple_like<std::vector<double>>::value, "vector should not be like a tuple"); @@ -1162,107 +1166,107 @@ static_assert(!CLI::detail::is_tuple_like<std::string>::value, "std::string shou static_assert(!CLI::detail::is_tuple_like<double>::value, "double should not be like a tuple"); static_assert(CLI::detail::is_tuple_like<std::tuple<double, int, double>>::value, "tuple should look like a tuple"); -TEST(Types, LexicalConversionTuple2) { +TEST_CASE("Types: LexicalConversionTuple2", "[helpers]") { CLI::results_t input = {"9.12", "19"}; std::tuple<double, int> x{0.0, 0}; static_assert(CLI::detail::is_tuple_like<decltype(x)>::value, "tuple type must have is_tuple_like trait to be true"); bool res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_TRUE(res); - EXPECT_EQ(std::get<1>(x), 19); - EXPECT_DOUBLE_EQ(std::get<0>(x), 9.12); + CHECK(res); + CHECK(19 == std::get<1>(x)); + CHECK(9.12 == Approx(std::get<0>(x))); input = {"19", "9.12"}; res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_FALSE(res); + CHECK_FALSE(res); } -TEST(Types, LexicalConversionTuple3) { +TEST_CASE("Types: LexicalConversionTuple3", "[helpers]") { CLI::results_t input = {"9.12", "19", "hippo"}; std::tuple<double, int, std::string> x; bool res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_TRUE(res); - EXPECT_EQ(std::get<1>(x), 19); - EXPECT_DOUBLE_EQ(std::get<0>(x), 9.12); - EXPECT_EQ(std::get<2>(x), "hippo"); + CHECK(res); + CHECK(19 == std::get<1>(x)); + CHECK(9.12 == Approx(std::get<0>(x))); + CHECK("hippo" == std::get<2>(x)); input 
= {"19", "9.12"}; res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_FALSE(res); + CHECK_FALSE(res); } -TEST(Types, LexicalConversionTuple4) { +TEST_CASE("Types: LexicalConversionTuple4", "[helpers]") { CLI::results_t input = {"9.12", "19", "18.6", "5.87"}; std::array<double, 4> x; bool res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_TRUE(res); - EXPECT_DOUBLE_EQ(std::get<1>(x), 19); - EXPECT_DOUBLE_EQ(x[0], 9.12); - EXPECT_DOUBLE_EQ(x[2], 18.6); - EXPECT_DOUBLE_EQ(x[3], 5.87); + CHECK(res); + CHECK(19 == Approx(std::get<1>(x))); + CHECK(9.12 == Approx(x[0])); + CHECK(18.6 == Approx(x[2])); + CHECK(5.87 == Approx(x[3])); input = {"19", "9.12", "hippo"}; res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_FALSE(res); + CHECK_FALSE(res); } -TEST(Types, LexicalConversionTuple5) { +TEST_CASE("Types: LexicalConversionTuple5", "[helpers]") { CLI::results_t input = {"9", "19", "18", "5", "235235"}; std::array<unsigned int, 5> x; bool res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_TRUE(res); - EXPECT_EQ(std::get<1>(x), 19u); - EXPECT_EQ(x[0], 9u); - EXPECT_EQ(x[2], 18u); - EXPECT_EQ(x[3], 5u); - EXPECT_EQ(x[4], 235235u); + CHECK(res); + CHECK(19u == std::get<1>(x)); + CHECK(9u == x[0]); + CHECK(18u == x[2]); + CHECK(5u == x[3]); + CHECK(235235u == x[4]); input = {"19", "9.12", "hippo"}; res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_FALSE(res); + CHECK_FALSE(res); } -TEST(Types, LexicalConversionTuple10) { +TEST_CASE("Types: LexicalConversionTuple10", "[helpers]") { CLI::results_t input = {"9", "19", "18", "5", "235235", "9", "19", "18", "5", "235235"}; std::array<unsigned int, 10> x; bool res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_TRUE(res); - EXPECT_EQ(std::get<1>(x), 19u); - EXPECT_EQ(x[0], 9u); - EXPECT_EQ(x[2], 18u); - EXPECT_EQ(x[3], 5u); - 
EXPECT_EQ(x[4], 235235u); - EXPECT_EQ(x[9], 235235u); + CHECK(res); + CHECK(19u == std::get<1>(x)); + CHECK(9u == x[0]); + CHECK(18u == x[2]); + CHECK(5u == x[3]); + CHECK(235235u == x[4]); + CHECK(235235u == x[9]); input[3] = "hippo"; res = CLI::detail::lexical_conversion<decltype(x), decltype(x)>(input, x); - EXPECT_FALSE(res); + CHECK_FALSE(res); } -TEST(Types, LexicalConversionTuple10XC) { +TEST_CASE("Types: LexicalConversionTuple10XC", "[helpers]") { CLI::results_t input = {"9", "19", "18", "5", "235235", "9", "19", "18", "5", "235235"}; std::array<double, 10> x; bool res = CLI::detail::lexical_conversion<decltype(x), std::array<unsigned int, 10>>(input, x); - EXPECT_TRUE(res); - EXPECT_EQ(std::get<1>(x), 19.0); - EXPECT_EQ(x[0], 9.0); - EXPECT_EQ(x[2], 18.0); - EXPECT_EQ(x[3], 5.0); - EXPECT_EQ(x[4], 235235.0); - EXPECT_EQ(x[9], 235235.0); + CHECK(res); + CHECK(19.0 == std::get<1>(x)); + CHECK(9.0 == x[0]); + CHECK(18.0 == x[2]); + CHECK(5.0 == x[3]); + CHECK(235235.0 == x[4]); + CHECK(235235.0 == x[9]); input[3] = "19.7"; res = CLI::detail::lexical_conversion<decltype(x), std::array<unsigned int, 10>>(input, x); - EXPECT_FALSE(res); + CHECK_FALSE(res); } -TEST(Types, LexicalConversionComplex) { +TEST_CASE("Types: LexicalConversionComplex", "[helpers]") { CLI::results_t input = {"5.1", "3.5"}; std::complex<double> x; bool res = CLI::detail::lexical_conversion<std::complex<double>, std::array<double, 2>>(input, x); - EXPECT_TRUE(res); - EXPECT_EQ(x.real(), 5.1); - EXPECT_EQ(x.imag(), 3.5); + CHECK(res); + CHECK(5.1 == x.real()); + CHECK(3.5 == x.imag()); } static_assert(CLI::detail::is_wrapper<std::vector<double>>::value, "vector double should be a wrapper"); @@ -1285,16 +1289,16 @@ static_assert(CLI::detail::is_readable_container<const std::vector<int>>::value, static_assert(CLI::detail::is_readable_container<const std::vector<int> &>::value, "const vector int & should be a readable container"); -TEST(FixNewLines, BasicCheck) { +TEST_CASE("FixNewLines: 
BasicCheck", "[helpers]") { std::string input = "one\ntwo"; std::string output = "one\n; two"; std::string result = CLI::detail::fix_newlines("; ", input); - EXPECT_EQ(result, output); + CHECK(output == result); } -TEST(FixNewLines, EdgesCheck) { +TEST_CASE("FixNewLines: EdgesCheck", "[helpers]") { std::string input = "\none\ntwo\n"; std::string output = "\n; one\n; two\n; "; std::string result = CLI::detail::fix_newlines("; ", input); - EXPECT_EQ(result, output); + CHECK(output == result); } diff --git a/packages/CLI11/tests/NewParseTest.cpp b/packages/CLI11/tests/NewParseTest.cpp index cf0adc384726c3b51fbdcfb0244ff105914701c0..d9d9dbf91ca777d8c70fa2f289316e535acec0f0 100644 --- a/packages/CLI11/tests/NewParseTest.cpp +++ b/packages/CLI11/tests/NewParseTest.cpp @@ -5,159 +5,159 @@ // SPDX-License-Identifier: BSD-3-Clause #include "app_helper.hpp" -#include "gmock/gmock.h" + #include <complex> #include <cstdint> -using ::testing::HasSubstr; +using Catch::Matchers::Contains; using cx = std::complex<double>; -TEST_F(TApp, Complex) { +TEST_CASE_METHOD(TApp, "Complex", "[newparse]") { cx comp{1, 2}; app.add_complex("-c,--complex", comp, "", true); args = {"-c", "4", "3"}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, HasSubstr("2")); - EXPECT_THAT(help, HasSubstr("COMPLEX")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("2")); + CHECK_THAT(help, Contains("COMPLEX")); - EXPECT_DOUBLE_EQ(1, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(1)); + CHECK(comp.imag() == Approx(2)); run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); } -TEST_F(TApp, ComplexOption) { +TEST_CASE_METHOD(TApp, "ComplexOption", "[newparse]") { cx comp{1, 2}; app.add_option("-c,--complex", comp, "", true); args = {"-c", "4", "3"}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - 
EXPECT_THAT(help, HasSubstr("2")); - EXPECT_THAT(help, HasSubstr("COMPLEX")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("2")); + CHECK_THAT(help, Contains("COMPLEX")); - EXPECT_DOUBLE_EQ(1, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(1)); + CHECK(comp.imag() == Approx(2)); run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); } -TEST_F(TApp, ComplexFloat) { +TEST_CASE_METHOD(TApp, "ComplexFloat", "[newparse]") { std::complex<float> comp{1, 2}; app.add_complex<std::complex<float>, float>("-c,--complex", comp, "", true); args = {"-c", "4", "3"}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, HasSubstr("2")); - EXPECT_THAT(help, HasSubstr("COMPLEX")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("2")); + CHECK_THAT(help, Contains("COMPLEX")); - EXPECT_FLOAT_EQ(1, comp.real()); - EXPECT_FLOAT_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(1)); + CHECK(comp.imag() == Approx(2)); run(); - EXPECT_FLOAT_EQ(4, comp.real()); - EXPECT_FLOAT_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); } -TEST_F(TApp, ComplexFloatOption) { +TEST_CASE_METHOD(TApp, "ComplexFloatOption", "[newparse]") { std::complex<float> comp{1, 2}; app.add_option("-c,--complex", comp, "", true); args = {"-c", "4", "3"}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, HasSubstr("2")); - EXPECT_THAT(help, HasSubstr("COMPLEX")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("2")); + CHECK_THAT(help, Contains("COMPLEX")); - EXPECT_FLOAT_EQ(1, comp.real()); - EXPECT_FLOAT_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(1)); + CHECK(comp.imag() == Approx(2)); run(); - EXPECT_FLOAT_EQ(4, comp.real()); - EXPECT_FLOAT_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); 
} -TEST_F(TApp, ComplexWithDelimiter) { +TEST_CASE_METHOD(TApp, "ComplexWithDelimiter", "[newparse]") { cx comp{1, 2}; app.add_complex("-c,--complex", comp, "", true)->delimiter('+'); args = {"-c", "4+3i"}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, HasSubstr("2")); - EXPECT_THAT(help, HasSubstr("COMPLEX")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("2")); + CHECK_THAT(help, Contains("COMPLEX")); - EXPECT_DOUBLE_EQ(1, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(1)); + CHECK(comp.imag() == Approx(2)); run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); args = {"-c", "5+-3i"}; run(); - EXPECT_DOUBLE_EQ(5, comp.real()); - EXPECT_DOUBLE_EQ(-3, comp.imag()); + CHECK(comp.real() == Approx(5)); + CHECK(comp.imag() == Approx(-3)); args = {"-c", "6", "-4i"}; run(); - EXPECT_DOUBLE_EQ(6, comp.real()); - EXPECT_DOUBLE_EQ(-4, comp.imag()); + CHECK(comp.real() == Approx(6)); + CHECK(comp.imag() == Approx(-4)); } -TEST_F(TApp, ComplexWithDelimiterOption) { +TEST_CASE_METHOD(TApp, "ComplexWithDelimiterOption", "[newparse]") { cx comp{1, 2}; app.add_option("-c,--complex", comp, "", true)->delimiter('+'); args = {"-c", "4+3i"}; std::string help = app.help(); - EXPECT_THAT(help, HasSubstr("1")); - EXPECT_THAT(help, HasSubstr("2")); - EXPECT_THAT(help, HasSubstr("COMPLEX")); + CHECK_THAT(help, Contains("1")); + CHECK_THAT(help, Contains("2")); + CHECK_THAT(help, Contains("COMPLEX")); - EXPECT_DOUBLE_EQ(1, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(1)); + CHECK(comp.imag() == Approx(2)); run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); args = {"-c", "5+-3i"}; run(); - EXPECT_DOUBLE_EQ(5, comp.real()); - EXPECT_DOUBLE_EQ(-3, comp.imag()); + 
CHECK(comp.real() == Approx(5)); + CHECK(comp.imag() == Approx(-3)); args = {"-c", "6", "-4i"}; run(); - EXPECT_DOUBLE_EQ(6, comp.real()); - EXPECT_DOUBLE_EQ(-4, comp.imag()); + CHECK(comp.real() == Approx(6)); + CHECK(comp.imag() == Approx(-4)); } -TEST_F(TApp, ComplexIgnoreI) { +TEST_CASE_METHOD(TApp, "ComplexIgnoreI", "[newparse]") { cx comp{1, 2}; app.add_complex("-c,--complex", comp); @@ -165,11 +165,11 @@ TEST_F(TApp, ComplexIgnoreI) { run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); } -TEST_F(TApp, ComplexIgnoreIOption) { +TEST_CASE_METHOD(TApp, "ComplexIgnoreIOption", "[newparse]") { cx comp{1, 2}; app.add_option("-c,--complex", comp); @@ -177,122 +177,122 @@ TEST_F(TApp, ComplexIgnoreIOption) { run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(3, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(3)); } -TEST_F(TApp, ComplexSingleArg) { +TEST_CASE_METHOD(TApp, "ComplexSingleArg", "[newparse]") { cx comp{1, 2}; app.add_complex("-c,--complex", comp); args = {"-c", "4"}; run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(0, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(0)); args = {"-c", "4-2i"}; run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(-2, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(-2)); args = {"-c", "4+2i"}; run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(2)); args = {"-c", "-4+2j"}; run(); - EXPECT_DOUBLE_EQ(-4, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(-4)); + CHECK(comp.imag() == Approx(2)); args = {"-c", "-4.2-2j"}; run(); - EXPECT_DOUBLE_EQ(-4.2, comp.real()); - EXPECT_DOUBLE_EQ(-2, comp.imag()); + CHECK(comp.real() == Approx(-4.2)); + CHECK(comp.imag() == Approx(-2)); args = {"-c", 
"-4.2-2.7i"}; run(); - EXPECT_DOUBLE_EQ(-4.2, comp.real()); - EXPECT_DOUBLE_EQ(-2.7, comp.imag()); + CHECK(comp.real() == Approx(-4.2)); + CHECK(comp.imag() == Approx(-2.7)); } -TEST_F(TApp, ComplexSingleArgOption) { +TEST_CASE_METHOD(TApp, "ComplexSingleArgOption", "[newparse]") { cx comp{1, 2}; app.add_option("-c,--complex", comp); args = {"-c", "4"}; run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(0, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(0)); args = {"-c", "4-2i"}; run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(-2, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(-2)); args = {"-c", "4+2i"}; run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(2)); args = {"-c", "-4+2j"}; run(); - EXPECT_DOUBLE_EQ(-4, comp.real()); - EXPECT_DOUBLE_EQ(2, comp.imag()); + CHECK(comp.real() == Approx(-4)); + CHECK(comp.imag() == Approx(2)); args = {"-c", "-4.2-2j"}; run(); - EXPECT_DOUBLE_EQ(-4.2, comp.real()); - EXPECT_DOUBLE_EQ(-2, comp.imag()); + CHECK(comp.real() == Approx(-4.2)); + CHECK(comp.imag() == Approx(-2)); args = {"-c", "-4.2-2.7i"}; run(); - EXPECT_DOUBLE_EQ(-4.2, comp.real()); - EXPECT_DOUBLE_EQ(-2.7, comp.imag()); + CHECK(comp.real() == Approx(-4.2)); + CHECK(comp.imag() == Approx(-2.7)); } -TEST_F(TApp, ComplexSingleImag) { +TEST_CASE_METHOD(TApp, "ComplexSingleImag", "[newparse]") { cx comp{1, 2}; app.add_complex("-c,--complex", comp); args = {"-c", "4j"}; run(); - EXPECT_DOUBLE_EQ(0, comp.real()); - EXPECT_DOUBLE_EQ(4, comp.imag()); + CHECK(comp.real() == Approx(0)); + CHECK(comp.imag() == Approx(4)); args = {"-c", "-4j"}; run(); - EXPECT_DOUBLE_EQ(0, comp.real()); - EXPECT_DOUBLE_EQ(-4, comp.imag()); + CHECK(comp.real() == Approx(0)); + CHECK(comp.imag() == Approx(-4)); args = {"-c", "-4"}; run(); - EXPECT_DOUBLE_EQ(-4, comp.real()); - EXPECT_DOUBLE_EQ(0, comp.imag()); + 
CHECK(comp.real() == Approx(-4)); + CHECK(comp.imag() == Approx(0)); args = {"-c", "+4"}; run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(0, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(0)); } -TEST_F(TApp, ComplexSingleImagOption) { +TEST_CASE_METHOD(TApp, "ComplexSingleImagOption", "[newparse]") { cx comp{1, 2}; app.add_option("-c,--complex", comp); args = {"-c", "4j"}; run(); - EXPECT_DOUBLE_EQ(0, comp.real()); - EXPECT_DOUBLE_EQ(4, comp.imag()); + CHECK(comp.real() == Approx(0)); + CHECK(comp.imag() == Approx(4)); args = {"-c", "-4j"}; run(); - EXPECT_DOUBLE_EQ(0, comp.real()); - EXPECT_DOUBLE_EQ(-4, comp.imag()); + CHECK(comp.real() == Approx(0)); + CHECK(comp.imag() == Approx(-4)); args = {"-c", "-4"}; run(); - EXPECT_DOUBLE_EQ(-4, comp.real()); - EXPECT_DOUBLE_EQ(0, comp.imag()); + CHECK(comp.real() == Approx(-4)); + CHECK(comp.imag() == Approx(0)); args = {"-c", "+4"}; run(); - EXPECT_DOUBLE_EQ(4, comp.real()); - EXPECT_DOUBLE_EQ(0, comp.imag()); + CHECK(comp.real() == Approx(4)); + CHECK(comp.imag() == Approx(0)); } /// Simple class containing two strings useful for testing lexical cast and conversions @@ -321,24 +321,24 @@ template <> bool lexical_cast<spair>(const std::string &input, spair &output) { } // namespace detail } // namespace CLI -TEST_F(TApp, custom_string_converter) { +TEST_CASE_METHOD(TApp, "custom_string_converter", "[newparse]") { spair val; app.add_option("-d,--dual_string", val); args = {"-d", "string1:string2"}; run(); - EXPECT_EQ(val.first, "string1"); - EXPECT_EQ(val.second, "string2"); + CHECK("string1" == val.first); + CHECK("string2" == val.second); } -TEST_F(TApp, custom_string_converterFail) { +TEST_CASE_METHOD(TApp, "custom_string_converterFail", "[newparse]") { spair val; app.add_option("-d,--dual_string", val); args = {"-d", "string2"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } /// simple class to wrap another with a very 
specific type constructor and assignment operators to test out some of the @@ -369,14 +369,14 @@ static_assert(CLI::detail::is_direct_constructible<objWrapper<std::string>, std: static_assert(!std::is_assignable<objWrapper<std::string>, std::string>::value, "string wrapper is improperly assignable"); -TEST_F(TApp, stringWrapper) { +TEST_CASE_METHOD(TApp, "stringWrapper", "[newparse]") { objWrapper<std::string> sWrapper; app.add_option("-v", sWrapper); args = {"-v", "string test"}; run(); - EXPECT_EQ(sWrapper.value(), "string test"); + CHECK("string test" == sWrapper.value()); } static_assert(CLI::detail::is_direct_constructible<objWrapper<double>, double>::value, @@ -388,18 +388,18 @@ static_assert(!CLI::detail::is_direct_constructible<objWrapper<double>, int>::va static_assert(!CLI::detail::is_istreamable<objWrapper<double>>::value, "double wrapper is input streamable and it shouldn't be"); -TEST_F(TApp, doubleWrapper) { +TEST_CASE_METHOD(TApp, "doubleWrapper", "[newparse]") { objWrapper<double> dWrapper; app.add_option("-v", dWrapper); args = {"-v", "2.36"}; run(); - EXPECT_EQ(dWrapper.value(), 2.36); + CHECK(2.36 == dWrapper.value()); args = {"-v", "thing"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } static_assert(CLI::detail::is_direct_constructible<objWrapper<int>, int>::value, @@ -411,17 +411,17 @@ static_assert(!CLI::detail::is_direct_constructible<objWrapper<int>, double>::va static_assert(!CLI::detail::is_istreamable<objWrapper<int>>::value, "int wrapper is input streamable and it shouldn't be"); -TEST_F(TApp, intWrapper) { +TEST_CASE_METHOD(TApp, "intWrapper", "[newparse]") { objWrapper<int> iWrapper; app.add_option("-v", iWrapper); args = {"-v", "45"}; run(); - EXPECT_EQ(iWrapper.value(), 45); + CHECK(45 == iWrapper.value()); args = {"-v", "thing"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } 
static_assert(!CLI::detail::is_direct_constructible<objWrapper<float>, int>::value, @@ -432,17 +432,17 @@ static_assert(!CLI::detail::is_direct_constructible<objWrapper<float>, double>:: static_assert(!CLI::detail::is_istreamable<objWrapper<float>>::value, "float wrapper is input streamable and it shouldn't be"); -TEST_F(TApp, floatWrapper) { +TEST_CASE_METHOD(TApp, "floatWrapper", "[newparse]") { objWrapper<float> iWrapper; app.add_option<objWrapper<float>, float>("-v", iWrapper); args = {"-v", "45.3"}; run(); - EXPECT_EQ(iWrapper.value(), 45.3f); + CHECK(45.3f == iWrapper.value()); args = {"-v", "thing"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } #endif @@ -461,26 +461,26 @@ class dobjWrapper { int ival_{0}; }; -TEST_F(TApp, dobjWrapper) { +TEST_CASE_METHOD(TApp, "dobjWrapper", "[newparse]") { dobjWrapper iWrapper; app.add_option("-v", iWrapper); args = {"-v", "45"}; run(); - EXPECT_EQ(iWrapper.ivalue(), 45); - EXPECT_EQ(iWrapper.dvalue(), 0.0); + CHECK(45 == iWrapper.ivalue()); + CHECK(0.0 == iWrapper.dvalue()); args = {"-v", "thing"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); iWrapper = dobjWrapper{}; args = {"-v", "45.1"}; run(); - EXPECT_EQ(iWrapper.ivalue(), 0); - EXPECT_EQ(iWrapper.dvalue(), 45.1); + CHECK(0 == iWrapper.ivalue()); + CHECK(45.1 == iWrapper.dvalue()); } /// simple class to wrap another with a very specific type constructor and assignment operators to test out some of the @@ -507,25 +507,25 @@ template <class X> class AobjWrapper { static_assert(std::is_assignable<AobjWrapper<std::uint16_t> &, std::uint16_t>::value, "AobjWrapper not assignable like it should be "); -TEST_F(TApp, uint16Wrapper) { +TEST_CASE_METHOD(TApp, "uint16Wrapper", "[newparse]") { AobjWrapper<std::uint16_t> sWrapper; app.add_option<AobjWrapper<std::uint16_t>, std::uint16_t>("-v", sWrapper); args = {"-v", "9"}; run(); - EXPECT_EQ(sWrapper.value(), 9u); + CHECK(9u == 
sWrapper.value()); args = {"-v", "thing"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); args = {"-v", "72456245754"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); args = {"-v", "-3"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } template <class T> class SimpleWrapper { @@ -539,25 +539,25 @@ template <class T> class SimpleWrapper { T val_; }; -TEST_F(TApp, wrapperInt) { +TEST_CASE_METHOD(TApp, "wrapperInt", "[newparse]") { SimpleWrapper<int> wrap; app.add_option("--val", wrap); args = {"--val", "2"}; run(); - EXPECT_EQ(wrap.getRef(), 2); + CHECK(2 == wrap.getRef()); } -TEST_F(TApp, wrapperString) { +TEST_CASE_METHOD(TApp, "wrapperString", "[newparse]") { SimpleWrapper<std::string> wrap; app.add_option("--val", wrap); args = {"--val", "str"}; run(); - EXPECT_EQ(wrap.getRef(), "str"); + CHECK("str" == wrap.getRef()); } -TEST_F(TApp, wrapperVector) { +TEST_CASE_METHOD(TApp, "wrapperVector", "[newparse]") { SimpleWrapper<std::vector<int>> wrap; app.add_option("--val", wrap); args = {"--val", "1", "2", "3", "4"}; @@ -565,10 +565,10 @@ TEST_F(TApp, wrapperVector) { run(); auto v1 = wrap.getRef(); auto v2 = std::vector<int>{1, 2, 3, 4}; - EXPECT_EQ(v1, v2); + CHECK(v2 == v1); } -TEST_F(TApp, wrapperwrapperString) { +TEST_CASE_METHOD(TApp, "wrapperwrapperString", "[newparse]") { SimpleWrapper<SimpleWrapper<std::string>> wrap; app.add_option("--val", wrap); args = {"--val", "arg"}; @@ -576,10 +576,10 @@ TEST_F(TApp, wrapperwrapperString) { run(); auto v1 = wrap.getRef().getRef(); auto v2 = "arg"; - EXPECT_EQ(v1, v2); + CHECK(v2 == v1); } -TEST_F(TApp, wrapperwrapperVector) { +TEST_CASE_METHOD(TApp, "wrapperwrapperVector", "[newparse]") { SimpleWrapper<SimpleWrapper<std::vector<int>>> wrap; auto opt = app.add_option("--val", wrap); args = {"--val", "1", "2", "3", "4"}; @@ -587,20 +587,20 @@ TEST_F(TApp, wrapperwrapperVector) { 
run(); auto v1 = wrap.getRef().getRef(); auto v2 = std::vector<int>{1, 2, 3, 4}; - EXPECT_EQ(v1, v2); + CHECK(v2 == v1); opt->type_size(0, 5); args = {"--val"}; run(); - EXPECT_TRUE(wrap.getRef().getRef().empty()); + CHECK(wrap.getRef().getRef().empty()); args = {"--val", "happy", "sad"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, wrapperComplex) { +TEST_CASE_METHOD(TApp, "wrapperComplex", "[newparse]") { SimpleWrapper<std::complex<double>> wrap; app.add_option("--val", wrap); args = {"--val", "1", "2"}; @@ -608,26 +608,26 @@ TEST_F(TApp, wrapperComplex) { run(); auto &v1 = wrap.getRef(); auto v2 = std::complex<double>{1, 2}; - EXPECT_EQ(v1.real(), v2.real()); - EXPECT_EQ(v1.imag(), v2.imag()); + CHECK(v2.real() == v1.real()); + CHECK(v2.imag() == v1.imag()); args = {"--val", "1.4-4j"}; run(); v2 = std::complex<double>{1.4, -4}; - EXPECT_EQ(v1.real(), v2.real()); - EXPECT_EQ(v1.imag(), v2.imag()); + CHECK(v2.real() == v1.real()); + CHECK(v2.imag() == v1.imag()); } -TEST_F(TApp, vectorComplex) { +TEST_CASE_METHOD(TApp, "vectorComplex", "[newparse]") { std::vector<std::complex<double>> vcomplex; app.add_option("--val", vcomplex); args = {"--val", "1", "2", "--val", "1.4-4j"}; run(); - ASSERT_EQ(vcomplex.size(), 2U); - EXPECT_EQ(vcomplex[0].real(), 1.0); - EXPECT_EQ(vcomplex[0].imag(), 2.0); - EXPECT_EQ(vcomplex[1].real(), 1.4); - EXPECT_EQ(vcomplex[1].imag(), -4.0); + REQUIRE(2U == vcomplex.size()); + CHECK(1.0 == vcomplex[0].real()); + CHECK(2.0 == vcomplex[0].imag()); + CHECK(1.4 == vcomplex[1].real()); + CHECK(-4.0 == vcomplex[1].imag()); } diff --git a/packages/CLI11/tests/OptionGroupTest.cpp b/packages/CLI11/tests/OptionGroupTest.cpp index e510c1a390847b0494e60f7608a9b6a85ab073bd..175f31dc3daf5de4da456c7a304a235d31aec42e 100644 --- a/packages/CLI11/tests/OptionGroupTest.cpp +++ b/packages/CLI11/tests/OptionGroupTest.cpp @@ -6,15 +6,11 @@ #include "app_helper.hpp" -#include "gmock/gmock.h" 
-#include "gtest/gtest.h" - -using ::testing::HasSubstr; -using ::testing::Not; +using Catch::Matchers::Contains; using vs_t = std::vector<std::string>; -TEST_F(TApp, BasicOptionGroup) { +TEST_CASE_METHOD(TApp, "BasicOptionGroup", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res; ogroup->add_option("--test1", res); @@ -23,11 +19,11 @@ TEST_F(TApp, BasicOptionGroup) { args = {"--test1", "5"}; run(); - EXPECT_EQ(res, 5); - EXPECT_EQ(app.count_all(), 1u); + CHECK(5 == res); + CHECK(1u == app.count_all()); } -TEST_F(TApp, BasicOptionGroupExact) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupExact", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -38,20 +34,20 @@ TEST_F(TApp, BasicOptionGroupExact) { ogroup->require_option(1); args = {"--test1", "5"}; run(); - EXPECT_EQ(res, 5); + CHECK(5 == res); args = {"--test1", "5", "--test2", "4"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); std::string help = ogroup->help(); auto exactloc = help.find("[Exactly 1"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, BasicOptionGroupExactTooMany) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupExactTooMany", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -61,10 +57,10 @@ TEST_F(TApp, BasicOptionGroupExactTooMany) { app.add_option("--option", val2); ogroup->require_option(10); args = {"--test1", "5"}; - EXPECT_THROW(run(), CLI::InvalidError); + CHECK_THROWS_AS(run(), CLI::InvalidError); } -TEST_F(TApp, BasicOptionGroupMinMax) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMinMax", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -75,20 +71,20 @@ TEST_F(TApp, 
BasicOptionGroupMinMax) { ogroup->require_option(1, 1); args = {"--test1", "5"}; run(); - EXPECT_EQ(res, 5); + CHECK(5 == res); args = {"--test1", "5", "--test2", "4"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); std::string help = ogroup->help(); auto exactloc = help.find("[Exactly 1"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, BasicOptionGroupMinMaxDifferent) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMinMaxDifferent", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -99,24 +95,24 @@ TEST_F(TApp, BasicOptionGroupMinMaxDifferent) { ogroup->require_option(1, 2); args = {"--test1", "5"}; run(); - EXPECT_EQ(res, 5); + CHECK(5 == res); args = {"--test1", "5", "--test2", "4"}; - EXPECT_NO_THROW(run()); - EXPECT_EQ(app.count_all(), 2u); + CHECK_NOTHROW(run()); + CHECK(2u == app.count_all()); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--test1", "5", "--test2", "4", "--test3=5"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); std::string help = ogroup->help(); auto exactloc = help.find("[Between 1 and 2"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, BasicOptionGroupMinMaxDifferentReversed) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMinMaxDifferentReversed", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -125,30 +121,30 @@ TEST_F(TApp, BasicOptionGroupMinMaxDifferentReversed) { int val2{0}; app.add_option("--option", val2); ogroup->require_option(2, 1); - EXPECT_EQ(ogroup->get_require_option_min(), 2u); - EXPECT_EQ(ogroup->get_require_option_max(), 
1u); + CHECK(2u == ogroup->get_require_option_min()); + CHECK(1u == ogroup->get_require_option_max()); args = {"--test1", "5"}; - EXPECT_THROW(run(), CLI::InvalidError); + CHECK_THROWS_AS(run(), CLI::InvalidError); ogroup->require_option(1, 2); - EXPECT_NO_THROW(run()); - EXPECT_EQ(res, 5); - EXPECT_EQ(ogroup->get_require_option_min(), 1u); - EXPECT_EQ(ogroup->get_require_option_max(), 2u); + CHECK_NOTHROW(run()); + CHECK(5 == res); + CHECK(1u == ogroup->get_require_option_min()); + CHECK(2u == ogroup->get_require_option_max()); args = {"--test1", "5", "--test2", "4"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--test1", "5", "--test2", "4", "--test3=5"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); std::string help = ogroup->help(); auto exactloc = help.find("[Between 1 and 2"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, BasicOptionGroupMax) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMax", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -159,20 +155,20 @@ TEST_F(TApp, BasicOptionGroupMax) { ogroup->require_option(-2); args = {"--test1", "5"}; run(); - EXPECT_EQ(res, 5); + CHECK(5 == res); args = {"--option", "9"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"--test1", "5", "--test2", "4", "--test3=5"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); std::string help = ogroup->help(); auto exactloc = help.find("[At most 2"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, BasicOptionGroupMax1) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMax1", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -183,20 
+179,20 @@ TEST_F(TApp, BasicOptionGroupMax1) { ogroup->require_option(-1); args = {"--test1", "5"}; run(); - EXPECT_EQ(res, 5); + CHECK(5 == res); args = {"--option", "9"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"--test1", "5", "--test2", "4"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); std::string help = ogroup->help(); auto exactloc = help.find("[At most 1"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, BasicOptionGroupMin) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMin", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -207,17 +203,17 @@ TEST_F(TApp, BasicOptionGroupMin) { ogroup->require_option(); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--test1", "5", "--test2", "4", "--test3=5"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); std::string help = ogroup->help(); auto exactloc = help.find("[At least 1"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, BasicOptionGroupExact2) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupExact2", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -228,20 +224,20 @@ TEST_F(TApp, BasicOptionGroupExact2) { ogroup->require_option(2); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--test1", "5", "--test2", "4", "--test3=5"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--test1", "5", "--test3=5"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); std::string help = ogroup->help(); auto exactloc = help.find("[Exactly 2"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, 
BasicOptionGroupMin2) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMin2", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); int res{0}; ogroup->add_option("--test1", res); @@ -252,17 +248,17 @@ TEST_F(TApp, BasicOptionGroupMin2) { ogroup->require_option(2, 0); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--test1", "5", "--test2", "4", "--test3=5"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); std::string help = ogroup->help(); auto exactloc = help.find("[At least 2"); - EXPECT_NE(exactloc, std::string::npos); + CHECK(std::string::npos != exactloc); } -TEST_F(TApp, BasicOptionGroupMinMoved) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMinMoved", "[optiongroup]") { int res{0}; auto opt1 = app.add_option("--test1", res); @@ -278,20 +274,20 @@ TEST_F(TApp, BasicOptionGroupMinMoved) { ogroup->add_option(opt3); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"--test1", "5", "--test2", "4", "--test3=5"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); std::string help = app.help(); auto exactloc = help.find("[At least 1"); auto oloc = help.find("--test1"); - EXPECT_NE(exactloc, std::string::npos); - EXPECT_NE(oloc, std::string::npos); - EXPECT_LT(exactloc, oloc); + CHECK(std::string::npos != exactloc); + CHECK(std::string::npos != oloc); + CHECK(oloc > exactloc); } -TEST_F(TApp, BasicOptionGroupMinMovedAsGroup) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupMinMovedAsGroup", "[optiongroup]") { int res{0}; auto opt1 = app.add_option("--test1", res); @@ -304,22 +300,22 @@ TEST_F(TApp, BasicOptionGroupMinMovedAsGroup) { ogroup->require_option(); ogroup->add_options(opt1, opt2, opt3); - EXPECT_THROW(ogroup->add_options(opt1), CLI::OptionNotFound); + CHECK_THROWS_AS(ogroup->add_options(opt1), CLI::OptionNotFound); args = {"--option", "9"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), 
CLI::RequiredError); args = {"--test1", "5", "--test2", "4", "--test3=5"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); std::string help = app.help(); auto exactloc = help.find("[At least 1"); auto oloc = help.find("--test1"); - EXPECT_NE(exactloc, std::string::npos); - EXPECT_NE(oloc, std::string::npos); - EXPECT_LT(exactloc, oloc); + CHECK(std::string::npos != exactloc); + CHECK(std::string::npos != oloc); + CHECK(oloc > exactloc); } -TEST_F(TApp, BasicOptionGroupAddFailures) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupAddFailures", "[optiongroup]") { int res{0}; auto opt1 = app.add_option("--test1", res); @@ -328,24 +324,24 @@ TEST_F(TApp, BasicOptionGroupAddFailures) { app.add_option("--option", val2); auto ogroup = app.add_option_group("clusters"); - EXPECT_THROW(ogroup->add_options(app.get_config_ptr()), CLI::OptionAlreadyAdded); - EXPECT_THROW(ogroup->add_options(app.get_help_ptr()), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(ogroup->add_options(app.get_config_ptr()), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(ogroup->add_options(app.get_help_ptr()), CLI::OptionAlreadyAdded); auto sub = app.add_subcommand("sub", "subcommand"); auto opt2 = sub->add_option("--option2", val2); - EXPECT_THROW(ogroup->add_option(opt2), CLI::OptionNotFound); + CHECK_THROWS_AS(ogroup->add_option(opt2), CLI::OptionNotFound); - EXPECT_THROW(ogroup->add_options(nullptr), CLI::OptionNotFound); + CHECK_THROWS_AS(ogroup->add_options(nullptr), CLI::OptionNotFound); ogroup->add_option(opt1); auto opt3 = app.add_option("--test1", res); - EXPECT_THROW(ogroup->add_option(opt3), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(ogroup->add_option(opt3), CLI::OptionAlreadyAdded); } -TEST_F(TApp, BasicOptionGroupScrewedUpMove) { +TEST_CASE_METHOD(TApp, "BasicOptionGroupScrewedUpMove", "[optiongroup]") { int res{0}; auto opt1 = app.add_option("--test1", res); @@ -356,25 +352,25 @@ TEST_F(TApp, BasicOptionGroupScrewedUpMove) { auto ogroup = app.add_option_group("clusters"); 
ogroup->require_option(); auto ogroup2 = ogroup->add_option_group("clusters2"); - EXPECT_THROW(ogroup2->add_options(opt1, opt2), CLI::OptionNotFound); + CHECK_THROWS_AS(ogroup2->add_options(opt1, opt2), CLI::OptionNotFound); CLI::Option_group EmptyGroup("description", "new group", nullptr); - EXPECT_THROW(EmptyGroup.add_option(opt2), CLI::OptionNotFound); - EXPECT_THROW(app._move_option(opt2, ogroup2), CLI::OptionNotFound); + CHECK_THROWS_AS(EmptyGroup.add_option(opt2), CLI::OptionNotFound); + CHECK_THROWS_AS(app._move_option(opt2, ogroup2), CLI::OptionNotFound); } -TEST_F(TApp, InvalidOptions) { +TEST_CASE_METHOD(TApp, "InvalidOptions", "[optiongroup]") { auto ogroup = app.add_option_group("clusters"); CLI::Option *opt = nullptr; - EXPECT_THROW(ogroup->excludes(opt), CLI::OptionNotFound); + CHECK_THROWS_AS(ogroup->excludes(opt), CLI::OptionNotFound); CLI::App *app_p = nullptr; - EXPECT_THROW(ogroup->excludes(app_p), CLI::OptionNotFound); - EXPECT_THROW(ogroup->excludes(ogroup), CLI::OptionNotFound); - EXPECT_THROW(ogroup->add_option(opt), CLI::OptionNotFound); + CHECK_THROWS_AS(ogroup->excludes(app_p), CLI::OptionNotFound); + CHECK_THROWS_AS(ogroup->excludes(ogroup), CLI::OptionNotFound); + CHECK_THROWS_AS(ogroup->add_option(opt), CLI::OptionNotFound); } -TEST_F(TApp, OptionGroupInheritedOptionDefaults) { +TEST_CASE_METHOD(TApp, "OptionGroupInheritedOptionDefaults", "[optiongroup]") { app.option_defaults()->ignore_case(); auto ogroup = app.add_option_group("clusters"); int res{0}; @@ -382,8 +378,8 @@ TEST_F(TApp, OptionGroupInheritedOptionDefaults) { args = {"--Test1", "5"}; run(); - EXPECT_EQ(res, 5); - EXPECT_EQ(app.count_all(), 1u); + CHECK(5 == res); + CHECK(1u == app.count_all()); } struct ManyGroups : public TApp { @@ -425,48 +421,48 @@ struct ManyGroups : public TApp { } }; -TEST_F(ManyGroups, SingleGroup) { +TEST_CASE_METHOD(ManyGroups, "SingleGroup", "[optiongroup]") { // only 1 group can be used main->require_option(1); args = {"--name1", "test"}; run(); 
- EXPECT_EQ(name1, "test"); + CHECK("test" == name1); args = {"--name2", "test", "--val2", "tval"}; run(); - EXPECT_EQ(val2, "tval"); + CHECK("tval" == val2); args = {"--name1", "test", "--val2", "tval"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } -TEST_F(ManyGroups, ExcludesGroup) { +TEST_CASE_METHOD(ManyGroups, "ExcludesGroup", "[optiongroup]") { // only 1 group can be used g1->excludes(g2); g1->excludes(g3); args = {"--name1", "test"}; run(); - EXPECT_EQ(name1, "test"); + CHECK("test" == name1); args = {"--name1", "test", "--name2", "test2"}; - EXPECT_THROW(run(), CLI::ExcludesError); + CHECK_THROWS_AS(run(), CLI::ExcludesError); - EXPECT_TRUE(g1->remove_excludes(g2)); - EXPECT_NO_THROW(run()); - EXPECT_FALSE(g1->remove_excludes(g1)); - EXPECT_FALSE(g1->remove_excludes(g2)); + CHECK(g1->remove_excludes(g2)); + CHECK_NOTHROW(run()); + CHECK(!g1->remove_excludes(g1)); + CHECK(!g1->remove_excludes(g2)); } -TEST_F(ManyGroups, NeedsGroup) { +TEST_CASE_METHOD(ManyGroups, "NeedsGroup", "[optiongroup]") { remove_required(); // all groups needed if g1 is used g1->needs(g2); g1->needs(g3); args = {"--name1", "test"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); // other groups should run fine args = {"--name2", "test2"}; @@ -474,11 +470,11 @@ TEST_F(ManyGroups, NeedsGroup) { // all three groups should be fine args = {"--name1", "test", "--name2", "test2", "--name3", "test3"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } // test adding an option group with existing subcommands to an app -TEST_F(TApp, ExistingSubcommandMatch) { +TEST_CASE_METHOD(TApp, "ExistingSubcommandMatch", "[optiongroup]") { auto sshared = std::make_shared<CLI::Option_group>("documenting the subcommand", "sub1g", nullptr); auto s1 = sshared->add_subcommand("sub1"); auto o1 = sshared->add_option_group("opt1"); @@ -489,9 +485,9 @@ TEST_F(TApp, ExistingSubcommandMatch) { try { app.add_subcommand(sshared); // 
this should throw the next line should never be reached - EXPECT_FALSE(true); + CHECK(!true); } catch(const CLI::OptionAlreadyAdded &oaa) { - EXPECT_THAT(oaa.what(), HasSubstr("sub1")); + CHECK_THAT(oaa.what(), Contains("sub1")); } sshared->remove_subcommand(s1); @@ -500,40 +496,40 @@ TEST_F(TApp, ExistingSubcommandMatch) { try { app.add_subcommand(sshared); // this should throw the next line should never be reached - EXPECT_FALSE(true); + CHECK(!true); } catch(const CLI::OptionAlreadyAdded &oaa) { - EXPECT_THAT(oaa.what(), HasSubstr("sub3")); + CHECK_THAT(oaa.what(), Contains("sub3")); } } -TEST_F(ManyGroups, SingleGroupError) { +TEST_CASE_METHOD(ManyGroups, "SingleGroupError", "[optiongroup]") { // only 1 group can be used main->require_option(1); args = {"--name1", "test", "--name2", "test3"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } -TEST_F(ManyGroups, AtMostOneGroup) { +TEST_CASE_METHOD(ManyGroups, "AtMostOneGroup", "[optiongroup]") { // only 1 group can be used main->require_option(0, 1); args = {"--name1", "test", "--name2", "test3"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(ManyGroups, AtLeastTwoGroups) { +TEST_CASE_METHOD(ManyGroups, "AtLeastTwoGroups", "[optiongroup]") { // only 1 group can be used main->require_option(2, 0); args = {"--name1", "test", "--name2", "test3"}; run(); args = {"--name1", "test"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } -TEST_F(ManyGroups, BetweenOneAndTwoGroups) { +TEST_CASE_METHOD(ManyGroups, "BetweenOneAndTwoGroups", "[optiongroup]") { // only 1 group can be used main->require_option(1, 2); args = {"--name1", "test", "--name2", "test3"}; @@ -543,19 +539,19 @@ TEST_F(ManyGroups, BetweenOneAndTwoGroups) { run(); args = {}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); 
args = {"--name1", "test", "--name2", "test3", "--name3=test3"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } -TEST_F(ManyGroups, RequiredFirst) { +TEST_CASE_METHOD(ManyGroups, "RequiredFirst", "[optiongroup]") { // only 1 group can be used remove_required(); g1->required(); - EXPECT_TRUE(g1->get_required()); - EXPECT_FALSE(g2->get_required()); + CHECK(g1->get_required()); + CHECK(!g2->get_required()); args = {"--name1", "test", "--name2", "test3"}; run(); @@ -563,32 +559,32 @@ TEST_F(ManyGroups, RequiredFirst) { try { run(); } catch(const CLI::RequiredError &re) { - EXPECT_THAT(re.what(), HasSubstr("g1")); + CHECK_THAT(re.what(), Contains("g1")); } args = {"--name1", "test", "--name2", "test3", "--name3=test3"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(ManyGroups, DisableFirst) { +TEST_CASE_METHOD(ManyGroups, "DisableFirst", "[optiongroup]") { // only 1 group can be used if remove_required not used remove_required(); g1->disabled(); - EXPECT_TRUE(g1->get_disabled()); - EXPECT_FALSE(g2->get_disabled()); + CHECK(g1->get_disabled()); + CHECK(!g2->get_disabled()); args = {"--name2", "test"}; run(); args = {"--name1", "test", "--name2", "test3"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); g1->disabled(false); args = {"--name1", "test", "--name2", "test3", "--name3=test3"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(ManyGroups, SameSubcommand) { +TEST_CASE_METHOD(ManyGroups, "SameSubcommand", "[optiongroup]") { // only 1 group can be used if remove_required not used remove_required(); auto sub1 = g1->add_subcommand("sub1")->disabled(); @@ -602,30 +598,30 @@ TEST_F(ManyGroups, SameSubcommand) { run(); - EXPECT_TRUE(*sub1); - EXPECT_TRUE(*sub2); - EXPECT_TRUE(*sub3); + CHECK(*sub1); + CHECK(*sub2); + CHECK(*sub3); auto subs = app.get_subcommands(); - EXPECT_EQ(subs.size(), 3u); - EXPECT_EQ(subs[0], sub1); - EXPECT_EQ(subs[1], sub2); - 
EXPECT_EQ(subs[2], sub3); + CHECK(3u == subs.size()); + CHECK(sub1 == subs[0]); + CHECK(sub2 == subs[1]); + CHECK(sub3 == subs[2]); args = {"sub1", "sub1", "sub1", "sub1"}; // for the 4th and future ones they will route to the first one run(); - EXPECT_EQ(sub1->count(), 2u); - EXPECT_EQ(sub2->count(), 1u); - EXPECT_EQ(sub3->count(), 1u); + CHECK(2u == sub1->count()); + CHECK(1u == sub2->count()); + CHECK(1u == sub3->count()); // subs should remain the same since the duplicate would not be registered there subs = app.get_subcommands(); - EXPECT_EQ(subs.size(), 3u); - EXPECT_EQ(subs[0], sub1); - EXPECT_EQ(subs[1], sub2); - EXPECT_EQ(subs[2], sub3); + CHECK(3u == subs.size()); + CHECK(sub1 == subs[0]); + CHECK(sub2 == subs[1]); + CHECK(sub3 == subs[2]); } -TEST_F(ManyGroups, CallbackOrder) { +TEST_CASE_METHOD(ManyGroups, "CallbackOrder", "[optiongroup]") { // only 1 group can be used if remove_required not used remove_required(); std::vector<int> callback_order; @@ -635,61 +631,61 @@ TEST_F(ManyGroups, CallbackOrder) { args = {"--name2", "test"}; run(); - EXPECT_EQ(callback_order, std::vector<int>({2, 3})); + CHECK(std::vector<int>({2, 3}) == callback_order); callback_order.clear(); args = {"--name1", "t2", "--name2", "test"}; g2->immediate_callback(); run(); - EXPECT_EQ(callback_order, std::vector<int>({2, 1, 3})); + CHECK(std::vector<int>({2, 1, 3}) == callback_order); callback_order.clear(); args = {"--name2", "test", "--name1", "t2"}; g2->immediate_callback(false); run(); - EXPECT_EQ(callback_order, std::vector<int>({1, 2, 3})); + CHECK(std::vector<int>({1, 2, 3}) == callback_order); } // Test the fallthrough for extra arguments -TEST_F(ManyGroups, ExtrasFallDown) { +TEST_CASE_METHOD(ManyGroups, "ExtrasFallDown", "[optiongroup]") { // only 1 group can be used if remove_required not used remove_required(); args = {"--test1", "--flag", "extra"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); main->allow_extras(); - 
EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); - EXPECT_EQ(app.remaining_size(true), 3u); - EXPECT_EQ(main->remaining_size(), 3u); + CHECK(3u == app.remaining_size(true)); + CHECK(3u == main->remaining_size()); std::vector<std::string> extras{"--test1", "--flag", "extra"}; - EXPECT_EQ(app.remaining(true), extras); - EXPECT_EQ(main->remaining(), extras); + CHECK(extras == app.remaining(true)); + CHECK(extras == main->remaining()); } // Test the option Inheritance -TEST_F(ManyGroups, Inheritance) { +TEST_CASE_METHOD(ManyGroups, "Inheritance", "[optiongroup]") { remove_required(); g1->ignore_case(); g1->ignore_underscore(); auto t2 = g1->add_subcommand("t2"); args = {"T2", "t_2"}; - EXPECT_TRUE(t2->get_ignore_underscore()); - EXPECT_TRUE(t2->get_ignore_case()); + CHECK(t2->get_ignore_underscore()); + CHECK(t2->get_ignore_case()); run(); - EXPECT_EQ(t2->count(), 2u); + CHECK(2u == t2->count()); } -TEST_F(ManyGroups, Moving) { +TEST_CASE_METHOD(ManyGroups, "Moving", "[optiongroup]") { remove_required(); auto mg = app.add_option_group("maing"); mg->add_subcommand(g1); mg->add_subcommand(g2); - EXPECT_EQ(g1->get_parent(), mg); - EXPECT_EQ(g2->get_parent(), mg); - EXPECT_EQ(g3->get_parent(), main); + CHECK(mg == g1->get_parent()); + CHECK(mg == g2->get_parent()); + CHECK(main == g3->get_parent()); } struct ManyGroupsPreTrigger : public ManyGroups { @@ -704,35 +700,35 @@ struct ManyGroupsPreTrigger : public ManyGroups { } }; -TEST_F(ManyGroupsPreTrigger, PreTriggerTestsOptions) { +TEST_CASE_METHOD(ManyGroupsPreTrigger, "PreTriggerTestsOptions", "[optiongroup]") { args = {"--name1", "test", "--name2", "test3"}; run(); - EXPECT_EQ(triggerMain, 4u); - EXPECT_EQ(trigger1, 2u); - EXPECT_EQ(trigger2, 0u); - EXPECT_EQ(trigger3, 27u); + CHECK(4u == triggerMain); + CHECK(2u == trigger1); + CHECK(0u == trigger2); + CHECK(27u == trigger3); args = {"--name1", "test"}; trigger2 = 34u; run(); - EXPECT_EQ(triggerMain, 2u); - EXPECT_EQ(trigger1, 0u); - EXPECT_EQ(trigger2, 34u); + CHECK(2u 
== triggerMain); + CHECK(0u == trigger1); + CHECK(34u == trigger2); args = {}; run(); - EXPECT_EQ(triggerMain, 0u); + CHECK(0u == triggerMain); args = {"--name1", "test", "--val1", "45", "--name2", "test3", "--name3=test3", "--val2=37"}; run(); - EXPECT_EQ(triggerMain, 8u); - EXPECT_EQ(trigger1, 6u); - EXPECT_EQ(trigger2, 2u); - EXPECT_EQ(trigger3, 1u); + CHECK(8u == triggerMain); + CHECK(6u == trigger1); + CHECK(2u == trigger2); + CHECK(1u == trigger3); } -TEST_F(ManyGroupsPreTrigger, PreTriggerTestsPositionals) { +TEST_CASE_METHOD(ManyGroupsPreTrigger, "PreTriggerTestsPositionals", "[optiongroup]") { // only 1 group can be used g1->add_option("pos1"); g2->add_option("pos2"); @@ -740,26 +736,26 @@ TEST_F(ManyGroupsPreTrigger, PreTriggerTestsPositionals) { args = {"pos1"}; run(); - EXPECT_EQ(triggerMain, 1u); - EXPECT_EQ(trigger1, 0u); - EXPECT_EQ(trigger2, 34u); - EXPECT_EQ(trigger3, 27u); + CHECK(1u == triggerMain); + CHECK(0u == trigger1); + CHECK(34u == trigger2); + CHECK(27u == trigger3); args = {"pos1", "pos2"}; run(); - EXPECT_EQ(triggerMain, 2u); - EXPECT_EQ(trigger1, 1u); - EXPECT_EQ(trigger2, 0u); + CHECK(2u == triggerMain); + CHECK(1u == trigger1); + CHECK(0u == trigger2); args = {"pos1", "pos2", "pos3"}; run(); - EXPECT_EQ(triggerMain, 3u); - EXPECT_EQ(trigger1, 2u); - EXPECT_EQ(trigger2, 1u); - EXPECT_EQ(trigger3, 0u); + CHECK(3u == triggerMain); + CHECK(2u == trigger1); + CHECK(1u == trigger2); + CHECK(0u == trigger3); } -TEST_F(ManyGroupsPreTrigger, PreTriggerTestsSubcommand) { +TEST_CASE_METHOD(ManyGroupsPreTrigger, "PreTriggerTestsSubcommand", "[optiongroup]") { auto sub1 = g1->add_subcommand("sub1")->fallthrough(); g2->add_subcommand("sub2")->fallthrough(); @@ -769,23 +765,23 @@ TEST_F(ManyGroupsPreTrigger, PreTriggerTestsSubcommand) { sub1->preparse_callback([&subtrigger](std::size_t count) { subtrigger = count; }); args = {"sub1"}; run(); - EXPECT_EQ(triggerMain, 1u); - EXPECT_EQ(trigger1, 0u); - EXPECT_EQ(trigger2, 34u); - EXPECT_EQ(trigger3, 
27u); + CHECK(1u == triggerMain); + CHECK(0u == trigger1); + CHECK(34u == trigger2); + CHECK(27u == trigger3); args = {"sub1", "sub2"}; run(); - EXPECT_EQ(triggerMain, 2u); - EXPECT_EQ(subtrigger, 1u); - EXPECT_EQ(trigger1, 1u); - EXPECT_EQ(trigger2, 0u); + CHECK(2u == triggerMain); + CHECK(1u == subtrigger); + CHECK(1u == trigger1); + CHECK(0u == trigger2); args = {"sub2", "sub3", "--name1=test", "sub1"}; run(); - EXPECT_EQ(triggerMain, 4u); - EXPECT_EQ(trigger1, 1u); - EXPECT_EQ(trigger2, 3u); - EXPECT_EQ(trigger3, 1u); // processes the first argument in group3 which includes the entire subcommand, which will - // go until the sub1 command is given + CHECK(4u == triggerMain); + CHECK(1u == trigger1); + CHECK(3u == trigger2); + CHECK(1u == trigger3); + // go until the sub1 command is given } diff --git a/packages/CLI11/tests/OptionTypeTest.cpp b/packages/CLI11/tests/OptionTypeTest.cpp index bbf71fdfb4a962984169895e6557b45fd2bc4805..a2f893bec598d56cbf17bd76c1de5c62b83eb19d 100644 --- a/packages/CLI11/tests/OptionTypeTest.cpp +++ b/packages/CLI11/tests/OptionTypeTest.cpp @@ -19,46 +19,44 @@ #include <unordered_set> #include <vector> -#include "gmock/gmock.h" - -TEST_F(TApp, OneStringAgain) { +TEST_CASE_METHOD(TApp, "OneStringAgain", "[optiontype]") { std::string str; app.add_option("-s,--string", str); args = {"--string", "mystring"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "mystring"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + CHECK("mystring" == str); } -TEST_F(TApp, OneStringFunction) { +TEST_CASE_METHOD(TApp, "OneStringFunction", "[optiontype]") { std::string str; app.add_option_function<std::string>("-s,--string", [&str](const std::string &val) { str = val; }); args = {"--string", "mystring"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--string")); - EXPECT_EQ(str, "mystring"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--string") == 1u); + 
CHECK("mystring" == str); } -TEST_F(TApp, doubleFunction) { +TEST_CASE_METHOD(TApp, "doubleFunction", "[optiontype]") { double res{0.0}; app.add_option_function<double>("--val", [&res](double val) { res = std::abs(val + 54); }); args = {"--val", "-354.356"}; run(); - EXPECT_EQ(res, 300.356); + CHECK(300.356 == res); // get the original value as entered as an integer - EXPECT_EQ(app["--val"]->as<float>(), -354.356f); + CHECK(-354.356f == app["--val"]->as<float>()); } -TEST_F(TApp, doubleFunctionFail) { +TEST_CASE_METHOD(TApp, "doubleFunctionFail", "[optiontype]") { double res; app.add_option_function<double>("--val", [&res](double val) { res = std::abs(val + 54); }); args = {"--val", "not_double"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, doubleVectorFunction) { +TEST_CASE_METHOD(TApp, "doubleVectorFunction", "[optiontype]") { std::vector<double> res; app.add_option_function<std::vector<double>>("--val", [&res](const std::vector<double> &val) { res = val; @@ -66,12 +64,12 @@ TEST_F(TApp, doubleVectorFunction) { }); args = {"--val", "5", "--val", "6", "--val", "7"}; run(); - EXPECT_EQ(res.size(), 3u); - EXPECT_EQ(res[0], 10.0); - EXPECT_EQ(res[2], 12.0); + CHECK(3u == res.size()); + CHECK(10.0 == res[0]); + CHECK(12.0 == res[2]); } -TEST_F(TApp, doubleVectorFunctionFail) { +TEST_CASE_METHOD(TApp, "doubleVectorFunctionFail", "[optiontype]") { std::vector<double> res; std::string vstring = "--val"; app.add_option_function<std::vector<double>>(vstring, [&res](const std::vector<double> &val) { @@ -79,14 +77,14 @@ TEST_F(TApp, doubleVectorFunctionFail) { std::transform(res.begin(), res.end(), res.begin(), [](double v) { return v + 5.0; }); }); args = {"--val", "five", "--val", "nine", "--val", "7"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); // check that getting the results through the results function generates the same error - 
EXPECT_THROW(app[vstring]->results(res), CLI::ConversionError); + CHECK_THROWS_AS(app[vstring]->results(res), CLI::ConversionError); auto strvec = app[vstring]->as<std::vector<std::string>>(); - EXPECT_EQ(strvec.size(), 3u); + CHECK(3u == strvec.size()); } -TEST_F(TApp, doubleVectorFunctionRunCallbackOnDefault) { +TEST_CASE_METHOD(TApp, "doubleVectorFunctionRunCallbackOnDefault", "[optiontype]") { std::vector<double> res; auto opt = app.add_option_function<std::vector<double>>("--val", [&res](const std::vector<double> &val) { res = val; @@ -94,26 +92,26 @@ TEST_F(TApp, doubleVectorFunctionRunCallbackOnDefault) { }); args = {"--val", "5", "--val", "6", "--val", "7"}; run(); - EXPECT_EQ(res.size(), 3u); - EXPECT_EQ(res[0], 10.0); - EXPECT_EQ(res[2], 12.0); - EXPECT_FALSE(opt->get_run_callback_for_default()); + CHECK(3u == res.size()); + CHECK(10.0 == res[0]); + CHECK(12.0 == res[2]); + CHECK(!opt->get_run_callback_for_default()); opt->run_callback_for_default(); opt->default_val(std::vector<int>{2, 1, -2}); - EXPECT_EQ(res[0], 7.0); - EXPECT_EQ(res[2], 3.0); + CHECK(7.0 == res[0]); + CHECK(3.0 == res[2]); - EXPECT_THROW(opt->default_val("this is a string"), CLI::ConversionError); + CHECK_THROWS_AS(opt->default_val("this is a string"), CLI::ConversionError); auto vec = opt->as<std::vector<double>>(); - ASSERT_EQ(vec.size(), 3U); - EXPECT_EQ(vec[0], 5.0); - EXPECT_EQ(vec[2], 7.0); + REQUIRE(3U == vec.size()); + CHECK(5.0 == vec[0]); + CHECK(7.0 == vec[2]); opt->check(CLI::Number); opt->run_callback_for_default(false); - EXPECT_THROW(opt->default_val("this is a string"), CLI::ValidationError); + CHECK_THROWS_AS(opt->default_val("this is a string"), CLI::ValidationError); } -TEST_F(TApp, BoolAndIntFlags) { +TEST_CASE_METHOD(TApp, "BoolAndIntFlags", "[optiontype]") { bool bflag{false}; int iflag{0}; @@ -125,24 +123,24 @@ TEST_F(TApp, BoolAndIntFlags) { args = {"-b", "-i", "-u"}; run(); - EXPECT_TRUE(bflag); - EXPECT_EQ(1, iflag); - EXPECT_EQ((unsigned int)1, uflag); + 
CHECK(bflag); + CHECK(iflag == 1); + CHECK(uflag == (unsigned int)1); args = {"-b", "-b"}; - ASSERT_NO_THROW(run()); - EXPECT_TRUE(bflag); + REQUIRE_NOTHROW(run()); + CHECK(bflag); bflag = false; args = {"-iiiuu"}; run(); - EXPECT_FALSE(bflag); - EXPECT_EQ(3, iflag); - EXPECT_EQ((unsigned int)2, uflag); + CHECK(!bflag); + CHECK(iflag == 3); + CHECK(uflag == (unsigned int)2); } -TEST_F(TApp, atomic_bool_flags) { +TEST_CASE_METHOD(TApp, "atomic_bool_flags", "[optiontype]") { std::atomic<bool> bflag{false}; std::atomic<int> iflag{0}; @@ -152,95 +150,95 @@ TEST_F(TApp, atomic_bool_flags) { args = {"-b", "-i"}; run(); - EXPECT_TRUE(bflag.load()); - EXPECT_EQ(1, iflag.load()); + CHECK(bflag.load()); + CHECK(iflag.load() == 1); args = {"-b", "-b"}; - ASSERT_NO_THROW(run()); - EXPECT_TRUE(bflag.load()); + REQUIRE_NOTHROW(run()); + CHECK(bflag.load()); bflag = false; args = {"-iii"}; run(); - EXPECT_FALSE(bflag.load()); - EXPECT_EQ(3, iflag.load()); + CHECK(!bflag.load()); + CHECK(iflag.load() == 3); args = {"--int=notanumber"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, BoolOption) { +TEST_CASE_METHOD(TApp, "BoolOption", "[optiontype]") { bool bflag{false}; app.add_option("-b", bflag); args = {"-b", "false"}; run(); - EXPECT_FALSE(bflag); + CHECK(!bflag); args = {"-b", "1"}; run(); - EXPECT_TRUE(bflag); + CHECK(bflag); args = {"-b", "-7"}; run(); - EXPECT_FALSE(bflag); + CHECK(!bflag); // cause an out of bounds error internally args = {"-b", "751615654161688126132138844896646748852"}; run(); - EXPECT_TRUE(bflag); + CHECK(bflag); args = {"-b", "-751615654161688126132138844896646748852"}; run(); - EXPECT_FALSE(bflag); + CHECK(!bflag); } -TEST_F(TApp, atomic_int_option) { +TEST_CASE_METHOD(TApp, "atomic_int_option", "[optiontype]") { std::atomic<int> i{0}; auto aopt = app.add_option("-i,--int", i); args = {"-i4"}; run(); - EXPECT_EQ(1u, app.count("--int")); - EXPECT_EQ(1u, app.count("-i")); - EXPECT_EQ(i, 4); - 
EXPECT_EQ(app["-i"]->as<std::string>(), "4"); - EXPECT_EQ(app["--int"]->as<double>(), 4.0); + CHECK(app.count("--int") == 1u); + CHECK(app.count("-i") == 1u); + CHECK(4 == i); + CHECK("4" == app["-i"]->as<std::string>()); + CHECK(4.0 == app["--int"]->as<double>()); args = {"--int", "notAnInt"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); aopt->expected(0, 1); args = {"--int"}; run(); - EXPECT_EQ(i, 0); + CHECK(0 == i); } -TEST_F(TApp, CharOption) { +TEST_CASE_METHOD(TApp, "CharOption", "[optiontype]") { char c1{'t'}; app.add_option("-c", c1); args = {"-c", "g"}; run(); - EXPECT_EQ(c1, 'g'); + CHECK('g' == c1); args = {"-c", "1"}; run(); - EXPECT_EQ(c1, '1'); + CHECK('1' == c1); args = {"-c", "77"}; run(); - EXPECT_EQ(c1, 77); + CHECK(77 == c1); // convert hex for digit args = {"-c", "0x44"}; run(); - EXPECT_EQ(c1, 0x44); + CHECK(0x44 == c1); args = {"-c", "751615654161688126132138844896646748852"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, vectorDefaults) { +TEST_CASE_METHOD(TApp, "vectorDefaults", "[optiontype]") { std::vector<int> vals{4, 5}; auto opt = app.add_option("--long", vals, "", true); @@ -248,30 +246,30 @@ TEST_F(TApp, vectorDefaults) { run(); - EXPECT_EQ(vals, std::vector<int>({1, 2, 3})); + CHECK(std::vector<int>({1, 2, 3}) == vals); args.clear(); run(); auto res = app["--long"]->as<std::vector<int>>(); - EXPECT_EQ(res, std::vector<int>({4, 5})); + CHECK(std::vector<int>({4, 5}) == res); app.clear(); opt->expected(1)->take_last(); res = app["--long"]->as<std::vector<int>>(); - EXPECT_EQ(res, std::vector<int>({5})); + CHECK(std::vector<int>({5}) == res); opt->take_first(); res = app["--long"]->as<std::vector<int>>(); - EXPECT_EQ(res, std::vector<int>({4})); + CHECK(std::vector<int>({4}) == res); opt->expected(0, 1)->take_last(); run(); - EXPECT_EQ(res, std::vector<int>({4})); + CHECK(std::vector<int>({4}) == res); res = 
app["--long"]->as<std::vector<int>>(); - EXPECT_EQ(res, std::vector<int>({5})); + CHECK(std::vector<int>({5}) == res); } -TEST_F(TApp, CallbackBoolFlags) { +TEST_CASE_METHOD(TApp, "CallbackBoolFlags", "[optiontype]") { bool value{false}; @@ -280,24 +278,24 @@ TEST_F(TApp, CallbackBoolFlags) { auto cback = app.add_flag_callback("--val", func); args = {"--val"}; run(); - EXPECT_TRUE(value); + CHECK(value); value = false; args = {"--val=false"}; run(); - EXPECT_FALSE(value); + CHECK(!value); - EXPECT_THROW(app.add_flag_callback("hi", func), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_flag_callback("hi", func), CLI::IncorrectConstruction); cback->multi_option_policy(CLI::MultiOptionPolicy::Throw); args = {"--val", "--val=false"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, pair_check) { +TEST_CASE_METHOD(TApp, "pair_check", "[optiontype]") { std::string myfile{"pair_check_file.txt"}; bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); + CHECK(ok); - EXPECT_TRUE(CLI::ExistingFile(myfile).empty()); + CHECK(CLI::ExistingFile(myfile).empty()); std::pair<std::string, int> findex; auto v0 = CLI::ExistingFile; @@ -308,112 +306,112 @@ TEST_F(TApp, pair_check) { args = {"--file", myfile, "2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); - EXPECT_EQ(findex.first, myfile); - EXPECT_EQ(findex.second, 2); + CHECK(myfile == findex.first); + CHECK(2 == findex.second); args = {"--file", myfile, "-3"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--file", myfile, "2"}; std::remove(myfile.c_str()); - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } // this will require that modifying the multi-option policy for tuples be allowed which it isn't at present -TEST_F(TApp, pair_check_take_first) { +TEST_CASE_METHOD(TApp, "pair_check_take_first", 
"[optiontype]") { std::string myfile{"pair_check_file2.txt"}; bool ok = static_cast<bool>(std::ofstream(myfile.c_str()).put('a')); // create file - EXPECT_TRUE(ok); + CHECK(ok); - EXPECT_TRUE(CLI::ExistingFile(myfile).empty()); + CHECK(CLI::ExistingFile(myfile).empty()); std::pair<std::string, int> findex; auto opt = app.add_option("--file", findex)->check(CLI::ExistingFile)->check(CLI::PositiveNumber); - EXPECT_THROW(opt->get_validator(3), CLI::OptionNotFound); + CHECK_THROWS_AS(opt->get_validator(3), CLI::OptionNotFound); opt->get_validator(0)->application_index(0); opt->get_validator(1)->application_index(1); opt->multi_option_policy(CLI::MultiOptionPolicy::TakeLast); args = {"--file", "not_a_file.txt", "-16", "--file", myfile, "2"}; // should only check the last one - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); - EXPECT_EQ(findex.first, myfile); - EXPECT_EQ(findex.second, 2); + CHECK(myfile == findex.first); + CHECK(2 == findex.second); opt->multi_option_policy(CLI::MultiOptionPolicy::TakeFirst); - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, VectorFixedString) { +TEST_CASE_METHOD(TApp, "VectorFixedString", "[optiontype]") { std::vector<std::string> strvec; std::vector<std::string> answer{"mystring", "mystring2", "mystring3"}; CLI::Option *opt = app.add_option("-s,--string", strvec)->expected(3); - EXPECT_EQ(3, opt->get_expected()); + CHECK(opt->get_expected() == 3); args = {"--string", "mystring", "mystring2", "mystring3"}; run(); - EXPECT_EQ(3u, app.count("--string")); - EXPECT_EQ(answer, strvec); + CHECK(app.count("--string") == 3u); + CHECK(strvec == answer); } -TEST_F(TApp, VectorDefaultedFixedString) { +TEST_CASE_METHOD(TApp, "VectorDefaultedFixedString", "[optiontype]") { std::vector<std::string> strvec{"one"}; std::vector<std::string> answer{"mystring", "mystring2", "mystring3"}; CLI::Option *opt = app.add_option("-s,--string", strvec, "")->expected(3)->capture_default_str(); - 
EXPECT_EQ(3, opt->get_expected()); + CHECK(opt->get_expected() == 3); args = {"--string", "mystring", "mystring2", "mystring3"}; run(); - EXPECT_EQ(3u, app.count("--string")); - EXPECT_EQ(answer, strvec); + CHECK(app.count("--string") == 3u); + CHECK(strvec == answer); } -TEST_F(TApp, VectorIndexedValidator) { +TEST_CASE_METHOD(TApp, "VectorIndexedValidator", "[optiontype]") { std::vector<int> vvec; CLI::Option *opt = app.add_option("-v", vvec); args = {"-v", "1", "-1", "-v", "3", "-v", "-976"}; run(); - EXPECT_EQ(4u, app.count("-v")); - EXPECT_EQ(4u, vvec.size()); + CHECK(app.count("-v") == 4u); + CHECK(vvec.size() == 4u); opt->check(CLI::PositiveNumber.application_index(0)); opt->check((!CLI::PositiveNumber).application_index(1)); - EXPECT_NO_THROW(run()); - EXPECT_EQ(4u, vvec.size()); + CHECK_NOTHROW(run()); + CHECK(vvec.size() == 4u); // v[3] would be negative opt->check(CLI::PositiveNumber.application_index(3)); - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, VectorUnlimString) { +TEST_CASE_METHOD(TApp, "VectorUnlimString", "[optiontype]") { std::vector<std::string> strvec; std::vector<std::string> answer{"mystring", "mystring2", "mystring3"}; CLI::Option *opt = app.add_option("-s,--string", strvec); - EXPECT_EQ(1, opt->get_expected()); - EXPECT_EQ(CLI::detail::expected_max_vector_size, opt->get_expected_max()); + CHECK(opt->get_expected() == 1); + CHECK(opt->get_expected_max() == CLI::detail::expected_max_vector_size); args = {"--string", "mystring", "mystring2", "mystring3"}; run(); - EXPECT_EQ(3u, app.count("--string")); - EXPECT_EQ(answer, strvec); + CHECK(app.count("--string") == 3u); + CHECK(strvec == answer); args = {"-s", "mystring", "mystring2", "mystring3"}; run(); - EXPECT_EQ(3u, app.count("--string")); - EXPECT_EQ(answer, strvec); + CHECK(app.count("--string") == 3u); + CHECK(strvec == answer); } // From https://github.com/CLIUtils/CLI11/issues/420 -TEST_F(TApp, stringLikeTests) { 
+TEST_CASE_METHOD(TApp, "stringLikeTests", "[optiontype]") { struct nType { explicit nType(const std::string &a_value) : m_value{a_value} {} @@ -426,14 +424,14 @@ TEST_F(TApp, stringLikeTests) { app.add_option("--type", m_type, "type")->capture_default_str(); run(); - EXPECT_EQ(app["--type"]->as<std::string>(), "op str"); + CHECK("op str" == app["--type"]->as<std::string>()); args = {"--type", "bca"}; run(); - EXPECT_EQ(std::string(m_type), "op str"); - EXPECT_EQ(m_type.m_value, "bca"); + CHECK("op str" == std::string(m_type)); + CHECK("bca" == m_type.m_value); } -TEST_F(TApp, VectorExpectedRange) { +TEST_CASE_METHOD(TApp, "VectorExpectedRange", "[optiontype]") { std::vector<std::string> strvec; CLI::Option *opt = app.add_option("--string", strvec); @@ -441,47 +439,47 @@ TEST_F(TApp, VectorExpectedRange) { args = {"--string", "mystring", "mystring2", "mystring3"}; run(); - EXPECT_EQ(3u, app.count("--string")); + CHECK(app.count("--string") == 3u); args = {"--string", "mystring"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"--string", "mystring", "mystring2", "string2", "--string", "string4", "string5"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); - EXPECT_EQ(opt->get_expected_max(), 4); - EXPECT_EQ(opt->get_expected_min(), 2); + CHECK(4 == opt->get_expected_max()); + CHECK(2 == opt->get_expected_min()); opt->expected(4, 2); // just test the handling of reversed arguments - EXPECT_EQ(opt->get_expected_max(), 4); - EXPECT_EQ(opt->get_expected_min(), 2); + CHECK(4 == opt->get_expected_max()); + CHECK(2 == opt->get_expected_min()); opt->expected(-5); - EXPECT_EQ(opt->get_expected_max(), 5); - EXPECT_EQ(opt->get_expected_min(), 5); + CHECK(5 == opt->get_expected_max()); + CHECK(5 == opt->get_expected_min()); opt->expected(-5, 7); - EXPECT_EQ(opt->get_expected_max(), 7); - EXPECT_EQ(opt->get_expected_min(), 5); + CHECK(7 == opt->get_expected_max()); + CHECK(5 == 
opt->get_expected_min()); } -TEST_F(TApp, VectorFancyOpts) { +TEST_CASE_METHOD(TApp, "VectorFancyOpts", "[optiontype]") { std::vector<std::string> strvec; std::vector<std::string> answer{"mystring", "mystring2", "mystring3"}; CLI::Option *opt = app.add_option("-s,--string", strvec)->required()->expected(3); - EXPECT_EQ(3, opt->get_expected()); + CHECK(opt->get_expected() == 3); args = {"--string", "mystring", "mystring2", "mystring3"}; run(); - EXPECT_EQ(3u, app.count("--string")); - EXPECT_EQ(answer, strvec); + CHECK(app.count("--string") == 3u); + CHECK(strvec == answer); args = {"one", "two"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); - EXPECT_THROW(run(), CLI::ParseError); + CHECK_THROWS_AS(run(), CLI::ParseError); } // #87 -TEST_F(TApp, CustomDoubleOption) { +TEST_CASE_METHOD(TApp, "CustomDoubleOption", "[optiontype]") { std::pair<int, double> custom_opt; @@ -494,12 +492,12 @@ TEST_F(TApp, CustomDoubleOption) { args = {"12", "1.5"}; run(); - EXPECT_EQ(custom_opt.first, 12); - EXPECT_DOUBLE_EQ(custom_opt.second, 1.5); + CHECK(12 == custom_opt.first); + CHECK(1.5 == Approx(custom_opt.second)); } // now with tuple support this is possible -TEST_F(TApp, CustomDoubleOptionAlt) { +TEST_CASE_METHOD(TApp, "CustomDoubleOptionAlt", "[optiontype]") { std::pair<int, double> custom_opt; @@ -508,12 +506,12 @@ TEST_F(TApp, CustomDoubleOptionAlt) { args = {"12", "1.5"}; run(); - EXPECT_EQ(custom_opt.first, 12); - EXPECT_DOUBLE_EQ(custom_opt.second, 1.5); + CHECK(12 == custom_opt.first); + CHECK(1.5 == Approx(custom_opt.second)); } // now with independent type sizes and expected this is possible -TEST_F(TApp, vectorPair) { +TEST_CASE_METHOD(TApp, "vectorPair", "[optiontype]") { std::vector<std::pair<int, std::string>> custom_opt; @@ -522,21 +520,21 @@ TEST_F(TApp, vectorPair) { args = {"--dict", "1", "str1", "--dict", "3", "str3"}; run(); - ASSERT_EQ(custom_opt.size(), 2u); - EXPECT_EQ(custom_opt[0].first, 1); - 
EXPECT_EQ(custom_opt[1].second, "str3"); + REQUIRE(2u == custom_opt.size()); + CHECK(1 == custom_opt[0].first); + CHECK("str3" == custom_opt[1].second); args = {"--dict", "1", "str1", "--dict", "3", "str3", "--dict", "-1", "str4"}; run(); - ASSERT_EQ(custom_opt.size(), 3u); - EXPECT_EQ(custom_opt[2].first, -1); - EXPECT_EQ(custom_opt[2].second, "str4"); + REQUIRE(3u == custom_opt.size()); + CHECK(-1 == custom_opt[2].first); + CHECK("str4" == custom_opt[2].second); opt->check(CLI::PositiveNumber.application_index(0)); - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, vectorPairFail) { +TEST_CASE_METHOD(TApp, "vectorPairFail", "[optiontype]") { std::vector<std::pair<int, std::string>> custom_opt; @@ -544,46 +542,46 @@ TEST_F(TApp, vectorPairFail) { args = {"--dict", "1", "str1", "--dict", "str3", "1"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, vectorPairTypeRange) { +TEST_CASE_METHOD(TApp, "vectorPairTypeRange", "[optiontype]") { std::vector<std::pair<int, std::string>> custom_opt; auto opt = app.add_option("--dict", custom_opt); opt->type_size(2, 1); // just test switched arguments - EXPECT_EQ(opt->get_type_size_min(), 1); - EXPECT_EQ(opt->get_type_size_max(), 2); + CHECK(1 == opt->get_type_size_min()); + CHECK(2 == opt->get_type_size_max()); args = {"--dict", "1", "str1", "--dict", "3", "str3"}; run(); - ASSERT_EQ(custom_opt.size(), 2u); - EXPECT_EQ(custom_opt[0].first, 1); - EXPECT_EQ(custom_opt[1].second, "str3"); + REQUIRE(2u == custom_opt.size()); + CHECK(1 == custom_opt[0].first); + CHECK("str3" == custom_opt[1].second); args = {"--dict", "1", "str1", "--dict", "3", "--dict", "-1", "str4"}; run(); - ASSERT_EQ(custom_opt.size(), 3u); - EXPECT_TRUE(custom_opt[1].second.empty()); - EXPECT_EQ(custom_opt[2].first, -1); - EXPECT_EQ(custom_opt[2].second, "str4"); + REQUIRE(3u == custom_opt.size()); + CHECK(custom_opt[1].second.empty()); + 
CHECK(-1 == custom_opt[2].first); + CHECK("str4" == custom_opt[2].second); opt->type_size(-2, -1); // test negative arguments - EXPECT_EQ(opt->get_type_size_min(), 1); - EXPECT_EQ(opt->get_type_size_max(), 2); + CHECK(1 == opt->get_type_size_min()); + CHECK(2 == opt->get_type_size_max()); // this type size spec should run exactly as before run(); - ASSERT_EQ(custom_opt.size(), 3u); - EXPECT_TRUE(custom_opt[1].second.empty()); - EXPECT_EQ(custom_opt[2].first, -1); - EXPECT_EQ(custom_opt[2].second, "str4"); + REQUIRE(3u == custom_opt.size()); + CHECK(custom_opt[1].second.empty()); + CHECK(-1 == custom_opt[2].first); + CHECK("str4" == custom_opt[2].second); } // now with independent type sizes and expected this is possible -TEST_F(TApp, vectorTuple) { +TEST_CASE_METHOD(TApp, "vectorTuple", "[optiontype]") { std::vector<std::tuple<int, std::string, double>> custom_opt; @@ -592,28 +590,28 @@ TEST_F(TApp, vectorTuple) { args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7"}; run(); - ASSERT_EQ(custom_opt.size(), 2u); - EXPECT_EQ(std::get<0>(custom_opt[0]), 1); - EXPECT_EQ(std::get<1>(custom_opt[1]), "str3"); - EXPECT_EQ(std::get<2>(custom_opt[1]), 2.7); + REQUIRE(2u == custom_opt.size()); + CHECK(1 == std::get<0>(custom_opt[0])); + CHECK("str3" == std::get<1>(custom_opt[1])); + CHECK(2.7 == std::get<2>(custom_opt[1])); args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7", "--dict", "-1", "str4", "-1.87"}; run(); - ASSERT_EQ(custom_opt.size(), 3u); - EXPECT_EQ(std::get<0>(custom_opt[2]), -1); - EXPECT_EQ(std::get<1>(custom_opt[2]), "str4"); - EXPECT_EQ(std::get<2>(custom_opt[2]), -1.87); + REQUIRE(3u == custom_opt.size()); + CHECK(-1 == std::get<0>(custom_opt[2])); + CHECK("str4" == std::get<1>(custom_opt[2])); + CHECK(-1.87 == std::get<2>(custom_opt[2])); opt->check(CLI::PositiveNumber.application_index(0)); - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args.back() = "haha"; args[9] = "45"; - 
EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } // now with independent type sizes and expected this is possible -TEST_F(TApp, vectorVector) { +TEST_CASE_METHOD(TApp, "vectorVector", "[optiontype]") { std::vector<std::vector<int>> custom_opt; @@ -622,34 +620,34 @@ TEST_F(TApp, vectorVector) { args = {"--dict", "1", "2", "4", "--dict", "3", "1"}; run(); - ASSERT_EQ(custom_opt.size(), 2u); - EXPECT_EQ(custom_opt[0].size(), 3u); - EXPECT_EQ(custom_opt[1].size(), 2u); + REQUIRE(2u == custom_opt.size()); + CHECK(3u == custom_opt[0].size()); + CHECK(2u == custom_opt[1].size()); args = {"--dict", "1", "2", "4", "--dict", "3", "1", "--dict", "3", "--dict", "3", "3", "3", "3", "3", "3", "3", "3", "3", "-3"}; run(); - ASSERT_EQ(custom_opt.size(), 4u); - EXPECT_EQ(custom_opt[0].size(), 3u); - EXPECT_EQ(custom_opt[1].size(), 2u); - EXPECT_EQ(custom_opt[2].size(), 1u); - EXPECT_EQ(custom_opt[3].size(), 10u); + REQUIRE(4u == custom_opt.size()); + CHECK(3u == custom_opt[0].size()); + CHECK(2u == custom_opt[1].size()); + CHECK(1u == custom_opt[2].size()); + CHECK(10u == custom_opt[3].size()); opt->check(CLI::PositiveNumber.application_index(9)); - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args.pop_back(); - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args.back() = "haha"; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); args = {"--dict", "1", "2", "4", "%%", "3", "1", "%%", "3", "%%", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3"}; run(); - ASSERT_EQ(custom_opt.size(), 4u); + REQUIRE(4u == custom_opt.size()); } // now with independent type sizes and expected this is possible -TEST_F(TApp, vectorVectorFixedSize) { +TEST_CASE_METHOD(TApp, "vectorVectorFixedSize", "[optiontype]") { std::vector<std::vector<int>> custom_opt; @@ -658,21 +656,21 @@ TEST_F(TApp, vectorVectorFixedSize) { args = {"--dict", "1", "2", "4", "3", "--dict", "3", "1", 
"2", "8"}; run(); - ASSERT_EQ(custom_opt.size(), 2u); - EXPECT_EQ(custom_opt[0].size(), 4u); - EXPECT_EQ(custom_opt[1].size(), 4u); + REQUIRE(2u == custom_opt.size()); + CHECK(4u == custom_opt[0].size()); + CHECK(4u == custom_opt[1].size()); args = {"--dict", "1", "2", "4", "--dict", "3", "1", "7", "6"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); // this should reset it opt->type_size(CLI::detail::expected_max_vector_size); opt->type_size(1, CLI::detail::expected_max_vector_size); - EXPECT_NO_THROW(run()); - ASSERT_EQ(custom_opt.size(), 2U); + CHECK_NOTHROW(run()); + REQUIRE(2U == custom_opt.size()); } // now with independent type sizes and expected this is possible -TEST_F(TApp, tuplePair) { +TEST_CASE_METHOD(TApp, "tuplePair", "[optiontype]") { std::tuple<std::pair<int, double>> custom_opt; app.add_option("--pr", custom_opt); @@ -680,11 +678,11 @@ TEST_F(TApp, tuplePair) { args = {"--pr", "1", "2"}; run(); - EXPECT_EQ(std::get<0>(custom_opt).first, 1); - EXPECT_EQ(std::get<0>(custom_opt).second, 2.0); + CHECK(1 == std::get<0>(custom_opt).first); + CHECK(2.0 == std::get<0>(custom_opt).second); } // now with independent type sizes and expected this is possible -TEST_F(TApp, tupleintPair) { +TEST_CASE_METHOD(TApp, "tupleintPair", "[optiontype]") { std::tuple<int, std::pair<int, double>> custom_opt; app.add_option("--pr", custom_opt); @@ -692,9 +690,9 @@ TEST_F(TApp, tupleintPair) { args = {"--pr", "3", "1", "2"}; run(); - EXPECT_EQ(std::get<0>(custom_opt), 3); - EXPECT_EQ(std::get<1>(custom_opt).first, 1); - EXPECT_EQ(std::get<1>(custom_opt).second, 2.0); + CHECK(3 == std::get<0>(custom_opt)); + CHECK(1 == std::get<1>(custom_opt).first); + CHECK(2.0 == std::get<1>(custom_opt).second); } static_assert(CLI::detail::is_mutable_container<std::set<std::string>>::value, "set should be a container"); @@ -719,66 +717,54 @@ static_assert(CLI::detail::type_count<std::list<std::pair<int, std::string>>>::v 
static_assert(CLI::detail::type_count<std::map<std::string, std::pair<int, std::string>>>::value == 3, "map<string,pair<int,string>> should have a type size of 3"); -template <class T> class TApp_container_single : public TApp { - public: - using container_type = T; - container_type cval{}; - TApp_container_single() : TApp() {} -}; - -using containerTypes_single = - ::testing::Types<std::vector<int>, std::deque<int>, std::set<int>, std::list<int>, std::unordered_set<int>>; - -TYPED_TEST_SUITE(TApp_container_single, containerTypes_single, ); - -TYPED_TEST(TApp_container_single, containerInt) { - - auto &cv = TApp_container_single<TypeParam>::cval; - CLI::Option *opt = (TApp::app).add_option("-v", cv); - - TApp::args = {"-v", "1", "-1", "-v", "3", "-v", "-976"}; - TApp::run(); - EXPECT_EQ(4u, (TApp::app).count("-v")); - EXPECT_EQ(4u, cv.size()); +TEMPLATE_TEST_CASE("Container int single", + "[optiontype]", + std::vector<int>, + std::deque<int>, + std::set<int>, + std::list<int>, + std::unordered_set<int>) { + TApp tapp; + TestType cv; + + CLI::Option *opt = tapp.app.add_option("-v", cv); + + tapp.args = {"-v", "1", "-1", "-v", "3", "-v", "-976"}; + tapp.run(); + CHECK(tapp.app.count("-v") == 4u); + CHECK(cv.size() == 4u); opt->check(CLI::PositiveNumber.application_index(0)); opt->check((!CLI::PositiveNumber).application_index(1)); - EXPECT_NO_THROW(TApp::run()); - EXPECT_EQ(4u, cv.size()); + CHECK_NOTHROW(tapp.run()); + CHECK(cv.size() == 4u); // v[3] would be negative opt->check(CLI::PositiveNumber.application_index(3)); - EXPECT_THROW(TApp::run(), CLI::ValidationError); + CHECK_THROWS_AS(tapp.run(), CLI::ValidationError); } -template <class T> class TApp_container_pair : public TApp { - public: - using container_type = T; - container_type cval{}; - TApp_container_pair() : TApp() {} -}; - using isp = std::pair<int, std::string>; -using containerTypes_pair = ::testing::Types<std::vector<isp>, - std::deque<isp>, - std::set<isp>, - std::list<isp>, - std::map<int, 
std::string>, - std::unordered_map<int, std::string>>; -TYPED_TEST_SUITE(TApp_container_pair, containerTypes_pair, ); +TEMPLATE_TEST_CASE("Container pair", + "[optiontype]", + std::vector<isp>, + std::deque<isp>, + std::set<isp>, + std::list<isp>, + (std::map<int, std::string>), + (std::unordered_map<int, std::string>)) { + TApp tapp; + TestType cv; -TYPED_TEST(TApp_container_pair, containerPair) { + (tapp.app).add_option("--dict", cv); - auto &cv = TApp_container_pair<TypeParam>::cval; - (TApp::app).add_option("--dict", cv); + tapp.args = {"--dict", "1", "str1", "--dict", "3", "str3"}; - TApp::args = {"--dict", "1", "str1", "--dict", "3", "str3"}; + tapp.run(); + CHECK(2u == cv.size()); - TApp::run(); - EXPECT_EQ(cv.size(), 2u); - - TApp::args = {"--dict", "1", "str1", "--dict", "3", "--dict", "-1", "str4"}; - TApp::run(); - EXPECT_EQ(cv.size(), 3u); + tapp.args = {"--dict", "1", "str1", "--dict", "3", "--dict", "-1", "str4"}; + tapp.run(); + CHECK(3u == cv.size()); } template <class T> class TApp_container_tuple : public TApp { @@ -789,28 +775,28 @@ template <class T> class TApp_container_tuple : public TApp { }; using tup_obj = std::tuple<int, std::string, double>; -using containerTypes_tuple = ::testing::Types<std::vector<tup_obj>, - std::deque<tup_obj>, - std::set<tup_obj>, - std::list<tup_obj>, - std::map<int, std::pair<std::string, double>>, - std::unordered_map<int, std::tuple<std::string, double>>>; - -TYPED_TEST_SUITE(TApp_container_tuple, containerTypes_tuple, ); -TYPED_TEST(TApp_container_tuple, containerTuple) { +TEMPLATE_TEST_CASE("Container tuple", + "[optiontype]", + std::vector<tup_obj>, + std::deque<tup_obj>, + std::set<tup_obj>, + std::list<tup_obj>, + (std::map<int, std::pair<std::string, double>>), + (std::unordered_map<int, std::tuple<std::string, double>>)) { + TApp tapp; + TestType cv; - auto &cv = TApp_container_tuple<TypeParam>::cval; - (TApp::app).add_option("--dict", cv); + (tapp.app).add_option("--dict", cv); - TApp::args = {"--dict", 
"1", "str1", "4.3", "--dict", "3", "str3", "2.7"}; + tapp.args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7"}; - TApp::run(); - EXPECT_EQ(cv.size(), 2u); + tapp.run(); + CHECK(2u == cv.size()); - TApp::args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7", "--dict", "-1", "str4", "-1.87"}; - TApp::run(); - EXPECT_EQ(cv.size(), 3u); + tapp.args = {"--dict", "1", "str1", "4.3", "--dict", "3", "str3", "2.7", "--dict", "-1", "str4", "-1.87"}; + tapp.run(); + CHECK(3u == cv.size()); } using icontainer1 = std::vector<int>; @@ -818,45 +804,37 @@ using icontainer2 = std::list<int>; using icontainer3 = std::set<int>; using icontainer4 = std::pair<int, std::vector<int>>; -using containerTypes_container = ::testing::Types<std::vector<icontainer1>, - std::list<icontainer1>, - std::set<icontainer1>, - std::deque<icontainer1>, - std::vector<icontainer2>, - std::list<icontainer2>, - std::set<icontainer2>, - std::deque<icontainer2>, - std::vector<icontainer3>, - std::list<icontainer3>, - std::set<icontainer3>, - std::deque<icontainer3>>; - -template <class T> class TApp_container_container : public TApp { - public: - using container_type = T; - container_type cval{}; - TApp_container_container() : TApp() {} -}; - -TYPED_TEST_SUITE(TApp_container_container, containerTypes_container, ); - -TYPED_TEST(TApp_container_container, containerContainer) { +TEMPLATE_TEST_CASE("Container container", + "[optiontype]", + std::vector<icontainer1>, + std::list<icontainer1>, + std::set<icontainer1>, + std::deque<icontainer1>, + std::vector<icontainer2>, + std::list<icontainer2>, + std::set<icontainer2>, + std::deque<icontainer2>, + std::vector<icontainer3>, + std::list<icontainer3>, + std::set<icontainer3>, + std::deque<icontainer3>) { + TApp tapp; + TestType cv; - auto &cv = TApp_container_container<TypeParam>::cval; - (TApp::app).add_option("--dict", cv); + (tapp.app).add_option("--dict", cv); - TApp::args = {"--dict", "1", "2", "4", "--dict", "3", "1"}; + tapp.args 
= {"--dict", "1", "2", "4", "--dict", "3", "1"}; - TApp::run(); - EXPECT_EQ(cv.size(), 2u); + tapp.run(); + CHECK(2u == cv.size()); - TApp::args = {"--dict", "1", "2", "4", "--dict", "3", "1", "--dict", "3", "--dict", - "3", "3", "3", "3", "3", "3", "3", "3", "3", "-3"}; - TApp::run(); - EXPECT_EQ(cv.size(), 4u); + tapp.args = {"--dict", "1", "2", "4", "--dict", "3", "1", "--dict", "3", "--dict", + "3", "3", "3", "3", "3", "3", "3", "3", "3", "-3"}; + tapp.run(); + CHECK(4u == cv.size()); } -TEST_F(TApp, containerContainer) { +TEST_CASE_METHOD(TApp, "containerContainer", "[optiontype]") { std::vector<icontainer4> cv; app.add_option("--dict", cv); @@ -864,15 +842,15 @@ TEST_F(TApp, containerContainer) { args = {"--dict", "1", "2", "4", "--dict", "3", "1"}; run(); - EXPECT_EQ(cv.size(), 2u); + CHECK(2u == cv.size()); args = {"--dict", "1", "2", "4", "--dict", "3", "1", "--dict", "3", "", "--dict", "3", "3", "3", "3", "3", "3", "3", "3", "3", "-3"}; run(); - EXPECT_EQ(cv.size(), 4u); + CHECK(4u == cv.size()); } -TEST_F(TApp, unknownContainerWrapper) { +TEST_CASE_METHOD(TApp, "unknownContainerWrapper", "[optiontype]") { class vopt { public: @@ -887,14 +865,14 @@ TEST_F(TApp, unknownContainerWrapper) { args = {"--vv", "1", "2", "4"}; run(); - EXPECT_EQ(cv.val_.size(), 3u); + CHECK(3u == cv.val_.size()); args = {"--vv", ""}; run(); - EXPECT_TRUE(cv.val_.empty()); + CHECK(cv.val_.empty()); } -TEST_F(TApp, tupleTwoVectors) { +TEST_CASE_METHOD(TApp, "tupleTwoVectors", "[optiontype]") { std::tuple<std::vector<int>, std::vector<int>> cv; app.add_option("--vv", cv); @@ -902,17 +880,17 @@ TEST_F(TApp, tupleTwoVectors) { args = {"--vv", "1", "2", "4"}; run(); - EXPECT_EQ(std::get<0>(cv).size(), 3U); - EXPECT_TRUE(std::get<1>(cv).empty()); + CHECK(3U == std::get<0>(cv).size()); + CHECK(std::get<1>(cv).empty()); args = {"--vv", "1", "2", "%%", "4", "4", "5"}; run(); - EXPECT_EQ(std::get<0>(cv).size(), 2U); - EXPECT_EQ(std::get<1>(cv).size(), 3U); + CHECK(2U == 
std::get<0>(cv).size()); + CHECK(3U == std::get<1>(cv).size()); } -TEST_F(TApp, vectorSingleArg) { +TEST_CASE_METHOD(TApp, "vectorSingleArg", "[optiontype]") { std::vector<int> cv; app.add_option("-c", cv)->allow_extra_args(false); @@ -921,11 +899,11 @@ TEST_F(TApp, vectorSingleArg) { args = {"-c", "1", "-c", "2", "4"}; run(); - EXPECT_EQ(cv.size(), 2U); - EXPECT_EQ(extra, "4"); + CHECK(2U == cv.size()); + CHECK("4" == extra); } -TEST_F(TApp, vectorDoubleArg) { +TEST_CASE_METHOD(TApp, "vectorDoubleArg", "[optiontype]") { std::vector<std::pair<int, std::string>> cv; app.add_option("-c", cv)->allow_extra_args(false); @@ -934,6 +912,6 @@ TEST_F(TApp, vectorDoubleArg) { args = {"-c", "1", "bob", "-c", "2", "apple", "4", "key"}; run(); - EXPECT_EQ(cv.size(), 2U); - EXPECT_EQ(extras.size(), 2U); + CHECK(2U == cv.size()); + CHECK(2U == extras.size()); } diff --git a/packages/CLI11/tests/OptionalTest.cpp b/packages/CLI11/tests/OptionalTest.cpp index 51f159aa69975b0dff22ed06ef315f0be8797e76..4d8da020b67b1f3531cce758d0eaa0e238bec745 100644 --- a/packages/CLI11/tests/OptionalTest.cpp +++ b/packages/CLI11/tests/OptionalTest.cpp @@ -62,70 +62,70 @@ #pragma warning(disable : 4244) #endif -TEST_F(TApp, StdOptionalTest) { +TEST_CASE_METHOD(TApp, "StdOptionalTest", "[optional]") { std::optional<int> opt; app.add_option("-c,--count", opt); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-c", "1"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 1); + CHECK(opt); + CHECK(1 == *opt); args = {"--count", "3"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 3); + CHECK(opt); + CHECK(3 == *opt); } -TEST_F(TApp, StdOptionalVectorEmptyDirect) { +TEST_CASE_METHOD(TApp, "StdOptionalVectorEmptyDirect", "[optional]") { std::optional<std::vector<int>> opt; app.add_option("-v,--vec", opt)->expected(0, 3)->allow_extra_args(); // app.add_option("-v,--vec", opt)->expected(0, 3)->allow_extra_args(); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-v"}; opt = std::vector<int>{4, 3}; run(); - 
EXPECT_FALSE(opt); + CHECK(!opt); args = {"-v", "1", "4", "5"}; run(); - EXPECT_TRUE(opt); + CHECK(opt); std::vector<int> expV{1, 4, 5}; - EXPECT_EQ(*opt, expV); + CHECK(expV == *opt); } -TEST_F(TApp, StdOptionalComplexDirect) { +TEST_CASE_METHOD(TApp, "StdOptionalComplexDirect", "[optional]") { std::optional<std::complex<double>> opt; app.add_option("-c,--complex", opt)->type_size(0, 2); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-c"}; opt = std::complex<double>{4.0, 3.0}; run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-c", "1+2j"}; run(); - EXPECT_TRUE(opt); + CHECK(opt); std::complex<double> val{1, 2}; - EXPECT_EQ(*opt, val); + CHECK(val == *opt); args = {"-c", "3", "-4"}; run(); - EXPECT_TRUE(opt); + CHECK(opt); std::complex<double> val2{3, -4}; - EXPECT_EQ(*opt, val2); + CHECK(val2 == *opt); } -TEST_F(TApp, StdOptionalUint) { +TEST_CASE_METHOD(TApp, "StdOptionalUint", "[optional]") { std::optional<std::uint64_t> opt; app.add_option("-i,--int", opt); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-i", "15"}; run(); - EXPECT_EQ(*opt, 15U); + CHECK(15U == *opt); static_assert(CLI::detail::classify_object<std::optional<std::uint64_t>>::value == CLI::detail::object_category::wrapper_value); } @@ -137,97 +137,97 @@ TEST_F(TApp, StdOptionalUint) { #endif #if CLI11_EXPERIMENTAL_OPTIONAL -TEST_F(TApp, ExperimentalOptionalTest) { +TEST_CASE_METHOD(TApp, "ExperimentalOptionalTest", "[optional]") { std::experimental::optional<int> opt; app.add_option("-c,--count", opt); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-c", "1"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 1); + CHECK(opt); + CHECK(1 == *opt); args = {"--count", "3"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 3); + CHECK(opt); + CHECK(3 == *opt); } #endif #if CLI11_BOOST_OPTIONAL -TEST_F(TApp, BoostOptionalTest) { +TEST_CASE_METHOD(TApp, "BoostOptionalTest", "[optional]") { boost::optional<int> opt; app.add_option("-c,--count", opt); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args 
= {"-c", "1"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 1); + CHECK(opt); + CHECK(1 == *opt); opt = {}; args = {"--count", "3"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 3); + CHECK(opt); + CHECK(3 == *opt); } -TEST_F(TApp, BoostOptionalTestZarg) { +TEST_CASE_METHOD(TApp, "BoostOptionalTestZarg", "[optional]") { boost::optional<int> opt; app.add_option("-c,--count", opt)->expected(0, 1); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-c", "1"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 1); + CHECK(opt); + CHECK(1 == *opt); opt = {}; args = {"--count"}; run(); - EXPECT_FALSE(opt); + CHECK(!opt); } -TEST_F(TApp, BoostOptionalint64Test) { +TEST_CASE_METHOD(TApp, "BoostOptionalint64Test", "[optional]") { boost::optional<std::int64_t> opt; app.add_option("-c,--count", opt); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-c", "1"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 1); + CHECK(opt); + CHECK(1 == *opt); opt = {}; args = {"--count", "3"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, 3); + CHECK(opt); + CHECK(3 == *opt); } -TEST_F(TApp, BoostOptionalStringTest) { +TEST_CASE_METHOD(TApp, "BoostOptionalStringTest", "[optional]") { boost::optional<std::string> opt; app.add_option("-s,--string", opt); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-s", "strval"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, "strval"); + CHECK(opt); + CHECK("strval" == *opt); opt = {}; args = {"--string", "strv"}; run(); - EXPECT_TRUE(opt); - EXPECT_EQ(*opt, "strv"); + CHECK(opt); + CHECK("strv" == *opt); } namespace boost { using CLI::enums::operator<<; } -TEST_F(TApp, BoostOptionalEnumTest) { +TEST_CASE_METHOD(TApp, "BoostOptionalEnumTest", "[optional]") { enum class eval : char { val0 = 0, val1 = 1, val2 = 2, val3 = 3, val4 = 4 }; boost::optional<eval> opt, opt2; @@ -237,93 +237,89 @@ TEST_F(TApp, BoostOptionalEnumTest) { optptr->capture_default_str(); auto dstring = optptr->get_default_str(); - EXPECT_TRUE(dstring.empty()); + CHECK(dstring.empty()); 
run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-v", "3"}; run(); - EXPECT_TRUE(opt); - EXPECT_TRUE(*opt == eval::val3); + CHECK(opt); + CHECK(*opt == eval::val3); opt = {}; args = {"--val", "1"}; run(); - EXPECT_TRUE(opt); - EXPECT_TRUE(*opt == eval::val1); + CHECK(opt); + CHECK(*opt == eval::val1); } -TEST_F(TApp, BoostOptionalVector) { +TEST_CASE_METHOD(TApp, "BoostOptionalVector", "[optional]") { boost::optional<std::vector<int>> opt; app.add_option_function<std::vector<int>>( "-v,--vec", [&opt](const std::vector<int> &v) { opt = v; }, "some vector") ->expected(3); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-v", "1", "4", "5"}; run(); - EXPECT_TRUE(opt); + CHECK(opt); std::vector<int> expV{1, 4, 5}; - EXPECT_EQ(*opt, expV); + CHECK(expV == *opt); } -TEST_F(TApp, BoostOptionalVectorEmpty) { +TEST_CASE_METHOD(TApp, "BoostOptionalVectorEmpty", "[optional]") { boost::optional<std::vector<int>> opt; app.add_option<decltype(opt), std::vector<int>>("-v,--vec", opt)->expected(0, 3)->allow_extra_args(); // app.add_option("-v,--vec", opt)->expected(0, 3)->allow_extra_args(); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-v"}; opt = std::vector<int>{4, 3}; run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-v", "1", "4", "5"}; run(); - EXPECT_TRUE(opt); + CHECK(opt); std::vector<int> expV{1, 4, 5}; - EXPECT_EQ(*opt, expV); + CHECK(expV == *opt); } -TEST_F(TApp, BoostOptionalVectorEmptyDirect) { +TEST_CASE_METHOD(TApp, "BoostOptionalVectorEmptyDirect", "[optional]") { boost::optional<std::vector<int>> opt; app.add_option_no_stream("-v,--vec", opt)->expected(0, 3)->allow_extra_args(); // app.add_option("-v,--vec", opt)->expected(0, 3)->allow_extra_args(); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-v"}; opt = std::vector<int>{4, 3}; run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-v", "1", "4", "5"}; run(); - EXPECT_TRUE(opt); + CHECK(opt); std::vector<int> expV{1, 4, 5}; - EXPECT_EQ(*opt, expV); + CHECK(expV == *opt); } -TEST_F(TApp, 
BoostOptionalComplexDirect) { +TEST_CASE_METHOD(TApp, "BoostOptionalComplexDirect", "[optional]") { boost::optional<std::complex<double>> opt; app.add_option("-c,--complex", opt)->type_size(0, 2); run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-c"}; opt = std::complex<double>{4.0, 3.0}; run(); - EXPECT_FALSE(opt); + CHECK(!opt); args = {"-c", "1+2j"}; run(); - EXPECT_TRUE(opt); + CHECK(opt); std::complex<double> val{1, 2}; - EXPECT_EQ(*opt, val); + CHECK(val == *opt); args = {"-c", "3", "-4"}; run(); - EXPECT_TRUE(opt); + CHECK(opt); std::complex<double> val2{3, -4}; - EXPECT_EQ(*opt, val2); + CHECK(val2 == *opt); } #endif - -#if !CLI11_OPTIONAL -TEST_F(TApp, DISABLED_OptionalTest) {} -#endif diff --git a/packages/CLI11/tests/SetTest.cpp b/packages/CLI11/tests/SetTest.cpp index 7b6236edcbb13dfe0eab73200e11fd223daf57bf..d34a67a64f2163a7e1704fbc99a1952188611e37 100644 --- a/packages/CLI11/tests/SetTest.cpp +++ b/packages/CLI11/tests/SetTest.cpp @@ -31,79 +31,79 @@ static_assert(CLI::detail::pair_adaptor<std::vector<std::string>>::value == fals static_assert(CLI::detail::pair_adaptor<std::map<int, int>>::value == true, "Should have pairs"); static_assert(CLI::detail::pair_adaptor<std::vector<std::pair<int, int>>>::value == true, "Should have pairs"); -TEST_F(TApp, SimpleMaps) { +TEST_CASE_METHOD(TApp, "SimpleMaps", "[set]") { int value{0}; std::map<std::string, int> map = {{"one", 1}, {"two", 2}}; auto opt = app.add_option("-s,--set", value)->transform(CLI::Transformer(map)); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } -TEST_F(TApp, StringStringMap) { +TEST_CASE_METHOD(TApp, "StringStringMap", "[set]") { std::string value; std::map<std::string, std::string> map = {{"a", "b"}, {"b", "c"}}; app.add_option("-s,--set", 
value)->transform(CLI::CheckedTransformer(map)); args = {"-s", "a"}; run(); - EXPECT_EQ(value, "b"); + CHECK("b" == value); args = {"-s", "b"}; run(); - EXPECT_EQ(value, "c"); + CHECK("c" == value); args = {"-s", "c"}; - EXPECT_EQ(value, "c"); + CHECK("c" == value); } -TEST_F(TApp, StringStringMapNoModify) { +TEST_CASE_METHOD(TApp, "StringStringMapNoModify", "[set]") { std::string value; std::map<std::string, std::string> map = {{"a", "b"}, {"b", "c"}}; app.add_option("-s,--set", value)->check(CLI::IsMember(map)); args = {"-s", "a"}; run(); - EXPECT_EQ(value, "a"); + CHECK("a" == value); args = {"-s", "b"}; run(); - EXPECT_EQ(value, "b"); + CHECK("b" == value); args = {"-s", "c"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } enum SimpleEnum { SE_one = 1, SE_two = 2 }; -TEST_F(TApp, EnumMap) { +TEST_CASE_METHOD(TApp, "EnumMap", "[set]") { SimpleEnum value; std::map<std::string, SimpleEnum> map = {{"one", SE_one}, {"two", SE_two}}; auto opt = app.add_option("-s,--set", value)->transform(CLI::Transformer(map)); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, SE_one); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK(SE_one == value); } enum class SimpleEnumC { one = 1, two = 2 }; -TEST_F(TApp, EnumCMap) { +TEST_CASE_METHOD(TApp, "EnumCMap", "[set]") { SimpleEnumC value; std::map<std::string, SimpleEnumC> map = {{"one", SimpleEnumC::one}, {"two", SimpleEnumC::two}}; auto opt = app.add_option("-s,--set", value)->transform(CLI::Transformer(map)); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, SimpleEnumC::one); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK(SimpleEnumC::one == value); } -TEST_F(TApp, structMap) 
{ +TEST_CASE_METHOD(TApp, "structMap", "[set]") { struct tstruct { int val2; double val3; @@ -114,16 +114,16 @@ TEST_F(TApp, structMap) { auto opt = app.add_option("-s,--set", struct_name)->check(CLI::IsMember(map)); args = {"-s", "sone"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(struct_name, "sone"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("sone" == struct_name); args = {"-s", "sthree"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, structMapChange) { +TEST_CASE_METHOD(TApp, "structMapChange", "[set]") { struct tstruct { int val2; double val3; @@ -135,23 +135,23 @@ TEST_F(TApp, structMapChange) { ->transform(CLI::IsMember(map, CLI::ignore_case, CLI::ignore_underscore, CLI::ignore_space)); args = {"-s", "s one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(struct_name, "sone"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("sone" == struct_name); args = {"-s", "sthree"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-s", "S_t_w_o"}; run(); - EXPECT_EQ(struct_name, "stwo"); + CHECK("stwo" == struct_name); args = {"-s", "S two"}; run(); - EXPECT_EQ(struct_name, "stwo"); + CHECK("stwo" == struct_name); } -TEST_F(TApp, structMapNoChange) { +TEST_CASE_METHOD(TApp, "structMapNoChange", "[set]") { struct tstruct { int val2; double val3; @@ -163,24 +163,24 @@ TEST_F(TApp, structMapNoChange) { ->check(CLI::IsMember(map, CLI::ignore_case, CLI::ignore_underscore, CLI::ignore_space)); args = {"-s", "SONE"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(struct_name, "SONE"); + CHECK(app.count("-s") == 1u); + 
CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("SONE" == struct_name); args = {"-s", "sthree"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-s", "S_t_w_o"}; run(); - EXPECT_EQ(struct_name, "S_t_w_o"); + CHECK("S_t_w_o" == struct_name); args = {"-s", "S two"}; run(); - EXPECT_EQ(struct_name, "S two"); + CHECK("S two" == struct_name); } -TEST_F(TApp, NonCopyableMap) { +TEST_CASE_METHOD(TApp, "NonCopyableMap", "[set]") { std::string map_name; std::map<std::string, std::unique_ptr<double>> map; @@ -189,16 +189,16 @@ TEST_F(TApp, NonCopyableMap) { auto opt = app.add_option("-s,--set", map_name)->check(CLI::IsMember(&map)); args = {"-s", "e1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(map_name, "e1"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("e1" == map_name); args = {"-s", "e45"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, NonCopyableMapWithFunction) { +TEST_CASE_METHOD(TApp, "NonCopyableMapWithFunction", "[set]") { std::string map_name; std::map<std::string, std::unique_ptr<double>> map; @@ -207,16 +207,16 @@ TEST_F(TApp, NonCopyableMapWithFunction) { auto opt = app.add_option("-s,--set", map_name)->transform(CLI::IsMember(&map, CLI::ignore_underscore)); args = {"-s", "e_1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(map_name, "e1"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("e1" == map_name); args = {"-s", "e45"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, NonCopyableMapNonStringMap) { +TEST_CASE_METHOD(TApp, "NonCopyableMapNonStringMap", "[set]") { std::string map_name; 
std::map<int, std::unique_ptr<double>> map; @@ -225,16 +225,16 @@ TEST_F(TApp, NonCopyableMapNonStringMap) { auto opt = app.add_option("-s,--set", map_name)->check(CLI::IsMember(&map)); args = {"-s", "4"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(map_name, "4"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("4" == map_name); args = {"-s", "e45"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, CopyableMapMove) { +TEST_CASE_METHOD(TApp, "CopyableMapMove", "[set]") { std::string map_name; std::map<int, double> map; @@ -243,162 +243,162 @@ TEST_F(TApp, CopyableMapMove) { auto opt = app.add_option("-s,--set", map_name)->check(CLI::IsMember(std::move(map))); args = {"-s", "4"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(map_name, "4"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("4" == map_name); args = {"-s", "e45"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, SimpleSets) { +TEST_CASE_METHOD(TApp, "SimpleSets", "[set]") { std::string value; auto opt = app.add_option("-s,--set", value)->check(CLI::IsMember{std::set<std::string>({"one", "two", "three"})}); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, "one"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("one" == value); } -TEST_F(TApp, SimpleSetsPtrs) { +TEST_CASE_METHOD(TApp, "SimpleSetsPtrs", "[set]") { auto set = std::shared_ptr<std::set<std::string>>(new std::set<std::string>{"one", "two", "three"}); std::string value; auto opt = app.add_option("-s,--set", 
value)->check(CLI::IsMember{set}); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, "one"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("one" == value); set->insert("four"); args = {"-s", "four"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, "four"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("four" == value); } -TEST_F(TApp, SimiShortcutSets) { +TEST_CASE_METHOD(TApp, "SimiShortcutSets", "[set]") { std::string value; auto opt = app.add_option("--set", value)->check(CLI::IsMember({"one", "two", "three"})); args = {"--set", "one"}; run(); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, "one"); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("one" == value); std::string value2; auto opt2 = app.add_option("--set2", value2)->transform(CLI::IsMember({"One", "two", "three"}, CLI::ignore_case)); args = {"--set2", "onE"}; run(); - EXPECT_EQ(1u, app.count("--set2")); - EXPECT_EQ(1u, opt2->count()); - EXPECT_EQ(value2, "One"); + CHECK(app.count("--set2") == 1u); + CHECK(opt2->count() == 1u); + CHECK("One" == value2); std::string value3; auto opt3 = app.add_option("--set3", value3) ->transform(CLI::IsMember({"O_ne", "two", "three"}, CLI::ignore_case, CLI::ignore_underscore)); args = {"--set3", "onE"}; run(); - EXPECT_EQ(1u, app.count("--set3")); - EXPECT_EQ(1u, opt3->count()); - EXPECT_EQ(value3, "O_ne"); + CHECK(app.count("--set3") == 1u); + CHECK(opt3->count() == 1u); + CHECK("O_ne" == value3); } -TEST_F(TApp, SetFromCharStarArrayVector) { +TEST_CASE_METHOD(TApp, "SetFromCharStarArrayVector", "[set]") { constexpr const char *names[3]{"one", "two", "three"}; std::string value; auto opt = app.add_option("-s,--set", 
value) ->check(CLI::IsMember{std::vector<std::string>(std::begin(names), std::end(names))}); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, "one"); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK("one" == value); } -TEST_F(TApp, OtherTypeSets) { +TEST_CASE_METHOD(TApp, "OtherTypeSets", "[set]") { int value{0}; std::vector<int> set = {2, 3, 4}; auto opt = app.add_option("--set", value)->check(CLI::IsMember(set)); args = {"--set", "3"}; run(); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 3); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK(3 == value); args = {"--set", "5"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); std::vector<int> set2 = {-2, 3, 4}; auto opt2 = app.add_option("--set2", value)->transform(CLI::IsMember(set2, [](int x) { return std::abs(x); })); args = {"--set2", "-3"}; run(); - EXPECT_EQ(1u, app.count("--set2")); - EXPECT_EQ(1u, opt2->count()); - EXPECT_EQ(value, 3); + CHECK(app.count("--set2") == 1u); + CHECK(opt2->count() == 1u); + CHECK(3 == value); args = {"--set2", "-3"}; run(); - EXPECT_EQ(1u, app.count("--set2")); - EXPECT_EQ(1u, opt2->count()); - EXPECT_EQ(value, 3); + CHECK(app.count("--set2") == 1u); + CHECK(opt2->count() == 1u); + CHECK(3 == value); args = {"--set2", "2"}; run(); - EXPECT_EQ(1u, app.count("--set2")); - EXPECT_EQ(1u, opt2->count()); - EXPECT_EQ(value, -2); + CHECK(app.count("--set2") == 1u); + CHECK(opt2->count() == 1u); + CHECK(-2 == value); } -TEST_F(TApp, NumericalSets) { +TEST_CASE_METHOD(TApp, "NumericalSets", "[set]") { int value{0}; auto opt = app.add_option("-s,--set", value)->check(CLI::IsMember{std::set<int>({1, 2, 3})}); args = {"-s", "1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, app.count("--set")); - EXPECT_EQ(1u, 
opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(app.count("--set") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } // Converted original set tests -TEST_F(TApp, SetWithDefaults) { +TEST_CASE_METHOD(TApp, "SetWithDefaults", "[set]") { int someint{2}; app.add_option("-a", someint, "", true)->check(CLI::IsMember({1, 2, 3, 4})); args = {"-a1", "-a2"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, SetWithDefaultsConversion) { +TEST_CASE_METHOD(TApp, "SetWithDefaultsConversion", "[set]") { int someint{2}; app.add_option("-a", someint, "", true)->check(CLI::IsMember({1, 2, 3, 4})); args = {"-a", "hi"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, SetWithDefaultsIC) { +TEST_CASE_METHOD(TApp, "SetWithDefaultsIC", "[set]") { std::string someint = "ho"; app.add_option("-a", someint, "", true)->check(CLI::IsMember({"Hi", "Ho"})); args = {"-aHi", "-aHo"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, InSet) { +TEST_CASE_METHOD(TApp, "InSet", "[set]") { std::string choice; app.add_option("-q,--quick", choice)->check(CLI::IsMember({"one", "two", "three"})); @@ -406,47 +406,47 @@ TEST_F(TApp, InSet) { args = {"--quick", "two"}; run(); - EXPECT_EQ("two", choice); + CHECK(choice == "two"); args = {"--quick", "four"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, InSetWithDefault) { +TEST_CASE_METHOD(TApp, "InSetWithDefault", "[set]") { std::string choice = "one"; app.add_option("-q,--quick", choice, "", true)->check(CLI::IsMember({"one", "two", "three"})); run(); - EXPECT_EQ("one", choice); + CHECK(choice == "one"); args = {"--quick", "two"}; run(); - EXPECT_EQ("two", choice); + CHECK(choice == "two"); args = {"--quick", "four"}; - EXPECT_THROW(run(), CLI::ValidationError); + 
CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, InCaselessSetWithDefault) { +TEST_CASE_METHOD(TApp, "InCaselessSetWithDefault", "[set]") { std::string choice = "one"; app.add_option("-q,--quick", choice, "", true)->transform(CLI::IsMember({"one", "two", "three"}, CLI::ignore_case)); run(); - EXPECT_EQ("one", choice); + CHECK(choice == "one"); args = {"--quick", "tWo"}; run(); - EXPECT_EQ("two", choice); + CHECK(choice == "two"); args = {"--quick", "four"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, InIntSet) { +TEST_CASE_METHOD(TApp, "InIntSet", "[set]") { int choice{0}; app.add_option("-q,--quick", choice)->check(CLI::IsMember({1, 2, 3})); @@ -454,13 +454,13 @@ TEST_F(TApp, InIntSet) { args = {"--quick", "2"}; run(); - EXPECT_EQ(2, choice); + CHECK(choice == 2); args = {"--quick", "4"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, InIntSetWindows) { +TEST_CASE_METHOD(TApp, "InIntSetWindows", "[set]") { int choice{0}; app.add_option("-q,--quick", choice)->check(CLI::IsMember({1, 2, 3})); @@ -468,28 +468,28 @@ TEST_F(TApp, InIntSetWindows) { args = {"/q", "2"}; run(); - EXPECT_EQ(2, choice); + CHECK(choice == 2); args = {"/q", "4"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"/q4"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(TApp, FailSet) { +TEST_CASE_METHOD(TApp, "FailSet", "[set]") { int choice{0}; app.add_option("-q,--quick", choice)->check(CLI::IsMember({1, 2, 3})); args = {"--quick", "3", "--quick=2"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); args = {"--quick=hello"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, FailMutableSet) { +TEST_CASE_METHOD(TApp, "FailMutableSet", "[set]") { int 
choice{0}; auto vals = std::shared_ptr<std::set<int>>(new std::set<int>({1, 2, 3})); @@ -497,37 +497,37 @@ TEST_F(TApp, FailMutableSet) { app.add_option("-s,--slow", choice, "", true)->check(CLI::IsMember(vals)); args = {"--quick=hello"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--slow=hello"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, InSetIgnoreCase) { +TEST_CASE_METHOD(TApp, "InSetIgnoreCase", "[set]") { std::string choice; app.add_option("-q,--quick", choice)->transform(CLI::IsMember({"one", "Two", "THREE"}, CLI::ignore_case)); args = {"--quick", "One"}; run(); - EXPECT_EQ("one", choice); + CHECK(choice == "one"); args = {"--quick", "two"}; run(); - EXPECT_EQ("Two", choice); // Keeps caps from set + CHECK(choice == "Two"); args = {"--quick", "ThrEE"}; run(); - EXPECT_EQ("THREE", choice); // Keeps caps from set + CHECK(choice == "THREE"); args = {"--quick", "four"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--quick=one", "--quick=two"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, InSetIgnoreCaseMutableValue) { +TEST_CASE_METHOD(TApp, "InSetIgnoreCaseMutableValue", "[set]") { std::set<std::string> options{"one", "Two", "THREE"}; std::string choice; @@ -535,22 +535,22 @@ TEST_F(TApp, InSetIgnoreCaseMutableValue) { args = {"--quick", "One"}; run(); - EXPECT_EQ("one", choice); + CHECK(choice == "one"); args = {"--quick", "two"}; run(); - EXPECT_EQ("Two", choice); // Keeps caps from set + CHECK(choice == "Two"); args = {"--quick", "ThrEE"}; run(); - EXPECT_EQ("THREE", choice); // Keeps caps from set + CHECK(choice == "THREE"); options.clear(); args = {"--quick", "ThrEE"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, InSetIgnoreCasePointer) { 
+TEST_CASE_METHOD(TApp, "InSetIgnoreCasePointer", "[set]") { std::set<std::string> *options = new std::set<std::string>{"one", "Two", "THREE"}; std::string choice; @@ -558,43 +558,43 @@ TEST_F(TApp, InSetIgnoreCasePointer) { args = {"--quick", "One"}; run(); - EXPECT_EQ("one", choice); + CHECK(choice == "one"); args = {"--quick", "two"}; run(); - EXPECT_EQ("Two", choice); // Keeps caps from set + CHECK(choice == "Two"); args = {"--quick", "ThrEE"}; run(); - EXPECT_EQ("THREE", choice); // Keeps caps from set + CHECK(choice == "THREE"); delete options; args = {"--quick", "ThrEE"}; run(); - EXPECT_EQ("THREE", choice); // this does not throw a segfault + CHECK(choice == "THREE"); args = {"--quick", "four"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--quick=one", "--quick=two"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, NotInSetIgnoreCasePointer) { +TEST_CASE_METHOD(TApp, "NotInSetIgnoreCasePointer", "[set]") { std::set<std::string> *options = new std::set<std::string>{"one", "Two", "THREE"}; std::string choice; app.add_option("-q,--quick", choice)->check(!CLI::IsMember(*options, CLI::ignore_case)); args = {"--quick", "One"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--quick", "four"}; run(); - EXPECT_EQ(choice, "four"); + CHECK("four" == choice); } -TEST_F(TApp, InSetIgnoreUnderscore) { +TEST_CASE_METHOD(TApp, "InSetIgnoreUnderscore", "[set]") { std::string choice; app.add_option("-q,--quick", choice) @@ -602,24 +602,24 @@ TEST_F(TApp, InSetIgnoreUnderscore) { args = {"--quick", "option_one"}; run(); - EXPECT_EQ("option_one", choice); + CHECK(choice == "option_one"); args = {"--quick", "optiontwo"}; run(); - EXPECT_EQ("option_two", choice); // Keeps underscore from set + CHECK(choice == "option_two"); args = {"--quick", "_option_thr_ee"}; run(); - EXPECT_EQ("optionthree", choice); // 
no underscore + CHECK(choice == "optionthree"); args = {"--quick", "Option4"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--quick=option_one", "--quick=option_two"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } -TEST_F(TApp, InSetIgnoreCaseUnderscore) { +TEST_CASE_METHOD(TApp, "InSetIgnoreCaseUnderscore", "[set]") { std::string choice; app.add_option("-q,--quick", choice) @@ -628,25 +628,25 @@ TEST_F(TApp, InSetIgnoreCaseUnderscore) { args = {"--quick", "option_one"}; run(); - EXPECT_EQ("Option_One", choice); + CHECK(choice == "Option_One"); args = {"--quick", "OptionTwo"}; run(); - EXPECT_EQ("option_two", choice); // Keeps underscore and case from set + CHECK(choice == "option_two"); args = {"--quick", "_OPTION_thr_ee"}; run(); - EXPECT_EQ("OptionThree", choice); // no underscore + CHECK(choice == "OptionThree"); args = {"--quick", "Option4"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--quick=option_one", "--quick=option_two"}; - EXPECT_THROW(run(), CLI::ArgumentMismatch); + CHECK_THROWS_AS(run(), CLI::ArgumentMismatch); } // #113 -TEST_F(TApp, AddRemoveSetItems) { +TEST_CASE_METHOD(TApp, "AddRemoveSetItems", "[set]") { std::set<std::string> items{"TYPE1", "TYPE2", "TYPE3", "TYPE4", "TYPE5"}; std::string type1, type2; @@ -656,8 +656,8 @@ TEST_F(TApp, AddRemoveSetItems) { args = {"--type1", "TYPE1", "--type2", "TYPE2"}; run(); - EXPECT_EQ(type1, "TYPE1"); - EXPECT_EQ(type2, "TYPE2"); + CHECK("TYPE1" == type1); + CHECK("TYPE2" == type2); items.insert("TYPE6"); items.insert("TYPE7"); @@ -667,17 +667,17 @@ TEST_F(TApp, AddRemoveSetItems) { args = {"--type1", "TYPE6", "--type2", "TYPE7"}; run(); - EXPECT_EQ(type1, "TYPE6"); - EXPECT_EQ(type2, "TYPE7"); + CHECK("TYPE6" == type1); + CHECK("TYPE7" == type2); args = {"--type1", "TYPE1"}; - EXPECT_THROW(run(), CLI::ValidationError); + 
CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--type2", "TYPE2"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, AddRemoveSetItemsNoCase) { +TEST_CASE_METHOD(TApp, "AddRemoveSetItemsNoCase", "[set]") { std::set<std::string> items{"TYPE1", "TYPE2", "TYPE3", "TYPE4", "TYPE5"}; std::string type1, type2; @@ -687,8 +687,8 @@ TEST_F(TApp, AddRemoveSetItemsNoCase) { args = {"--type1", "TYPe1", "--type2", "TyPE2"}; run(); - EXPECT_EQ(type1, "TYPE1"); - EXPECT_EQ(type2, "TYPE2"); + CHECK("TYPE1" == type1); + CHECK("TYPE2" == type2); items.insert("TYPE6"); items.insert("TYPE7"); @@ -698,12 +698,12 @@ TEST_F(TApp, AddRemoveSetItemsNoCase) { args = {"--type1", "TyPE6", "--type2", "tYPE7"}; run(); - EXPECT_EQ(type1, "TYPE6"); - EXPECT_EQ(type2, "TYPE7"); + CHECK("TYPE6" == type1); + CHECK("TYPE7" == type2); args = {"--type1", "TYPe1"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"--type2", "TYpE2"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } diff --git a/packages/CLI11/tests/SimpleTest.cpp b/packages/CLI11/tests/SimpleTest.cpp index dfcd5579127bf363a04893de6826fbb36ed345bd..92262b5e64ef7f30a65fe50d7a38592d33ac1809 100644 --- a/packages/CLI11/tests/SimpleTest.cpp +++ b/packages/CLI11/tests/SimpleTest.cpp @@ -10,11 +10,11 @@ #include "CLI/CLI.hpp" #endif -#include "gtest/gtest.h" +#include "catch.hpp" using input_t = std::vector<std::string>; -TEST(Basic, Empty) { +TEST_CASE("Basic: Empty", "[simple]") { { CLI::App app; @@ -24,7 +24,7 @@ TEST(Basic, Empty) { { CLI::App app; input_t spare = {"spare"}; - EXPECT_THROW(app.parse(spare), CLI::ExtrasError); + CHECK_THROWS_AS(app.parse(spare), CLI::ExtrasError); } { CLI::App app; diff --git a/packages/CLI11/tests/StringParseTest.cpp b/packages/CLI11/tests/StringParseTest.cpp index 
568bb4ecc45105db27fd51df415c70c379016cac..3a9f7371d87bd0412554eb49d2fe559ea060c95c 100644 --- a/packages/CLI11/tests/StringParseTest.cpp +++ b/packages/CLI11/tests/StringParseTest.cpp @@ -6,11 +6,10 @@ #include "app_helper.hpp" -#include "gmock/gmock.h" #include <cstdio> #include <sstream> -TEST_F(TApp, ExistingExeCheck) { +TEST_CASE_METHOD(TApp, "ExistingExeCheck", "[stringparse]") { TempFile tmpexe{"existingExe.out"}; @@ -27,12 +26,12 @@ TEST_F(TApp, ExistingExeCheck) { app.parse(std::string("./") + std::string(tmpexe) + " --string=\"this is my quoted string\" -t 'qstring 2' -m=`\"quoted string\"`", true); - EXPECT_EQ(str, "this is my quoted string"); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("this is my quoted string" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); } -TEST_F(TApp, ExistingExeCheckWithSpace) { +TEST_CASE_METHOD(TApp, "ExistingExeCheckWithSpace", "[stringparse]") { TempFile tmpexe{"Space File.out"}; @@ -49,14 +48,14 @@ TEST_F(TApp, ExistingExeCheckWithSpace) { app.parse(std::string("./") + std::string(tmpexe) + " --string=\"this is my quoted string\" -t 'qstring 2' -m=`\"quoted string\"`", true); - EXPECT_EQ(str, "this is my quoted string"); - EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("this is my quoted string" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); - EXPECT_EQ(app.get_name(), std::string("./") + std::string(tmpexe)); + CHECK(std::string("./") + std::string(tmpexe) == app.get_name()); } -TEST_F(TApp, ExistingExeCheckWithLotsOfSpace) { +TEST_CASE_METHOD(TApp, "ExistingExeCheckWithLotsOfSpace", "[stringparse]") { TempFile tmpexe{"this is a weird file.exe"}; @@ -73,9 +72,9 @@ TEST_F(TApp, ExistingExeCheckWithLotsOfSpace) { app.parse(std::string("./") + std::string(tmpexe) + " --string=\"this is my quoted string\" -t 'qstring 2' -m=`\"quoted string\"`", true); - EXPECT_EQ(str, "this is my quoted string"); - 
EXPECT_EQ(str2, "qstring 2"); - EXPECT_EQ(str3, "\"quoted string\""); + CHECK("this is my quoted string" == str); + CHECK("qstring 2" == str2); + CHECK("\"quoted string\"" == str3); - EXPECT_EQ(app.get_name(), std::string("./") + std::string(tmpexe)); + CHECK(std::string("./") + std::string(tmpexe) == app.get_name()); } diff --git a/packages/CLI11/tests/SubcommandTest.cpp b/packages/CLI11/tests/SubcommandTest.cpp index e6f961345ed1f784f9bb0a02a02899b8a9c4ec1d..eef67bca7d44994678b3534dd8c3d155171a7479 100644 --- a/packages/CLI11/tests/SubcommandTest.cpp +++ b/packages/CLI11/tests/SubcommandTest.cpp @@ -6,59 +6,55 @@ #include "app_helper.hpp" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using ::testing::HasSubstr; -using ::testing::Not; +using Catch::Matchers::Contains; using vs_t = std::vector<std::string>; -TEST_F(TApp, BasicSubcommands) { +TEST_CASE_METHOD(TApp, "BasicSubcommands", "[subcom]") { auto sub1 = app.add_subcommand("sub1"); auto sub2 = app.add_subcommand("sub2"); - EXPECT_EQ(sub1->get_parent(), &app); + CHECK(&app == sub1->get_parent()); - EXPECT_EQ(sub1, app.get_subcommand(sub1)); - EXPECT_EQ(sub1, app.get_subcommand("sub1")); - EXPECT_THROW(app.get_subcommand("sub3"), CLI::OptionNotFound); + CHECK(app.get_subcommand(sub1) == sub1); + CHECK(app.get_subcommand("sub1") == sub1); + CHECK_THROWS_AS(app.get_subcommand("sub3"), CLI::OptionNotFound); run(); - EXPECT_EQ(0u, app.get_subcommands().size()); + CHECK(app.get_subcommands().size() == 0u); args = {"sub1"}; run(); - EXPECT_EQ(sub1, app.get_subcommands().at(0)); - EXPECT_EQ(1u, app.get_subcommands().size()); + CHECK(app.get_subcommands().at(0) == sub1); + CHECK(app.get_subcommands().size() == 1u); app.clear(); - EXPECT_EQ(0u, app.get_subcommands().size()); + CHECK(app.get_subcommands().size() == 0u); args = {"sub2"}; run(); - EXPECT_EQ(1u, app.get_subcommands().size()); - EXPECT_EQ(sub2, app.get_subcommands().at(0)); + CHECK(app.get_subcommands().size() == 1u); + 
CHECK(app.get_subcommands().at(0) == sub2); args = {"SUb2"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"SUb2"}; try { run(); } catch(const CLI::ExtrasError &e) { - EXPECT_THAT(e.what(), HasSubstr("SUb2")); + CHECK_THAT(e.what(), Contains("SUb2")); } args = {"sub1", "extra"}; try { run(); } catch(const CLI::ExtrasError &e) { - EXPECT_THAT(e.what(), HasSubstr("extra")); + CHECK_THAT(e.what(), Contains("extra")); } } -TEST_F(TApp, MultiSubFallthrough) { +TEST_CASE_METHOD(TApp, "MultiSubFallthrough", "[subcom]") { // No explicit fallthrough auto sub1 = app.add_subcommand("sub1"); @@ -66,15 +62,15 @@ TEST_F(TApp, MultiSubFallthrough) { args = {"sub1", "sub2"}; run(); - EXPECT_TRUE(app.got_subcommand("sub1")); - EXPECT_TRUE(app.got_subcommand(sub1)); - EXPECT_TRUE(*sub1); - EXPECT_TRUE(sub1->parsed()); - EXPECT_EQ(sub1->count(), 1u); + CHECK(app.got_subcommand("sub1")); + CHECK(app.got_subcommand(sub1)); + CHECK(*sub1); + CHECK(sub1->parsed()); + CHECK(1u == sub1->count()); - EXPECT_TRUE(app.got_subcommand("sub2")); - EXPECT_TRUE(app.got_subcommand(sub2)); - EXPECT_TRUE(*sub2); + CHECK(app.got_subcommand("sub2")); + CHECK(app.got_subcommand(sub2)); + CHECK(*sub2); app.require_subcommand(); run(); @@ -83,34 +79,34 @@ TEST_F(TApp, MultiSubFallthrough) { run(); app.require_subcommand(1); - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"sub1"}; run(); - EXPECT_TRUE(app.got_subcommand("sub1")); - EXPECT_FALSE(app.got_subcommand("sub2")); + CHECK(app.got_subcommand("sub1")); + CHECK(!app.got_subcommand("sub2")); - EXPECT_TRUE(*sub1); - EXPECT_FALSE(*sub2); - EXPECT_FALSE(sub2->parsed()); - EXPECT_EQ(sub2->count(), 0u); + CHECK(*sub1); + CHECK(!*sub2); + CHECK(!sub2->parsed()); + CHECK(0u == sub2->count()); - EXPECT_THROW(app.got_subcommand("sub3"), CLI::OptionNotFound); + CHECK_THROWS_AS(app.got_subcommand("sub3"), CLI::OptionNotFound); } -TEST_F(TApp, CrazyNameSubcommand) { 
+TEST_CASE_METHOD(TApp, "CrazyNameSubcommand", "[subcom]") { auto sub1 = app.add_subcommand("sub1"); // name can be set to whatever - EXPECT_NO_THROW(sub1->name("crazy name with spaces")); + CHECK_NOTHROW(sub1->name("crazy name with spaces")); args = {"crazy name with spaces"}; run(); - EXPECT_TRUE(app.got_subcommand("crazy name with spaces")); - EXPECT_EQ(sub1->count(), 1u); + CHECK(app.got_subcommand("crazy name with spaces")); + CHECK(1u == sub1->count()); } -TEST_F(TApp, RequiredAndSubcommands) { // #23 +TEST_CASE_METHOD(TApp, "RequiredAndSubcommands", "[subcom]") { std::string baz; app.add_option("baz", baz, "Baz Description", true)->required(); @@ -118,26 +114,26 @@ TEST_F(TApp, RequiredAndSubcommands) { // #23 auto bar = app.add_subcommand("bar"); args = {"bar", "foo"}; - ASSERT_NO_THROW(run()); - EXPECT_TRUE(*foo); - EXPECT_FALSE(*bar); - EXPECT_EQ(baz, "bar"); + REQUIRE_NOTHROW(run()); + CHECK(*foo); + CHECK(!*bar); + CHECK("bar" == baz); args = {"foo"}; - ASSERT_NO_THROW(run()); - EXPECT_FALSE(*foo); - EXPECT_EQ(baz, "foo"); + REQUIRE_NOTHROW(run()); + CHECK(!*foo); + CHECK("foo" == baz); args = {"foo", "foo"}; - ASSERT_NO_THROW(run()); - EXPECT_TRUE(*foo); - EXPECT_EQ(baz, "foo"); + REQUIRE_NOTHROW(run()); + CHECK(*foo); + CHECK("foo" == baz); args = {"foo", "other"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(TApp, RequiredAndSubcomFallthrough) { +TEST_CASE_METHOD(TApp, "RequiredAndSubcomFallthrough", "[subcom]") { std::string baz; app.add_option("baz", baz)->required(); @@ -147,14 +143,14 @@ TEST_F(TApp, RequiredAndSubcomFallthrough) { args = {"other", "bar"}; run(); - EXPECT_TRUE(bar); - EXPECT_EQ(baz, "other"); + CHECK(bar); + CHECK("other" == baz); args = {"bar", "other2"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(TApp, FooFooProblem) { +TEST_CASE_METHOD(TApp, "FooFooProblem", "[subcom]") { std::string baz_str, other_str; auto baz = 
app.add_option("baz", baz_str); @@ -163,55 +159,55 @@ TEST_F(TApp, FooFooProblem) { args = {"foo", "foo"}; run(); - EXPECT_TRUE(*foo); - EXPECT_FALSE(*baz); - EXPECT_TRUE(*other); - EXPECT_EQ(baz_str, ""); - EXPECT_EQ(other_str, "foo"); + CHECK(*foo); + CHECK(!*baz); + CHECK(*other); + CHECK("" == baz_str); + CHECK("foo" == other_str); baz_str = ""; other_str = ""; baz->required(); run(); - EXPECT_TRUE(*foo); - EXPECT_TRUE(*baz); - EXPECT_FALSE(*other); - EXPECT_EQ(baz_str, "foo"); - EXPECT_EQ(other_str, ""); + CHECK(*foo); + CHECK(*baz); + CHECK(!*other); + CHECK("foo" == baz_str); + CHECK("" == other_str); } -TEST_F(TApp, DuplicateSubcommands) { +TEST_CASE_METHOD(TApp, "DuplicateSubcommands", "[subcom]") { auto foo = app.add_subcommand("foo"); args = {"foo", "foo"}; run(); - EXPECT_TRUE(*foo); - EXPECT_EQ(foo->count(), 2u); + CHECK(*foo); + CHECK(2u == foo->count()); args = {"foo", "foo", "foo"}; run(); - EXPECT_TRUE(*foo); - EXPECT_EQ(foo->count(), 3u); + CHECK(*foo); + CHECK(3u == foo->count()); } -TEST_F(TApp, DuplicateSubcommandCallbacks) { +TEST_CASE_METHOD(TApp, "DuplicateSubcommandCallbacks", "[subcom]") { auto foo = app.add_subcommand("foo"); int count{0}; foo->callback([&count]() { ++count; }); foo->immediate_callback(); - EXPECT_TRUE(foo->get_immediate_callback()); + CHECK(foo->get_immediate_callback()); args = {"foo", "foo"}; run(); - EXPECT_EQ(count, 2); + CHECK(2 == count); count = 0; args = {"foo", "foo", "foo"}; run(); - EXPECT_EQ(count, 3); + CHECK(3 == count); } -TEST_F(TApp, DuplicateSubcommandCallbacksValues) { +TEST_CASE_METHOD(TApp, "DuplicateSubcommandCallbacksValues", "[subcom]") { auto foo = app.add_subcommand("foo"); int val{0}; @@ -221,19 +217,19 @@ TEST_F(TApp, DuplicateSubcommandCallbacksValues) { foo->immediate_callback(); args = {"foo", "--val=45", "foo", "--val=27"}; run(); - EXPECT_EQ(vals.size(), 2u); - EXPECT_EQ(vals[0], 45); - EXPECT_EQ(vals[1], 27); + CHECK(2u == vals.size()); + CHECK(45 == vals[0]); + CHECK(27 == vals[1]); 
vals.clear(); args = {"foo", "--val=45", "foo", "--val=27", "foo", "--val=36"}; run(); - EXPECT_EQ(vals.size(), 3u); - EXPECT_EQ(vals[0], 45); - EXPECT_EQ(vals[1], 27); - EXPECT_EQ(vals[2], 36); + CHECK(3u == vals.size()); + CHECK(45 == vals[0]); + CHECK(27 == vals[1]); + CHECK(36 == vals[2]); } -TEST_F(TApp, Callbacks) { +TEST_CASE_METHOD(TApp, "Callbacks", "[subcom]") { auto sub1 = app.add_subcommand("sub1"); sub1->callback([]() { throw CLI::Success(); }); auto sub2 = app.add_subcommand("sub2"); @@ -241,12 +237,12 @@ TEST_F(TApp, Callbacks) { sub2->callback([&val]() { val = true; }); args = {"sub2"}; - EXPECT_FALSE(val); + CHECK(!val); run(); - EXPECT_TRUE(val); + CHECK(val); } -TEST_F(TApp, CallbackOrder) { +TEST_CASE_METHOD(TApp, "CallbackOrder", "[subcom]") { std::vector<std::string> cb; app.parse_complete_callback([&cb]() { cb.push_back("ac1"); }); @@ -281,18 +277,18 @@ TEST_F(TApp, CallbackOrder) { "--sub2opt2", "val"}; run(); - EXPECT_EQ(cb.size(), 8u); - EXPECT_EQ(cb[0], "pa-13"); - EXPECT_EQ(cb[1], "pc1-10"); - EXPECT_EQ(cb[2], "c1"); - EXPECT_EQ(cb[3], "pc2-6"); - EXPECT_EQ(cb[4], "c1"); - EXPECT_EQ(cb[5], "ac1"); - EXPECT_EQ(cb[6], "c2"); - EXPECT_EQ(cb[7], "ac2"); + CHECK(8u == cb.size()); + CHECK("pa-13" == cb[0]); + CHECK("pc1-10" == cb[1]); + CHECK("c1" == cb[2]); + CHECK("pc2-6" == cb[3]); + CHECK("c1" == cb[4]); + CHECK("ac1" == cb[5]); + CHECK("c2" == cb[6]); + CHECK("ac2" == cb[7]); } -TEST_F(TApp, CallbackOrder2) { +TEST_CASE_METHOD(TApp, "CallbackOrder2", "[subcom]") { std::vector<std::string> cb; app.add_subcommand("sub1")->parse_complete_callback([&cb]() { cb.push_back("sub1"); }); @@ -301,17 +297,17 @@ TEST_F(TApp, CallbackOrder2) { args = {"sub1", "sub2", "sub3", "sub1", "sub1", "sub2", "sub1"}; run(); - EXPECT_EQ(cb.size(), 7u); - EXPECT_EQ(cb[0], "sub1"); - EXPECT_EQ(cb[1], "sub2"); - EXPECT_EQ(cb[2], "sub3"); - EXPECT_EQ(cb[3], "sub1"); - EXPECT_EQ(cb[4], "sub1"); - EXPECT_EQ(cb[5], "sub2"); - EXPECT_EQ(cb[6], "sub1"); + CHECK(7u == 
cb.size()); + CHECK("sub1" == cb[0]); + CHECK("sub2" == cb[1]); + CHECK("sub3" == cb[2]); + CHECK("sub1" == cb[3]); + CHECK("sub1" == cb[4]); + CHECK("sub2" == cb[5]); + CHECK("sub1" == cb[6]); } -TEST_F(TApp, CallbackOrder2_withFallthrough) { +TEST_CASE_METHOD(TApp, "CallbackOrder2_withFallthrough", "[subcom]") { std::vector<std::string> cb; @@ -321,64 +317,64 @@ TEST_F(TApp, CallbackOrder2_withFallthrough) { args = {"sub1", "sub2", "sub3", "sub1", "sub1", "sub2", "sub1"}; run(); - EXPECT_EQ(cb.size(), 7u); - EXPECT_EQ(cb[0], "sub1"); - EXPECT_EQ(cb[1], "sub2"); - EXPECT_EQ(cb[2], "sub3"); - EXPECT_EQ(cb[3], "sub1"); - EXPECT_EQ(cb[4], "sub1"); - EXPECT_EQ(cb[5], "sub2"); - EXPECT_EQ(cb[6], "sub1"); + CHECK(7u == cb.size()); + CHECK("sub1" == cb[0]); + CHECK("sub2" == cb[1]); + CHECK("sub3" == cb[2]); + CHECK("sub1" == cb[3]); + CHECK("sub1" == cb[4]); + CHECK("sub2" == cb[5]); + CHECK("sub1" == cb[6]); } -TEST_F(TApp, RuntimeErrorInCallback) { +TEST_CASE_METHOD(TApp, "RuntimeErrorInCallback", "[subcom]") { auto sub1 = app.add_subcommand("sub1"); sub1->callback([]() { throw CLI::RuntimeError(); }); auto sub2 = app.add_subcommand("sub2"); sub2->callback([]() { throw CLI::RuntimeError(2); }); args = {"sub1"}; - EXPECT_THROW(run(), CLI::RuntimeError); + CHECK_THROWS_AS(run(), CLI::RuntimeError); args = {"sub1"}; try { run(); } catch(const CLI::RuntimeError &e) { - EXPECT_EQ(1, e.get_exit_code()); + CHECK(e.get_exit_code() == 1); } args = {"sub2"}; - EXPECT_THROW(run(), CLI::RuntimeError); + CHECK_THROWS_AS(run(), CLI::RuntimeError); args = {"sub2"}; try { run(); } catch(const CLI::RuntimeError &e) { - EXPECT_EQ(2, e.get_exit_code()); + CHECK(e.get_exit_code() == 2); } } -TEST_F(TApp, NoFallThroughOpts) { +TEST_CASE_METHOD(TApp, "NoFallThroughOpts", "[subcom]") { int val{1}; app.add_option("--val", val); app.add_subcommand("sub"); args = {"sub", "--val", "2"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(TApp, 
NoFallThroughPositionals) { +TEST_CASE_METHOD(TApp, "NoFallThroughPositionals", "[subcom]") { int val{1}; app.add_option("val", val); app.add_subcommand("sub"); args = {"sub", "2"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(TApp, NoFallThroughOptsWithTerminator) { +TEST_CASE_METHOD(TApp, "NoFallThroughOptsWithTerminator", "[subcom]") { int val{1}; app.add_option("--val", val); @@ -386,10 +382,10 @@ TEST_F(TApp, NoFallThroughOptsWithTerminator) { args = {"sub", "++", "--val", "2"}; run(); - EXPECT_EQ(val, 2); + CHECK(2 == val); } -TEST_F(TApp, NoFallThroughPositionalsWithTerminator) { +TEST_CASE_METHOD(TApp, "NoFallThroughPositionalsWithTerminator", "[subcom]") { int val{1}; app.add_option("val", val); @@ -397,15 +393,15 @@ TEST_F(TApp, NoFallThroughPositionalsWithTerminator) { args = {"sub", "++", "2"}; run(); - EXPECT_EQ(val, 2); + CHECK(2 == val); // try with positional only mark args = {"sub", "--", "3"}; run(); - EXPECT_EQ(val, 3); + CHECK(3 == val); } -TEST_F(TApp, NamelessSubComPositionals) { +TEST_CASE_METHOD(TApp, "NamelessSubComPositionals", "[subcom]") { auto sub = app.add_subcommand(); int val{1}; @@ -413,21 +409,21 @@ TEST_F(TApp, NamelessSubComPositionals) { args = {"2"}; run(); - EXPECT_EQ(val, 2); + CHECK(2 == val); } -TEST_F(TApp, NamelessSubWithSub) { +TEST_CASE_METHOD(TApp, "NamelessSubWithSub", "[subcom]") { auto sub = app.add_subcommand(); auto subsub = sub->add_subcommand("val"); args = {"val"}; run(); - EXPECT_TRUE(subsub->parsed()); - EXPECT_TRUE(app.got_subcommand("val")); + CHECK(subsub->parsed()); + CHECK(app.got_subcommand("val")); } -TEST_F(TApp, NamelessSubWithMultipleSub) { +TEST_CASE_METHOD(TApp, "NamelessSubWithMultipleSub", "[subcom]") { auto sub1 = app.add_subcommand(); auto sub2 = app.add_subcommand(); @@ -437,33 +433,33 @@ TEST_F(TApp, NamelessSubWithMultipleSub) { auto sub2sub2 = sub2->add_subcommand("val4"); args = {"val1"}; run(); - EXPECT_TRUE(sub1sub1->parsed()); - 
EXPECT_TRUE(app.got_subcommand("val1")); + CHECK(sub1sub1->parsed()); + CHECK(app.got_subcommand("val1")); args = {"val2"}; run(); - EXPECT_TRUE(sub1sub2->parsed()); - EXPECT_TRUE(app.got_subcommand("val2")); + CHECK(sub1sub2->parsed()); + CHECK(app.got_subcommand("val2")); args = {"val3"}; run(); - EXPECT_TRUE(sub2sub1->parsed()); - EXPECT_TRUE(app.got_subcommand("val3")); + CHECK(sub2sub1->parsed()); + CHECK(app.got_subcommand("val3")); args = {"val4"}; run(); - EXPECT_TRUE(sub2sub2->parsed()); - EXPECT_TRUE(app.got_subcommand("val4")); + CHECK(sub2sub2->parsed()); + CHECK(app.got_subcommand("val4")); args = {"val4", "val1"}; run(); - EXPECT_TRUE(sub2sub2->parsed()); - EXPECT_TRUE(app.got_subcommand("val4")); - EXPECT_TRUE(sub1sub1->parsed()); - EXPECT_TRUE(app.got_subcommand("val1")); + CHECK(sub2sub2->parsed()); + CHECK(app.got_subcommand("val4")); + CHECK(sub1sub1->parsed()); + CHECK(app.got_subcommand("val1")); } -TEST_F(TApp, Nameless4LayerDeep) { +TEST_CASE_METHOD(TApp, "Nameless4LayerDeep", "[subcom]") { auto sub = app.add_subcommand(); auto ssub = sub->add_subcommand(); @@ -474,12 +470,12 @@ TEST_F(TApp, Nameless4LayerDeep) { args = {"val"}; run(); - EXPECT_TRUE(sssssub->parsed()); - EXPECT_TRUE(app.got_subcommand("val")); + CHECK(sssssub->parsed()); + CHECK(app.got_subcommand("val")); } /// Put subcommands in some crazy pattern and make everything still works -TEST_F(TApp, Nameless4LayerDeepMulti) { +TEST_CASE_METHOD(TApp, "Nameless4LayerDeepMulti", "[subcom]") { auto sub1 = app.add_subcommand(); auto sub2 = app.add_subcommand(); @@ -495,31 +491,31 @@ TEST_F(TApp, Nameless4LayerDeepMulti) { sssub2->add_subcommand("val5"); args = {"val1"}; run(); - EXPECT_TRUE(app.got_subcommand("val1")); + CHECK(app.got_subcommand("val1")); args = {"val2"}; run(); - EXPECT_TRUE(app.got_subcommand("val2")); + CHECK(app.got_subcommand("val2")); args = {"val3"}; run(); - EXPECT_TRUE(app.got_subcommand("val3")); + CHECK(app.got_subcommand("val3")); args = {"val4"}; run(); - 
EXPECT_TRUE(app.got_subcommand("val4")); + CHECK(app.got_subcommand("val4")); args = {"val5"}; run(); - EXPECT_TRUE(app.got_subcommand("val5")); + CHECK(app.got_subcommand("val5")); args = {"val4", "val1", "val5"}; run(); - EXPECT_TRUE(app.got_subcommand("val4")); - EXPECT_TRUE(app.got_subcommand("val1")); - EXPECT_TRUE(app.got_subcommand("val5")); + CHECK(app.got_subcommand("val4")); + CHECK(app.got_subcommand("val1")); + CHECK(app.got_subcommand("val5")); } -TEST_F(TApp, FallThroughRegular) { +TEST_CASE_METHOD(TApp, "FallThroughRegular", "[subcom]") { app.fallthrough(); int val{1}; app.add_option("--val", val); @@ -531,7 +527,7 @@ TEST_F(TApp, FallThroughRegular) { run(); } -TEST_F(TApp, FallThroughShort) { +TEST_CASE_METHOD(TApp, "FallThroughShort", "[subcom]") { app.fallthrough(); int val{1}; app.add_option("-v", val); @@ -543,7 +539,7 @@ TEST_F(TApp, FallThroughShort) { run(); } -TEST_F(TApp, FallThroughPositional) { +TEST_CASE_METHOD(TApp, "FallThroughPositional", "[subcom]") { app.fallthrough(); int val{1}; app.add_option("val", val); @@ -555,7 +551,7 @@ TEST_F(TApp, FallThroughPositional) { run(); } -TEST_F(TApp, FallThroughEquals) { +TEST_CASE_METHOD(TApp, "FallThroughEquals", "[subcom]") { app.fallthrough(); int val{1}; app.add_option("--val", val); @@ -567,7 +563,7 @@ TEST_F(TApp, FallThroughEquals) { run(); } -TEST_F(TApp, EvilParseFallthrough) { +TEST_CASE_METHOD(TApp, "EvilParseFallthrough", "[subcom]") { app.fallthrough(); int val1{0}, val2{0}; app.add_option("--val1", val1); @@ -579,11 +575,11 @@ TEST_F(TApp, EvilParseFallthrough) { // Should not throw run(); - EXPECT_EQ(1, val1); - EXPECT_EQ(2, val2); + CHECK(val1 == 1); + CHECK(val2 == 2); } -TEST_F(TApp, CallbackOrdering) { +TEST_CASE_METHOD(TApp, "CallbackOrdering", "[subcom]") { app.fallthrough(); int val{1}, sub_val{0}; app.add_option("--val", val); @@ -593,16 +589,16 @@ TEST_F(TApp, CallbackOrdering) { args = {"sub", "--val=2"}; run(); - EXPECT_EQ(2, val); - EXPECT_EQ(2, sub_val); + CHECK(val 
== 2); + CHECK(sub_val == 2); args = {"--val=2", "sub"}; run(); - EXPECT_EQ(2, val); - EXPECT_EQ(2, sub_val); + CHECK(val == 2); + CHECK(sub_val == 2); } -TEST_F(TApp, CallbackOrderingImmediate) { +TEST_CASE_METHOD(TApp, "CallbackOrderingImmediate", "[subcom]") { app.fallthrough(); int val{1}, sub_val{0}; app.add_option("--val", val); @@ -612,16 +608,16 @@ TEST_F(TApp, CallbackOrderingImmediate) { args = {"sub", "--val=2"}; run(); - EXPECT_EQ(2, val); - EXPECT_EQ(1, sub_val); + CHECK(val == 2); + CHECK(sub_val == 1); args = {"--val=2", "sub"}; run(); - EXPECT_EQ(2, val); - EXPECT_EQ(2, sub_val); + CHECK(val == 2); + CHECK(sub_val == 2); } -TEST_F(TApp, CallbackOrderingImmediateMain) { +TEST_CASE_METHOD(TApp, "CallbackOrderingImmediateMain", "[subcom]") { app.fallthrough(); int val{0}, sub_val{0}; @@ -633,24 +629,24 @@ TEST_F(TApp, CallbackOrderingImmediateMain) { app.callback([&val]() { val = 1; }); args = {"sub"}; run(); - EXPECT_EQ(1, val); - EXPECT_EQ(0, sub_val); + CHECK(val == 1); + CHECK(sub_val == 0); // the main app callback should run before the subcommand callbacks app.immediate_callback(); val = 0; // reset value run(); - EXPECT_EQ(2, val); - EXPECT_EQ(1, sub_val); + CHECK(val == 2); + CHECK(sub_val == 1); // the subcommand callback now runs immediately after processing and before the main app callback again sub->immediate_callback(); val = 0; // reset value run(); - EXPECT_EQ(1, val); - EXPECT_EQ(0, sub_val); + CHECK(val == 1); + CHECK(sub_val == 0); } // Test based on issue #308 -TEST_F(TApp, CallbackOrderingImmediateModeOrder) { +TEST_CASE_METHOD(TApp, "CallbackOrderingImmediateModeOrder", "[subcom]") { app.require_subcommand(1, 1); std::vector<int> v; @@ -660,81 +656,81 @@ TEST_F(TApp, CallbackOrderingImmediateModeOrder) { args = {"hello"}; run(); // immediate_callback inherited - ASSERT_EQ(v.size(), 2u); - EXPECT_EQ(v[0], 1); - EXPECT_EQ(v[1], 2); + REQUIRE(2u == v.size()); + CHECK(1 == v[0]); + CHECK(2 == v[1]); v.clear(); 
sub->immediate_callback(true); run(); // immediate_callback is now triggered for the main first - ASSERT_EQ(v.size(), 2u); - EXPECT_EQ(v[0], 2); - EXPECT_EQ(v[1], 1); + REQUIRE(2u == v.size()); + CHECK(2 == v[0]); + CHECK(1 == v[1]); } -TEST_F(TApp, RequiredSubCom) { +TEST_CASE_METHOD(TApp, "RequiredSubCom", "[subcom]") { app.add_subcommand("sub1"); app.add_subcommand("sub2"); app.require_subcommand(); - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"sub1"}; run(); } -TEST_F(TApp, SubComExtras) { +TEST_CASE_METHOD(TApp, "SubComExtras", "[subcom]") { app.allow_extras(); auto sub = app.add_subcommand("sub"); args = {"extra", "sub"}; run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"extra"})); - EXPECT_EQ(sub->remaining(), std::vector<std::string>()); + CHECK(std::vector<std::string>({"extra"}) == app.remaining()); + CHECK(std::vector<std::string>() == sub->remaining()); args = {"extra1", "extra2", "sub"}; run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"extra1", "extra2"})); - EXPECT_EQ(sub->remaining(), std::vector<std::string>()); + CHECK(std::vector<std::string>({"extra1", "extra2"}) == app.remaining()); + CHECK(std::vector<std::string>() == sub->remaining()); args = {"sub", "extra1", "extra2"}; run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>()); - EXPECT_EQ(sub->remaining(), std::vector<std::string>({"extra1", "extra2"})); + CHECK(std::vector<std::string>() == app.remaining()); + CHECK(std::vector<std::string>({"extra1", "extra2"}) == sub->remaining()); args = {"extra1", "extra2", "sub", "extra3", "extra4"}; run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"extra1", "extra2"})); - EXPECT_EQ(app.remaining(true), std::vector<std::string>({"extra1", "extra2", "extra3", "extra4"})); - EXPECT_EQ(sub->remaining(), std::vector<std::string>({"extra3", "extra4"})); + CHECK(std::vector<std::string>({"extra1", "extra2"}) == app.remaining()); + 
CHECK(std::vector<std::string>({"extra1", "extra2", "extra3", "extra4"}) == app.remaining(true)); + CHECK(std::vector<std::string>({"extra3", "extra4"}) == sub->remaining()); } -TEST_F(TApp, Required1SubCom) { +TEST_CASE_METHOD(TApp, "Required1SubCom", "[subcom]") { app.require_subcommand(1); app.add_subcommand("sub1"); app.add_subcommand("sub2"); app.add_subcommand("sub3"); - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); args = {"sub1"}; run(); args = {"sub1", "sub2"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(TApp, BadSubcommandSearch) { +TEST_CASE_METHOD(TApp, "BadSubcommandSearch", "[subcom]") { auto one = app.add_subcommand("one"); auto two = one->add_subcommand("two"); - EXPECT_THROW(app.get_subcommand(two), CLI::OptionNotFound); - EXPECT_THROW(app.get_subcommand_ptr(two), CLI::OptionNotFound); + CHECK_THROWS_AS(app.get_subcommand(two), CLI::OptionNotFound); + CHECK_THROWS_AS(app.get_subcommand_ptr(two), CLI::OptionNotFound); } -TEST_F(TApp, PrefixProgram) { +TEST_CASE_METHOD(TApp, "PrefixProgram", "[subcom]") { app.prefix_command(); @@ -743,10 +739,10 @@ TEST_F(TApp, PrefixProgram) { args = {"--simple", "other", "--simple", "--mine"}; run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"other", "--simple", "--mine"})); + CHECK(std::vector<std::string>({"other", "--simple", "--mine"}) == app.remaining()); } -TEST_F(TApp, PrefixNoSeparation) { +TEST_CASE_METHOD(TApp, "PrefixNoSeparation", "[subcom]") { app.prefix_command(); @@ -755,10 +751,10 @@ TEST_F(TApp, PrefixNoSeparation) { args = {"--vals", "1", "2", "3", "other"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, PrefixSeparation) { +TEST_CASE_METHOD(TApp, "PrefixSeparation", "[subcom]") { app.prefix_command(); @@ -769,11 +765,11 @@ TEST_F(TApp, PrefixSeparation) { run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"other"})); - 
EXPECT_EQ(vals, std::vector<int>({1, 2, 3})); + CHECK(std::vector<std::string>({"other"}) == app.remaining()); + CHECK(std::vector<int>({1, 2, 3}) == vals); } -TEST_F(TApp, PrefixSubcom) { +TEST_CASE_METHOD(TApp, "PrefixSubcom", "[subcom]") { auto subc = app.add_subcommand("subc"); subc->prefix_command(); @@ -782,19 +778,19 @@ TEST_F(TApp, PrefixSubcom) { args = {"--simple", "subc", "other", "--simple", "--mine"}; run(); - EXPECT_EQ(app.remaining_size(), 0u); - EXPECT_EQ(app.remaining_size(true), 3u); - EXPECT_EQ(subc->remaining(), std::vector<std::string>({"other", "--simple", "--mine"})); + CHECK(0u == app.remaining_size()); + CHECK(3u == app.remaining_size(true)); + CHECK(std::vector<std::string>({"other", "--simple", "--mine"}) == subc->remaining()); } -TEST_F(TApp, InheritHelpAllFlag) { +TEST_CASE_METHOD(TApp, "InheritHelpAllFlag", "[subcom]") { app.set_help_all_flag("--help-all"); auto subc = app.add_subcommand("subc"); auto help_opt_list = subc->get_options([](const CLI::Option *opt) { return opt->get_name() == "--help-all"; }); - EXPECT_EQ(help_opt_list.size(), 1u); + CHECK(1u == help_opt_list.size()); } -TEST_F(TApp, RequiredPosInSubcommand) { +TEST_CASE_METHOD(TApp, "RequiredPosInSubcommand", "[subcom]") { app.require_subcommand(); std::string bar; @@ -806,13 +802,13 @@ TEST_F(TApp, RequiredPosInSubcommand) { args = {"foo", "abc"}; run(); - EXPECT_EQ(bar, "abc"); + CHECK("abc" == bar); args = {"baz", "cba"}; run(); - EXPECT_EQ(bar, "cba"); + CHECK("cba" == bar); args = {}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } struct SubcommandProgram : public TApp { @@ -839,54 +835,54 @@ struct SubcommandProgram : public TApp { } }; -TEST_F(SubcommandProgram, Working) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand Working", "[subcom]") { args = {"-d", "start", "-ffilename"}; run(); - EXPECT_EQ(1, dummy); - EXPECT_EQ(start, app.get_subcommands().at(0)); - EXPECT_EQ("filename", file); + CHECK(dummy == 1); + 
CHECK(app.get_subcommands().at(0) == start); + CHECK(file == "filename"); } -TEST_F(SubcommandProgram, Spare) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand Spare", "[subcom]") { args = {"extra", "-d", "start", "-ffilename"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(SubcommandProgram, SpareSub) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand SpareSub", "[subcom]") { args = {"-d", "start", "spare", "-ffilename"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(SubcommandProgram, Multiple) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand Multiple", "[subcom]") { args = {"-d", "start", "-ffilename", "stop"}; run(); - EXPECT_EQ(2u, app.get_subcommands().size()); - EXPECT_EQ(1, dummy); - EXPECT_EQ("filename", file); + CHECK(app.get_subcommands().size() == 2u); + CHECK(dummy == 1); + CHECK(file == "filename"); } -TEST_F(SubcommandProgram, MultipleOtherOrder) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand MultipleOtherOrder", "[subcom]") { args = {"start", "-d", "-ffilename", "stop"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(SubcommandProgram, MultipleArgs) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand MultipleArgs", "[subcom]") { args = {"start", "stop"}; run(); - EXPECT_EQ(2u, app.get_subcommands().size()); + CHECK(app.get_subcommands().size() == 2u); } -TEST_F(SubcommandProgram, CaseCheck) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand CaseCheck", "[subcom]") { args = {"Start"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"start"}; run(); @@ -898,32 +894,32 @@ TEST_F(SubcommandProgram, CaseCheck) { run(); } -TEST_F(TApp, SubcomInheritCaseCheck) { +TEST_CASE_METHOD(TApp, "SubcomInheritCaseCheck", "[subcom]") { app.ignore_case(); auto sub1 = app.add_subcommand("sub1"); auto sub2 = app.add_subcommand("sub2"); run(); - EXPECT_EQ(0u, 
app.get_subcommands().size()); - EXPECT_EQ(2u, app.get_subcommands({}).size()); - EXPECT_EQ(1u, app.get_subcommands([](const CLI::App *s) { return s->get_name() == "sub1"; }).size()); + CHECK(app.get_subcommands().size() == 0u); + CHECK(app.get_subcommands({}).size() == 2u); + CHECK(app.get_subcommands([](const CLI::App *s) { return s->get_name() == "sub1"; }).size() == 1u); args = {"SuB1"}; run(); - EXPECT_EQ(sub1, app.get_subcommands().at(0)); - EXPECT_EQ(1u, app.get_subcommands().size()); + CHECK(app.get_subcommands().at(0) == sub1); + CHECK(app.get_subcommands().size() == 1u); app.clear(); - EXPECT_EQ(0u, app.get_subcommands().size()); + CHECK(app.get_subcommands().size() == 0u); args = {"sUb2"}; run(); - EXPECT_EQ(sub2, app.get_subcommands().at(0)); + CHECK(app.get_subcommands().at(0) == sub2); } -TEST_F(SubcommandProgram, UnderscoreCheck) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand UnderscoreCheck", "[subcom]") { args = {"start_"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"start"}; run(); @@ -935,54 +931,54 @@ TEST_F(SubcommandProgram, UnderscoreCheck) { run(); } -TEST_F(TApp, SubcomInheritUnderscoreCheck) { +TEST_CASE_METHOD(TApp, "SubcomInheritUnderscoreCheck", "[subcom]") { app.ignore_underscore(); auto sub1 = app.add_subcommand("sub_option1"); auto sub2 = app.add_subcommand("sub_option2"); run(); - EXPECT_EQ(0u, app.get_subcommands().size()); - EXPECT_EQ(2u, app.get_subcommands({}).size()); - EXPECT_EQ(1u, app.get_subcommands([](const CLI::App *s) { return s->get_name() == "sub_option1"; }).size()); + CHECK(app.get_subcommands().size() == 0u); + CHECK(app.get_subcommands({}).size() == 2u); + CHECK(app.get_subcommands([](const CLI::App *s) { return s->get_name() == "sub_option1"; }).size() == 1u); args = {"suboption1"}; run(); - EXPECT_EQ(sub1, app.get_subcommands().at(0)); - EXPECT_EQ(1u, app.get_subcommands().size()); + CHECK(app.get_subcommands().at(0) == sub1); + 
CHECK(app.get_subcommands().size() == 1u); app.clear(); - EXPECT_EQ(0u, app.get_subcommands().size()); + CHECK(app.get_subcommands().size() == 0u); args = {"_suboption2"}; run(); - EXPECT_EQ(sub2, app.get_subcommands().at(0)); + CHECK(app.get_subcommands().at(0) == sub2); } -TEST_F(SubcommandProgram, HelpOrder) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand HelpOrder", "[subcom]") { args = {"-h"}; - EXPECT_THROW(run(), CLI::CallForHelp); + CHECK_THROWS_AS(run(), CLI::CallForHelp); args = {"start", "-h"}; - EXPECT_THROW(run(), CLI::CallForHelp); + CHECK_THROWS_AS(run(), CLI::CallForHelp); args = {"-h", "start"}; - EXPECT_THROW(run(), CLI::CallForHelp); + CHECK_THROWS_AS(run(), CLI::CallForHelp); } -TEST_F(SubcommandProgram, HelpAllOrder) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand HelpAllOrder", "[subcom]") { args = {"--help-all"}; - EXPECT_THROW(run(), CLI::CallForAllHelp); + CHECK_THROWS_AS(run(), CLI::CallForAllHelp); args = {"start", "--help-all"}; - EXPECT_THROW(run(), CLI::CallForAllHelp); + CHECK_THROWS_AS(run(), CLI::CallForAllHelp); args = {"--help-all", "start"}; - EXPECT_THROW(run(), CLI::CallForAllHelp); + CHECK_THROWS_AS(run(), CLI::CallForAllHelp); } -TEST_F(SubcommandProgram, Callbacks) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand Callbacks", "[subcom]") { start->callback([]() { throw CLI::Success(); }); @@ -990,63 +986,63 @@ TEST_F(SubcommandProgram, Callbacks) { args = {"start"}; - EXPECT_THROW(run(), CLI::Success); + CHECK_THROWS_AS(run(), CLI::Success); } -TEST_F(SubcommandProgram, Groups) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand Groups", "[subcom]") { std::string help = app.help(); - EXPECT_THAT(help, Not(HasSubstr("More Commands:"))); - EXPECT_THAT(help, HasSubstr("Subcommands:")); + CHECK_THAT(help, !Contains("More Commands:")); + CHECK_THAT(help, Contains("Subcommands:")); start->group("More Commands"); help = app.help(); - EXPECT_THAT(help, HasSubstr("More Commands:")); - EXPECT_THAT(help, 
HasSubstr("Subcommands:")); + CHECK_THAT(help, Contains("More Commands:")); + CHECK_THAT(help, Contains("Subcommands:")); // Case is ignored but for the first subcommand in a group. stop->group("more commands"); help = app.help(); - EXPECT_THAT(help, HasSubstr("More Commands:")); - EXPECT_THAT(help, Not(HasSubstr("Subcommands:"))); + CHECK_THAT(help, Contains("More Commands:")); + CHECK_THAT(help, !Contains("Subcommands:")); } -TEST_F(SubcommandProgram, ExtrasErrors) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand ExtrasErrors", "[subcom]") { args = {"one", "two", "start", "three", "four"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"start", "three", "four"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"one", "two"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(SubcommandProgram, OrderedExtras) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand OrderedExtras", "[subcom]") { app.allow_extras(); args = {"one", "two", "start", "three", "four"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); start->allow_extras(); run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"one", "two"})); - EXPECT_EQ(start->remaining(), std::vector<std::string>({"three", "four"})); - EXPECT_EQ(app.remaining(true), std::vector<std::string>({"one", "two", "three", "four"})); + CHECK(std::vector<std::string>({"one", "two"}) == app.remaining()); + CHECK(std::vector<std::string>({"three", "four"}) == start->remaining()); + CHECK(std::vector<std::string>({"one", "two", "three", "four"}) == app.remaining(true)); args = {"one", "two", "start", "three", "--", "four"}; run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"one", "two", "four"})); - EXPECT_EQ(start->remaining(), std::vector<std::string>({"three"})); - EXPECT_EQ(app.remaining(true), std::vector<std::string>({"one", "two", "four", 
"three"})); + CHECK(std::vector<std::string>({"one", "two", "four"}) == app.remaining()); + CHECK(std::vector<std::string>({"three"}) == start->remaining()); + CHECK(std::vector<std::string>({"one", "two", "four", "three"}) == app.remaining(true)); } -TEST_F(SubcommandProgram, MixedOrderExtras) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand MixedOrderExtras", "[subcom]") { app.allow_extras(); start->allow_extras(); @@ -1055,50 +1051,50 @@ TEST_F(SubcommandProgram, MixedOrderExtras) { args = {"one", "two", "start", "three", "four", "stop", "five", "six"}; run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"one", "two"})); - EXPECT_EQ(start->remaining(), std::vector<std::string>({"three", "four"})); - EXPECT_EQ(stop->remaining(), std::vector<std::string>({"five", "six"})); - EXPECT_EQ(app.remaining(true), std::vector<std::string>({"one", "two", "three", "four", "five", "six"})); + CHECK(std::vector<std::string>({"one", "two"}) == app.remaining()); + CHECK(std::vector<std::string>({"three", "four"}) == start->remaining()); + CHECK(std::vector<std::string>({"five", "six"}) == stop->remaining()); + CHECK(std::vector<std::string>({"one", "two", "three", "four", "five", "six"}) == app.remaining(true)); args = {"one", "two", "stop", "three", "four", "start", "five", "six"}; run(); - EXPECT_EQ(app.remaining(), std::vector<std::string>({"one", "two"})); - EXPECT_EQ(stop->remaining(), std::vector<std::string>({"three", "four"})); - EXPECT_EQ(start->remaining(), std::vector<std::string>({"five", "six"})); - EXPECT_EQ(app.remaining(true), std::vector<std::string>({"one", "two", "three", "four", "five", "six"})); + CHECK(std::vector<std::string>({"one", "two"}) == app.remaining()); + CHECK(std::vector<std::string>({"three", "four"}) == stop->remaining()); + CHECK(std::vector<std::string>({"five", "six"}) == start->remaining()); + CHECK(std::vector<std::string>({"one", "two", "three", "four", "five", "six"}) == app.remaining(true)); } -TEST_F(SubcommandProgram, 
CallbackOrder) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand CallbackOrder", "[subcom]") { std::vector<int> callback_order; start->callback([&callback_order]() { callback_order.push_back(1); }); stop->callback([&callback_order]() { callback_order.push_back(2); }); args = {"start", "stop"}; run(); - EXPECT_EQ(callback_order, std::vector<int>({1, 2})); + CHECK(std::vector<int>({1, 2}) == callback_order); callback_order.clear(); args = {"stop", "start"}; run(); - EXPECT_EQ(callback_order, std::vector<int>({2, 1})); + CHECK(std::vector<int>({2, 1}) == callback_order); } -TEST_F(SubcommandProgram, CallbackOrderImmediate) { +TEST_CASE_METHOD(SubcommandProgram, "Subcommand CallbackOrderImmediate", "[subcom]") { std::vector<int> callback_order; start->callback([&callback_order]() { callback_order.push_back(1); })->immediate_callback(); stop->callback([&callback_order]() { callback_order.push_back(2); }); args = {"start", "stop", "start"}; run(); - EXPECT_EQ(callback_order, std::vector<int>({1, 1, 2})); + CHECK(std::vector<int>({1, 1, 2}) == callback_order); callback_order.clear(); args = {"stop", "start", "stop", "start"}; run(); - EXPECT_EQ(callback_order, std::vector<int>({1, 1, 2})); + CHECK(std::vector<int>({1, 1, 2}) == callback_order); } struct ManySubcommands : public TApp { @@ -1121,137 +1117,137 @@ struct ManySubcommands : public TApp { ManySubcommands &operator=(const ManySubcommands &) = delete; }; -TEST_F(ManySubcommands, Required1Exact) { +TEST_CASE_METHOD(ManySubcommands, "Required1Exact", "[subcom]") { app.require_subcommand(1); run(); - EXPECT_EQ(sub1->remaining(), vs_t({"sub2", "sub3"})); - EXPECT_EQ(app.remaining(true), vs_t({"sub2", "sub3"})); + CHECK(vs_t({"sub2", "sub3"}) == sub1->remaining()); + CHECK(vs_t({"sub2", "sub3"}) == app.remaining(true)); } -TEST_F(ManySubcommands, Required2Exact) { +TEST_CASE_METHOD(ManySubcommands, "Required2Exact", "[subcom]") { app.require_subcommand(2); run(); - EXPECT_EQ(sub2->remaining(), vs_t({"sub3"})); + 
CHECK(vs_t({"sub3"}) == sub2->remaining()); } -TEST_F(ManySubcommands, Required4Failure) { +TEST_CASE_METHOD(ManySubcommands, "Required4Failure", "[subcom]") { app.require_subcommand(4); - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } -TEST_F(ManySubcommands, RemoveSub) { +TEST_CASE_METHOD(ManySubcommands, "RemoveSub", "[subcom]") { run(); - EXPECT_EQ(app.remaining_size(true), 0u); + CHECK(0u == app.remaining_size(true)); app.remove_subcommand(sub1); app.allow_extras(); run(); - EXPECT_EQ(app.remaining_size(true), 1u); + CHECK(1u == app.remaining_size(true)); } -TEST_F(ManySubcommands, RemoveSubFail) { +TEST_CASE_METHOD(ManySubcommands, "RemoveSubFail", "[subcom]") { auto sub_sub = sub1->add_subcommand("subsub"); - EXPECT_FALSE(app.remove_subcommand(sub_sub)); - EXPECT_TRUE(sub1->remove_subcommand(sub_sub)); - EXPECT_FALSE(app.remove_subcommand(nullptr)); + CHECK(!app.remove_subcommand(sub_sub)); + CHECK(sub1->remove_subcommand(sub_sub)); + CHECK(!app.remove_subcommand(nullptr)); } -TEST_F(ManySubcommands, manyIndexQuery) { +TEST_CASE_METHOD(ManySubcommands, "manyIndexQuery", "[subcom]") { auto s1 = app.get_subcommand(0); auto s2 = app.get_subcommand(1); auto s3 = app.get_subcommand(2); auto s4 = app.get_subcommand(3); - EXPECT_EQ(s1, sub1); - EXPECT_EQ(s2, sub2); - EXPECT_EQ(s3, sub3); - EXPECT_EQ(s4, sub4); - EXPECT_THROW(app.get_subcommand(4), CLI::OptionNotFound); + CHECK(sub1 == s1); + CHECK(sub2 == s2); + CHECK(sub3 == s3); + CHECK(sub4 == s4); + CHECK_THROWS_AS(app.get_subcommand(4), CLI::OptionNotFound); auto s0 = app.get_subcommand(); - EXPECT_EQ(s0, sub1); + CHECK(sub1 == s0); } -TEST_F(ManySubcommands, manyIndexQueryPtr) { +TEST_CASE_METHOD(ManySubcommands, "manyIndexQueryPtr", "[subcom]") { auto s1 = app.get_subcommand_ptr(0); auto s2 = app.get_subcommand_ptr(1); auto s3 = app.get_subcommand_ptr(2); auto s4 = app.get_subcommand_ptr(3); - EXPECT_EQ(s1.get(), sub1); - EXPECT_EQ(s2.get(), sub2); - 
EXPECT_EQ(s3.get(), sub3); - EXPECT_EQ(s4.get(), sub4); - EXPECT_THROW(app.get_subcommand_ptr(4), CLI::OptionNotFound); + CHECK(sub1 == s1.get()); + CHECK(sub2 == s2.get()); + CHECK(sub3 == s3.get()); + CHECK(sub4 == s4.get()); + CHECK_THROWS_AS(app.get_subcommand_ptr(4), CLI::OptionNotFound); } -TEST_F(ManySubcommands, Required1Fuzzy) { +TEST_CASE_METHOD(ManySubcommands, "Required1Fuzzy", "[subcom]") { app.require_subcommand(0, 1); run(); - EXPECT_EQ(sub1->remaining(), vs_t({"sub2", "sub3"})); + CHECK(vs_t({"sub2", "sub3"}) == sub1->remaining()); app.require_subcommand(-1); run(); - EXPECT_EQ(sub1->remaining(), vs_t({"sub2", "sub3"})); + CHECK(vs_t({"sub2", "sub3"}) == sub1->remaining()); } -TEST_F(ManySubcommands, Required2Fuzzy) { +TEST_CASE_METHOD(ManySubcommands, "Required2Fuzzy", "[subcom]") { app.require_subcommand(0, 2); run(); - EXPECT_EQ(sub2->remaining(), vs_t({"sub3"})); - EXPECT_EQ(app.remaining(true), vs_t({"sub3"})); + CHECK(vs_t({"sub3"}) == sub2->remaining()); + CHECK(vs_t({"sub3"}) == app.remaining(true)); app.require_subcommand(-2); run(); - EXPECT_EQ(sub2->remaining(), vs_t({"sub3"})); + CHECK(vs_t({"sub3"}) == sub2->remaining()); } -TEST_F(ManySubcommands, Unlimited) { +TEST_CASE_METHOD(ManySubcommands, "Unlimited", "[subcom]") { run(); - EXPECT_EQ(app.remaining(true), vs_t()); + CHECK(vs_t() == app.remaining(true)); app.require_subcommand(); run(); - EXPECT_EQ(app.remaining(true), vs_t()); + CHECK(vs_t() == app.remaining(true)); app.require_subcommand(2, 0); // 2 or more run(); - EXPECT_EQ(app.remaining(true), vs_t()); + CHECK(vs_t() == app.remaining(true)); } -TEST_F(ManySubcommands, HelpFlags) { +TEST_CASE_METHOD(ManySubcommands, "HelpFlags", "[subcom]") { args = {"-h"}; - EXPECT_THROW(run(), CLI::CallForHelp); + CHECK_THROWS_AS(run(), CLI::CallForHelp); args = {"sub2", "-h"}; - EXPECT_THROW(run(), CLI::CallForHelp); + CHECK_THROWS_AS(run(), CLI::CallForHelp); args = {"-h", "sub2"}; - EXPECT_THROW(run(), CLI::CallForHelp); + 
CHECK_THROWS_AS(run(), CLI::CallForHelp); } -TEST_F(ManySubcommands, MaxCommands) { +TEST_CASE_METHOD(ManySubcommands, "MaxCommands", "[subcom]") { app.require_subcommand(2); args = {"sub1", "sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); // The extra subcommand counts as an extra args = {"sub1", "sub2", "sub3"}; - EXPECT_NO_THROW(run()); - EXPECT_EQ(sub2->remaining().size(), 1u); - EXPECT_EQ(app.count_all(), 2u); + CHECK_NOTHROW(run()); + CHECK(1u == sub2->remaining().size()); + CHECK(2u == app.count_all()); // Currently, setting sub2 to throw causes an extras error // In the future, would passing on up to app's extras be better? @@ -1262,31 +1258,31 @@ TEST_F(ManySubcommands, MaxCommands) { args = {"sub1", "sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub1", "sub2", "sub3"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(ManySubcommands, SubcommandExclusion) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandExclusion", "[subcom]") { sub1->excludes(sub3); sub2->excludes(sub3); args = {"sub1", "sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub1", "sub2", "sub3"}; - EXPECT_THROW(run(), CLI::ExcludesError); + CHECK_THROWS_AS(run(), CLI::ExcludesError); args = {"sub1", "sub2", "sub4"}; - EXPECT_NO_THROW(run()); - EXPECT_EQ(app.count_all(), 3u); + CHECK_NOTHROW(run()); + CHECK(3u == app.count_all()); args = {"sub3", "sub4"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(ManySubcommands, SubcommandOptionExclusion) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandOptionExclusion", "[subcom]") { auto excluder_flag = app.add_flag("--exclude"); sub1->excludes(excluder_flag)->fallthrough(); @@ -1294,78 +1290,78 @@ TEST_F(ManySubcommands, SubcommandOptionExclusion) { sub3->fallthrough(); sub4->fallthrough(); args = {"sub3", "sub4", "--exclude"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub1", "sub3", "--exclude"}; - EXPECT_THROW(run(), 
CLI::ExcludesError); - EXPECT_TRUE(sub1->remove_excludes(excluder_flag)); - EXPECT_NO_THROW(run()); - EXPECT_FALSE(sub1->remove_excludes(excluder_flag)); + CHECK_THROWS_AS(run(), CLI::ExcludesError); + CHECK(sub1->remove_excludes(excluder_flag)); + CHECK_NOTHROW(run()); + CHECK(!sub1->remove_excludes(excluder_flag)); args = {"--exclude", "sub2", "sub4"}; - EXPECT_THROW(run(), CLI::ExcludesError); - EXPECT_EQ(sub1->excludes(excluder_flag), sub1); + CHECK_THROWS_AS(run(), CLI::ExcludesError); + CHECK(sub1 == sub1->excludes(excluder_flag)); args = {"sub1", "--exclude", "sub2", "sub4"}; try { run(); } catch(const CLI::ExcludesError &ee) { - EXPECT_NE(std::string(ee.what()).find("sub1"), std::string::npos); + CHECK(std::string::npos != std::string(ee.what()).find("sub1")); } } -TEST_F(ManySubcommands, SubcommandNeeds) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandNeeds", "[subcom]") { sub1->needs(sub2); args = {"sub1", "sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub1"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); sub1->needs(sub3); args = {"sub1", "sub2", "sub3"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub1", "sub2", "sub4"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"sub1", "sub2", "sub4"}; sub1->remove_needs(sub3); - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(ManySubcommands, SubcommandNeedsOptions) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandNeedsOptions", "[subcom]") { auto opt = app.add_flag("--subactive"); sub1->needs(opt); sub1->fallthrough(); args = {"sub1", "--subactive"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub1"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--subactive"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); auto opt2 = 
app.add_flag("--subactive2"); sub1->needs(opt2); args = {"sub1", "--subactive"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); args = {"--subactive", "--subactive2", "sub1"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); sub1->remove_needs(opt2); args = {"sub1", "--subactive"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(ManySubcommands, SubcommandNeedsOptionsCallbackOrdering) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandNeedsOptionsCallbackOrdering", "[subcom]") { int count{0}; auto opt = app.add_flag("--subactive"); app.add_flag("--flag1"); @@ -1373,135 +1369,135 @@ TEST_F(ManySubcommands, SubcommandNeedsOptionsCallbackOrdering) { sub1->fallthrough(); sub1->parse_complete_callback([&count]() { ++count; }); args = {"sub1", "--flag1", "sub1", "--subactive"}; - EXPECT_THROW(run(), CLI::RequiresError); + CHECK_THROWS_AS(run(), CLI::RequiresError); // the subcommand has to pass validation by the first callback sub1->immediate_callback(false); // now since the callback executes after - EXPECT_NO_THROW(run()); - EXPECT_EQ(count, 1); + CHECK_NOTHROW(run()); + CHECK(1 == count); sub1->immediate_callback(); args = {"--subactive", "sub1"}; // now the required is processed first - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(ManySubcommands, SubcommandNeedsFail) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandNeedsFail", "[subcom]") { auto opt = app.add_flag("--subactive"); auto opt2 = app.add_flag("--dummy"); sub1->needs(opt); - EXPECT_THROW(sub1->needs((CLI::Option *)nullptr), CLI::OptionNotFound); - EXPECT_THROW(sub1->needs((CLI::App *)nullptr), CLI::OptionNotFound); - EXPECT_THROW(sub1->needs(sub1), CLI::OptionNotFound); + CHECK_THROWS_AS(sub1->needs((CLI::Option *)nullptr), CLI::OptionNotFound); + CHECK_THROWS_AS(sub1->needs((CLI::App *)nullptr), CLI::OptionNotFound); + CHECK_THROWS_AS(sub1->needs(sub1), CLI::OptionNotFound); - EXPECT_TRUE(sub1->remove_needs(opt)); - 
EXPECT_FALSE(sub1->remove_needs(opt2)); - EXPECT_FALSE(sub1->remove_needs(sub1)); + CHECK(sub1->remove_needs(opt)); + CHECK(!sub1->remove_needs(opt2)); + CHECK(!sub1->remove_needs(sub1)); } -TEST_F(ManySubcommands, SubcommandRequired) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandRequired", "[subcom]") { sub1->required(); args = {"sub1", "sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub1", "sub2", "sub3"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub3", "sub4"}; - EXPECT_THROW(run(), CLI::RequiredError); + CHECK_THROWS_AS(run(), CLI::RequiredError); } -TEST_F(ManySubcommands, SubcommandDisabled) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandDisabled", "[subcom]") { sub3->disabled(); args = {"sub1", "sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub1", "sub2", "sub3"}; app.allow_extras(false); sub2->allow_extras(false); - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"sub3", "sub4"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); sub3->disabled(false); args = {"sub3", "sub4"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); } -TEST_F(ManySubcommands, SubcommandTriggeredOff) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandTriggeredOff", "[subcom]") { app.allow_extras(false); sub1->allow_extras(false); sub2->allow_extras(false); CLI::TriggerOff(sub1, sub2); args = {"sub1", "sub2"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"sub2", "sub1", "sub3"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); CLI::TriggerOff(sub1, {sub3, sub4}); - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"sub1", "sub2", "sub4"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(ManySubcommands, SubcommandTriggeredOn) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandTriggeredOn", "[subcom]") { 
app.allow_extras(false); sub1->allow_extras(false); sub2->allow_extras(false); CLI::TriggerOn(sub1, sub2); args = {"sub1", "sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub2", "sub1", "sub4"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); CLI::TriggerOn(sub1, {sub3, sub4}); sub2->disabled_by_default(false); sub2->disabled(false); - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"sub3", "sub1", "sub2"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(ManySubcommands, SubcommandSilence) { +TEST_CASE_METHOD(ManySubcommands, "SubcommandSilence", "[subcom]") { sub1->silent(); args = {"sub1", "sub2"}; - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); auto subs = app.get_subcommands(); - EXPECT_EQ(subs.size(), 1U); + CHECK(1U == subs.size()); sub1->silent(false); - EXPECT_FALSE(sub1->get_silent()); + CHECK(!sub1->get_silent()); run(); subs = app.get_subcommands(); - EXPECT_EQ(subs.size(), 2U); + CHECK(2U == subs.size()); } -TEST_F(TApp, UnnamedSub) { +TEST_CASE_METHOD(TApp, "UnnamedSub", "[subcom]") { double val{0.0}; auto sub = app.add_subcommand("", "empty name"); auto opt = sub->add_option("-v,--value", val); args = {"-v", "4.56"}; run(); - EXPECT_EQ(val, 4.56); + CHECK(4.56 == val); // make sure unnamed sub options can be found from the main app auto opt2 = app.get_option("-v"); - EXPECT_EQ(opt, opt2); + CHECK(opt2 == opt); - EXPECT_THROW(app.get_option("--vvvv"), CLI::OptionNotFound); + CHECK_THROWS_AS(app.get_option("--vvvv"), CLI::OptionNotFound); // now test in the constant context const auto &appC = app; auto opt3 = appC.get_option("-v"); - EXPECT_EQ(opt3->get_name(), "--value"); - EXPECT_THROW(appC.get_option("--vvvv"), CLI::OptionNotFound); + CHECK("--value" == opt3->get_name()); + CHECK_THROWS_AS(appC.get_option("--vvvv"), CLI::OptionNotFound); } -TEST_F(TApp, UnnamedSubMix) { +TEST_CASE_METHOD(TApp, "UnnamedSubMix", "[subcom]") { double 
val{0.0}, val2{0.0}, val3{0.0}; app.add_option("-t", val2); auto sub1 = app.add_subcommand("", "empty name"); @@ -1511,13 +1507,13 @@ TEST_F(TApp, UnnamedSubMix) { args = {"-m", "4.56", "-t", "5.93", "-v", "-3"}; run(); - EXPECT_EQ(val, -3.0); - EXPECT_EQ(val2, 5.93); - EXPECT_EQ(val3, 4.56); - EXPECT_EQ(app.count_all(), 3u); + CHECK(-3.0 == val); + CHECK(5.93 == val2); + CHECK(4.56 == val3); + CHECK(3u == app.count_all()); } -TEST_F(TApp, UnnamedSubMixExtras) { +TEST_CASE_METHOD(TApp, "UnnamedSubMixExtras", "[subcom]") { double val{0.0}, val2{0.0}; app.add_option("-t", val2); auto sub = app.add_subcommand("", "empty name"); @@ -1525,26 +1521,26 @@ TEST_F(TApp, UnnamedSubMixExtras) { args = {"-m", "4.56", "-t", "5.93", "-v", "-3"}; app.allow_extras(); run(); - EXPECT_EQ(val, -3.0); - EXPECT_EQ(val2, 5.93); - EXPECT_EQ(app.remaining_size(), 2u); - EXPECT_EQ(sub->remaining_size(), 0u); + CHECK(-3.0 == val); + CHECK(5.93 == val2); + CHECK(2u == app.remaining_size()); + CHECK(0u == sub->remaining_size()); } -TEST_F(TApp, UnnamedSubNoExtras) { +TEST_CASE_METHOD(TApp, "UnnamedSubNoExtras", "[subcom]") { double val{0.0}, val2{0.0}; app.add_option("-t", val2); auto sub = app.add_subcommand(); sub->add_option("-v,--value", val); args = {"-t", "5.93", "-v", "-3"}; run(); - EXPECT_EQ(val, -3.0); - EXPECT_EQ(val2, 5.93); - EXPECT_EQ(app.remaining_size(), 0u); - EXPECT_EQ(sub->remaining_size(), 0u); + CHECK(-3.0 == val); + CHECK(5.93 == val2); + CHECK(0u == app.remaining_size()); + CHECK(0u == sub->remaining_size()); } -TEST_F(TApp, SubcommandAlias) { +TEST_CASE_METHOD(TApp, "SubcommandAlias", "[subcom]") { double val{0.0}; auto sub = app.add_subcommand("sub1"); sub->alias("sub2"); @@ -1552,27 +1548,27 @@ TEST_F(TApp, SubcommandAlias) { sub->add_option("-v,--value", val); args = {"sub1", "-v", "-3"}; run(); - EXPECT_EQ(val, -3.0); + CHECK(-3.0 == val); args = {"sub2", "--value", "-5"}; run(); - EXPECT_EQ(val, -5.0); + CHECK(-5.0 == val); args = {"sub3", "-v", "7"}; run(); - 
EXPECT_EQ(val, 7); + CHECK(7 == val); auto &al = sub->get_aliases(); - ASSERT_GE(al.size(), 2U); + REQUIRE(2U <= al.size()); - EXPECT_EQ(al[0], "sub2"); - EXPECT_EQ(al[1], "sub3"); + CHECK("sub2" == al[0]); + CHECK("sub3" == al[1]); sub->clear_aliases(); - EXPECT_TRUE(al.empty()); + CHECK(al.empty()); } -TEST_F(TApp, SubcommandAliasIgnoreCaseUnderscore) { +TEST_CASE_METHOD(TApp, "SubcommandAliasIgnoreCaseUnderscore", "[subcom]") { double val{0.0}; auto sub = app.add_subcommand("sub1"); sub->alias("sub2"); @@ -1581,63 +1577,63 @@ TEST_F(TApp, SubcommandAliasIgnoreCaseUnderscore) { sub->add_option("-v,--value", val); args = {"sub1", "-v", "-3"}; run(); - EXPECT_EQ(val, -3.0); + CHECK(-3.0 == val); args = {"SUB2", "--value", "-5"}; run(); - EXPECT_EQ(val, -5.0); + CHECK(-5.0 == val); args = {"sUb3", "-v", "7"}; run(); - EXPECT_EQ(val, 7); + CHECK(7 == val); sub->ignore_underscore(); args = {"sub_1", "-v", "-3"}; run(); - EXPECT_EQ(val, -3.0); + CHECK(-3.0 == val); args = {"SUB_2", "--value", "-5"}; run(); - EXPECT_EQ(val, -5.0); + CHECK(-5.0 == val); args = {"sUb_3", "-v", "7"}; run(); - EXPECT_EQ(val, 7); + CHECK(7 == val); sub->ignore_case(false); args = {"sub_1", "-v", "-3"}; run(); - EXPECT_EQ(val, -3.0); + CHECK(-3.0 == val); args = {"SUB_2", "--value", "-5"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"sUb_3", "-v", "7"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); } -TEST_F(TApp, OptionGroupAlias) { +TEST_CASE_METHOD(TApp, "OptionGroupAlias", "[subcom]") { double val{0.0}; auto sub = app.add_option_group("sub1"); sub->alias("sub2"); sub->alias("sub3"); sub->add_option("-v,--value", val); args = {"sub1", "-v", "-3"}; - EXPECT_THROW(run(), CLI::ExtrasError); + CHECK_THROWS_AS(run(), CLI::ExtrasError); args = {"sub2", "--value", "-5"}; run(); - EXPECT_EQ(val, -5.0); + CHECK(-5.0 == val); args = {"sub3", "-v", "7"}; run(); - EXPECT_EQ(val, 7); + CHECK(7 == val); args = {"-v", 
"-3"}; run(); - EXPECT_EQ(val, -3); + CHECK(-3 == val); } -TEST_F(TApp, subcommand_help) { +TEST_CASE_METHOD(TApp, "subcommand_help", "[subcom]") { auto sub1 = app.add_subcommand("help")->silent(); bool flag{false}; app.add_flag("--one", flag, "FLAGGER"); @@ -1650,52 +1646,52 @@ TEST_F(TApp, subcommand_help) { called = true; } auto helpstr = app.help(); - EXPECT_THAT(helpstr, HasSubstr("FLAGGER")); - EXPECT_TRUE(called); + CHECK_THAT(helpstr, Contains("FLAGGER")); + CHECK(called); } -TEST_F(TApp, AliasErrors) { +TEST_CASE_METHOD(TApp, "AliasErrors", "[subcom]") { auto sub1 = app.add_subcommand("sub1"); auto sub2 = app.add_subcommand("sub2"); - EXPECT_THROW(sub2->alias("this is a not a valid alias"), CLI::IncorrectConstruction); - EXPECT_THROW(sub2->alias("-alias"), CLI::IncorrectConstruction); - EXPECT_THROW(sub2->alias("alia$"), CLI::IncorrectConstruction); + CHECK_THROWS_AS(sub2->alias("this is a not a valid alias"), CLI::IncorrectConstruction); + CHECK_THROWS_AS(sub2->alias("-alias"), CLI::IncorrectConstruction); + CHECK_THROWS_AS(sub2->alias("alia$"), CLI::IncorrectConstruction); - EXPECT_THROW(app.add_subcommand("--bad_subcommand_name", "documenting the bad subcommand"), - CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_subcommand("--bad_subcommand_name", "documenting the bad subcommand"), + CLI::IncorrectConstruction); - EXPECT_THROW(app.add_subcommand("documenting a subcommand", "sub3"), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app.add_subcommand("documenting a subcommand", "sub3"), CLI::IncorrectConstruction); // cannot alias to an existing subcommand - EXPECT_THROW(sub2->alias("sub1"), CLI::OptionAlreadyAdded); - EXPECT_THROW(sub1->alias("sub2"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub2->alias("sub1"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub1->alias("sub2"), CLI::OptionAlreadyAdded); // aliasing to an existing name should be allowed - EXPECT_NO_THROW(sub1->alias(sub1->get_name())); + 
CHECK_NOTHROW(sub1->alias(sub1->get_name())); sub1->alias("les1")->alias("les2")->alias("les_3"); sub2->alias("s2les1")->alias("s2les2")->alias("s2les3"); - EXPECT_THROW(sub2->alias("les2"), CLI::OptionAlreadyAdded); - EXPECT_THROW(sub1->alias("s2les2"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub2->alias("les2"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub1->alias("s2les2"), CLI::OptionAlreadyAdded); - EXPECT_THROW(sub2->name("sub1"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub2->name("sub1"), CLI::OptionAlreadyAdded); sub2->ignore_underscore(); - EXPECT_THROW(sub2->alias("les3"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub2->alias("les3"), CLI::OptionAlreadyAdded); } // test adding a subcommand via the pointer -TEST_F(TApp, ExistingSubcommandMatch) { +TEST_CASE_METHOD(TApp, "ExistingSubcommandMatch", "[subcom]") { auto sshared = std::make_shared<CLI::App>("documenting the subcommand", "sub1"); sshared->alias("sub2")->alias("sub3"); - EXPECT_EQ(sshared->get_name(), "sub1"); + CHECK("sub1" == sshared->get_name()); app.add_subcommand("sub1"); try { app.add_subcommand(sshared); // this should throw the next line should never be reached - EXPECT_FALSE(true); + CHECK(!true); } catch(const CLI::OptionAlreadyAdded &oaa) { - EXPECT_THAT(oaa.what(), HasSubstr("sub1")); + CHECK_THAT(oaa.what(), Contains("sub1")); } sshared->name("osub"); app.add_subcommand("sub2"); @@ -1703,35 +1699,35 @@ TEST_F(TApp, ExistingSubcommandMatch) { try { app.add_subcommand(sshared); // this should throw the next line should never be reached - EXPECT_FALSE(true); + CHECK(!true); } catch(const CLI::OptionAlreadyAdded &oaa) { - EXPECT_THAT(oaa.what(), HasSubstr("sub2")); + CHECK_THAT(oaa.what(), Contains("sub2")); } // now check that disabled subcommands can be added regardless of name sshared->name("sub1"); sshared->disabled(); - EXPECT_NO_THROW(app.add_subcommand(sshared)); + CHECK_NOTHROW(app.add_subcommand(sshared)); } -TEST_F(TApp, AliasErrorsInOptionGroup) { 
+TEST_CASE_METHOD(TApp, "AliasErrorsInOptionGroup", "[subcom]") { auto sub1 = app.add_subcommand("sub1"); auto g2 = app.add_option_group("g1"); auto sub2 = g2->add_subcommand("sub2"); // cannot alias to an existing subcommand even if it is in an option group - EXPECT_THROW(sub2->alias("sub1"), CLI::OptionAlreadyAdded); - EXPECT_THROW(sub1->alias("sub2"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub2->alias("sub1"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub1->alias("sub2"), CLI::OptionAlreadyAdded); sub1->alias("les1")->alias("les2")->alias("les3"); sub2->alias("s2les1")->alias("s2les2")->alias("s2les3"); - EXPECT_THROW(sub2->alias("les2"), CLI::OptionAlreadyAdded); - EXPECT_THROW(sub1->alias("s2les2"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub2->alias("les2"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub1->alias("s2les2"), CLI::OptionAlreadyAdded); - EXPECT_THROW(sub2->name("sub1"), CLI::OptionAlreadyAdded); + CHECK_THROWS_AS(sub2->name("sub1"), CLI::OptionAlreadyAdded); } -TEST(SharedSubTests, SharedSubcommand) { +TEST_CASE("SharedSubTests: SharedSubcommand", "[subcom]") { double val{0.0}, val2{0.0}, val3{0.0}, val4{0.0}; CLI::App app1{"test program1"}; @@ -1745,7 +1741,7 @@ TEST(SharedSubTests, SharedSubcommand) { auto subown = app1.get_subcommand_ptr(sub); // add the extracted subcommand to a different app app2.add_subcommand(std::move(subown)); - EXPECT_THROW(app2.add_subcommand(CLI::App_p{}), CLI::IncorrectConstruction); + CHECK_THROWS_AS(app2.add_subcommand(CLI::App_p{}), CLI::IncorrectConstruction); input_t args1 = {"-m", "4.56", "-t", "5.93", "-v", "-3"}; input_t args2 = {"-m", "4.56", "-g", "8.235"}; std::reverse(std::begin(args1), std::end(args1)); @@ -1755,13 +1751,13 @@ TEST(SharedSubTests, SharedSubcommand) { app2.parse(args2); - EXPECT_EQ(val, -3.0); - EXPECT_EQ(val2, 5.93); - EXPECT_EQ(val3, 4.56); - EXPECT_EQ(val4, 8.235); + CHECK(-3.0 == val); + CHECK(5.93 == val2); + CHECK(4.56 == val3); + CHECK(8.235 == val4); } 
-TEST(SharedSubTests, SharedSubIndependent) { +TEST_CASE("SharedSubTests: SharedSubIndependent", "[subcom]") { double val{0.0}, val2{0.0}, val4{0.0}; CLI::App_p app1 = std::make_shared<CLI::App>("test program1"); app1->allow_extras(); @@ -1784,12 +1780,12 @@ TEST(SharedSubTests, SharedSubIndependent) { // parse with the extracted subcommand subown->parse(args2); - EXPECT_EQ(val, -3.0); - EXPECT_EQ(val2, 5.93); - EXPECT_EQ(val4, 8.235); + CHECK(-3.0 == val); + CHECK(5.93 == val2); + CHECK(8.235 == val4); } -TEST(SharedSubTests, SharedSubIndependentReuse) { +TEST_CASE("SharedSubTests: SharedSubIndependentReuse", "[subcom]") { double val{0.0}, val2{0.0}, val4{0.0}; CLI::App_p app1 = std::make_shared<CLI::App>("test program1"); app1->allow_extras(); @@ -1809,60 +1805,60 @@ TEST(SharedSubTests, SharedSubIndependentReuse) { // parse with the extracted subcommand subown->parse("program1 -m 4.56 -g 8.235", true); - EXPECT_EQ(val, -3.0); - EXPECT_EQ(val2, 5.93); - EXPECT_EQ(val4, 8.235); + CHECK(-3.0 == val); + CHECK(5.93 == val2); + CHECK(8.235 == val4); val = 0.0; val2 = 0.0; - EXPECT_EQ(subown->get_name(), "program1"); + CHECK("program1" == subown->get_name()); // this tests the name reset in subcommand since it was automatic app1->parse(args2); - EXPECT_EQ(val, -3.0); - EXPECT_EQ(val2, 5.93); + CHECK(-3.0 == val); + CHECK(5.93 == val2); } -TEST_F(ManySubcommands, getSubtests) { +TEST_CASE_METHOD(ManySubcommands, "getSubtests", "[subcom]") { CLI::App_p sub2p = app.get_subcommand_ptr(sub2); - EXPECT_EQ(sub2p.get(), sub2); - EXPECT_THROW(app.get_subcommand_ptr(nullptr), CLI::OptionNotFound); - EXPECT_THROW(app.get_subcommand(nullptr), CLI::OptionNotFound); + CHECK(sub2 == sub2p.get()); + CHECK_THROWS_AS(app.get_subcommand_ptr(nullptr), CLI::OptionNotFound); + CHECK_THROWS_AS(app.get_subcommand(nullptr), CLI::OptionNotFound); CLI::App_p sub3p = app.get_subcommand_ptr(2); - EXPECT_EQ(sub3p.get(), sub3); + CHECK(sub3 == sub3p.get()); } -TEST_F(ManySubcommands, 
defaultDisabledSubcommand) { +TEST_CASE_METHOD(ManySubcommands, "defaultDisabledSubcommand", "[subcom]") { sub1->fallthrough(); sub2->disabled_by_default(); run(); auto rem = app.remaining(); - EXPECT_EQ(rem.size(), 1u); - EXPECT_EQ(rem[0], "sub2"); - EXPECT_TRUE(sub2->get_disabled_by_default()); + CHECK(1u == rem.size()); + CHECK("sub2" == rem[0]); + CHECK(sub2->get_disabled_by_default()); sub2->disabled(false); - EXPECT_FALSE(sub2->get_disabled()); + CHECK(!sub2->get_disabled()); run(); // this should disable it again even though it was disabled rem = app.remaining(); - EXPECT_EQ(rem.size(), 1u); - EXPECT_EQ(rem[0], "sub2"); - EXPECT_TRUE(sub2->get_disabled_by_default()); - EXPECT_TRUE(sub2->get_disabled()); + CHECK(1u == rem.size()); + CHECK("sub2" == rem[0]); + CHECK(sub2->get_disabled_by_default()); + CHECK(sub2->get_disabled()); } -TEST_F(ManySubcommands, defaultEnabledSubcommand) { +TEST_CASE_METHOD(ManySubcommands, "defaultEnabledSubcommand", "[subcom]") { sub2->enabled_by_default(); run(); auto rem = app.remaining(); - EXPECT_EQ(rem.size(), 0u); - EXPECT_TRUE(sub2->get_enabled_by_default()); + CHECK(0u == rem.size()); + CHECK(sub2->get_enabled_by_default()); sub2->disabled(); - EXPECT_TRUE(sub2->get_disabled()); + CHECK(sub2->get_disabled()); run(); // this should disable it again even though it was disabled rem = app.remaining(); - EXPECT_EQ(rem.size(), 0u); - EXPECT_TRUE(sub2->get_enabled_by_default()); - EXPECT_FALSE(sub2->get_disabled()); + CHECK(0u == rem.size()); + CHECK(sub2->get_enabled_by_default()); + CHECK(!sub2->get_disabled()); } diff --git a/packages/CLI11/tests/TimerTest.cpp b/packages/CLI11/tests/TimerTest.cpp index 51d8bb09b50c79b79ff62a37224ef79253c4dbed..8c88f478ace08a3e192c0439717ec44ad32b9ab9 100644 --- a/packages/CLI11/tests/TimerTest.cpp +++ b/packages/CLI11/tests/TimerTest.cpp @@ -5,66 +5,66 @@ // SPDX-License-Identifier: BSD-3-Clause #include "CLI/Timer.hpp" -#include "gmock/gmock.h" -#include "gtest/gtest.h" + +#include 
"catch.hpp" #include <chrono> #include <sstream> #include <string> #include <thread> -using ::testing::HasSubstr; +using Catch::Matchers::Contains; -TEST(Timer, MSTimes) { +TEST_CASE("Timer: MSTimes", "[timer]") { CLI::Timer timer{"My Timer"}; std::this_thread::sleep_for(std::chrono::milliseconds(123)); std::string output = timer.to_string(); std::string new_output = (timer / 1000000).to_string(); - EXPECT_THAT(output, HasSubstr("My Timer")); - EXPECT_THAT(output, HasSubstr(" ms")); - EXPECT_THAT(new_output, HasSubstr(" ns")); + CHECK_THAT(output, Contains("My Timer")); + CHECK_THAT(output, Contains(" ms")); + CHECK_THAT(new_output, Contains(" ns")); } /* Takes too long -TEST(Timer, STimes) { +TEST_CASE("Timer: STimes", "[timer]") { CLI::Timer timer; std::this_thread::sleep_for(std::chrono::seconds(1)); std::string output = timer.to_string(); - EXPECT_THAT(output, HasSubstr(" s")); + CHECK_THAT (output, Contains(" s")); } */ // Fails on Windows -// TEST(Timer, UStimes) { +// TEST_CASE("Timer: UStimes", "[timer]") { // CLI::Timer timer; // std::this_thread::sleep_for(std::chrono::microseconds(2)); // std::string output = timer.to_string(); -// EXPECT_THAT(output, HasSubstr(" ms")); +// CHECK_THAT (output, Contains(" ms")); //} -TEST(Timer, BigTimer) { +TEST_CASE("Timer: BigTimer", "[timer]") { CLI::Timer timer{"My Timer", CLI::Timer::Big}; std::string output = timer.to_string(); - EXPECT_THAT(output, HasSubstr("Time =")); - EXPECT_THAT(output, HasSubstr("-----------")); + CHECK_THAT(output, Contains("Time =")); + CHECK_THAT(output, Contains("-----------")); } -TEST(Timer, AutoTimer) { +TEST_CASE("Timer: AutoTimer", "[timer]") { CLI::AutoTimer timer; std::string output = timer.to_string(); - EXPECT_THAT(output, HasSubstr("Timer")); + CHECK_THAT(output, Contains("Timer")); } -TEST(Timer, PrintTimer) { +TEST_CASE("Timer: PrintTimer", "[timer]") { std::stringstream out; CLI::AutoTimer timer; out << timer; std::string output = out.str(); - EXPECT_THAT(output, 
HasSubstr("Timer")); + CHECK_THAT(output, Contains("Timer")); } -TEST(Timer, TimeItTimer) { +TEST_CASE("Timer: TimeItTimer", "[timer]") { CLI::Timer timer; std::string output = timer.time_it([]() { std::this_thread::sleep_for(std::chrono::milliseconds(10)); }, .1); std::cout << output << std::endl; - EXPECT_THAT(output, HasSubstr("ms")); + CHECK_THAT(output, Contains("ms")); } diff --git a/packages/CLI11/tests/TransformTest.cpp b/packages/CLI11/tests/TransformTest.cpp index 53df504acadf14e91cfb7b58bde88917593664b9..84ac544a4d003778cbcd73797034eb70c5612d98 100644 --- a/packages/CLI11/tests/TransformTest.cpp +++ b/packages/CLI11/tests/TransformTest.cpp @@ -20,98 +20,98 @@ #endif #endif -TEST_F(TApp, SimpleTransform) { +TEST_CASE_METHOD(TApp, "SimpleTransform", "[transform]") { int value{0}; auto opt = app.add_option("-s", value)->transform(CLI::Transformer({{"one", std::string("1")}})); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } -TEST_F(TApp, SimpleTransformInitList) { +TEST_CASE_METHOD(TApp, "SimpleTransformInitList", "[transform]") { int value{0}; auto opt = app.add_option("-s", value)->transform(CLI::Transformer({{"one", "1"}})); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } -TEST_F(TApp, SimpleNumericalTransform) { +TEST_CASE_METHOD(TApp, "SimpleNumericalTransform", "[transform]") { int value{0}; auto opt = app.add_option("-s", value)->transform(CLI::Transformer(CLI::TransformPairs<int>{{"one", 1}})); args = {"-s", "one"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } -TEST_F(TApp, EnumTransform) { - enum class test : 
std::int16_t { val1 = 3, val2 = 4, val3 = 17 }; - test value{test::val2}; +TEST_CASE_METHOD(TApp, "EnumTransform", "[transform]") { + enum class test_cli : std::int16_t { val1 = 3, val2 = 4, val3 = 17 }; + test_cli value{test_cli::val2}; auto opt = app.add_option("-s", value) - ->transform(CLI::Transformer( - CLI::TransformPairs<test>{{"val1", test::val1}, {"val2", test::val2}, {"val3", test::val3}})); + ->transform(CLI::Transformer(CLI::TransformPairs<test_cli>{ + {"val1", test_cli::val1}, {"val2", test_cli::val2}, {"val3", test_cli::val3}})); args = {"-s", "val1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, test::val1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(test_cli::val1 == value); args = {"-s", "val2"}; run(); - EXPECT_EQ(value, test::val2); + CHECK(test_cli::val2 == value); args = {"-s", "val3"}; run(); - EXPECT_EQ(value, test::val3); + CHECK(test_cli::val3 == value); args = {"-s", "val4"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); // transformer doesn't do any checking so this still works args = {"-s", "5"}; run(); - EXPECT_EQ(static_cast<std::int16_t>(value), std::int16_t(5)); + CHECK(std::int16_t(5) == static_cast<std::int16_t>(value)); } -TEST_F(TApp, EnumCheckedTransform) { - enum class test : std::int16_t { val1 = 3, val2 = 4, val3 = 17 }; - test value{test::val1}; +TEST_CASE_METHOD(TApp, "EnumCheckedTransform", "[transform]") { + enum class test_cli : std::int16_t { val1 = 3, val2 = 4, val3 = 17 }; + test_cli value{test_cli::val1}; auto opt = app.add_option("-s", value) - ->transform(CLI::CheckedTransformer( - CLI::TransformPairs<test>{{"val1", test::val1}, {"val2", test::val2}, {"val3", test::val3}})); + ->transform(CLI::CheckedTransformer(CLI::TransformPairs<test_cli>{ + {"val1", test_cli::val1}, {"val2", test_cli::val2}, {"val3", test_cli::val3}})); args = {"-s", "val1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - 
EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, test::val1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(test_cli::val1 == value); args = {"-s", "val2"}; run(); - EXPECT_EQ(value, test::val2); + CHECK(test_cli::val2 == value); args = {"-s", "val3"}; run(); - EXPECT_EQ(value, test::val3); + CHECK(test_cli::val3 == value); args = {"-s", "17"}; run(); - EXPECT_EQ(value, test::val3); + CHECK(test_cli::val3 == value); args = {"-s", "val4"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-s", "5"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } // from jzakrzewski Issue #330 -TEST_F(TApp, EnumCheckedDefaultTransform) { +TEST_CASE_METHOD(TApp, "EnumCheckedDefaultTransform", "[transform]") { enum class existing : std::int16_t { abort, overwrite, remove }; app.add_option("--existing", "What to do if file already exists in the destination") ->transform( @@ -122,14 +122,14 @@ TEST_F(TApp, EnumCheckedDefaultTransform) { ->default_val("abort"); args = {"--existing", "overwrite"}; run(); - EXPECT_EQ(app.get_option("--existing")->as<existing>(), existing::overwrite); + CHECK(existing::overwrite == app.get_option("--existing")->as<existing>()); args.clear(); run(); - EXPECT_EQ(app.get_option("--existing")->as<existing>(), existing::abort); + CHECK(existing::abort == app.get_option("--existing")->as<existing>()); } // test from https://github.com/CLIUtils/CLI11/issues/369 [Jakub Zakrzewski](https://github.com/jzakrzewski) -TEST_F(TApp, EnumCheckedDefaultTransformCallback) { +TEST_CASE_METHOD(TApp, "EnumCheckedDefaultTransformCallback", "[transform]") { enum class existing : std::int16_t { abort, overwrite, remove }; auto cmd = std::make_shared<CLI::App>("deploys the repository somewhere", "deploy"); cmd->add_option("--existing", "What to do if file already exists in the destination") @@ -140,60 +140,60 @@ TEST_F(TApp, EnumCheckedDefaultTransformCallback) { 
{"remove", existing::remove}})) ->default_val("abort"); - cmd->callback([cmd]() { EXPECT_EQ(cmd->get_option("--existing")->as<existing>(), existing::abort); }); + cmd->callback([cmd]() { CHECK(cmd->get_option("--existing")->as<existing>() == existing::abort); }); app.add_subcommand(cmd); args = {"deploy"}; run(); } -TEST_F(TApp, SimpleTransformFn) { +TEST_CASE_METHOD(TApp, "SimpleTransformFn", "[transform]") { int value{0}; auto opt = app.add_option("-s", value)->transform(CLI::Transformer({{"one", "1"}}, CLI::ignore_case)); args = {"-s", "ONE"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } #if defined(CLI11_HAS_STRING_VIEW) -TEST_F(TApp, StringViewTransformFn) { +TEST_CASE_METHOD(TApp, "StringViewTransformFn", "[transform]") { std::string value; std::map<std::string_view, std::string_view> map = {// key length > std::string().capacity() [SSO length] {"a-rather-long-argument", "mapped"}}; app.add_option("-s", value)->transform(CLI::CheckedTransformer(map)); args = {"-s", "a-rather-long-argument"}; run(); - EXPECT_EQ(value, "mapped"); + CHECK("mapped" == value); } #endif -TEST_F(TApp, SimpleNumericalTransformFn) { +TEST_CASE_METHOD(TApp, "SimpleNumericalTransformFn", "[transform]") { int value{0}; auto opt = app.add_option("-s", value) ->transform(CLI::Transformer(std::vector<std::pair<std::string, int>>{{"one", 1}}, CLI::ignore_case)); args = {"-s", "ONe"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } -TEST_F(TApp, SimpleNumericalTransformFnVector) { +TEST_CASE_METHOD(TApp, "SimpleNumericalTransformFnVector", "[transform]") { std::vector<std::pair<std::string, int>> conversions{{"one", 1}, {"two", 2}}; int value{0}; auto opt = app.add_option("-s", value)->transform(CLI::Transformer(conversions, 
CLI::ignore_case)); args = {"-s", "ONe"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } -TEST_F(TApp, SimpleNumericalTransformFnArray) { +TEST_CASE_METHOD(TApp, "SimpleNumericalTransformFnArray", "[transform]") { std::array<std::pair<std::string, int>, 2> conversions; conversions[0] = std::make_pair(std::string("one"), 1); conversions[1] = std::make_pair(std::string("two"), 2); @@ -202,14 +202,14 @@ TEST_F(TApp, SimpleNumericalTransformFnArray) { auto opt = app.add_option("-s", value)->transform(CLI::Transformer(conversions, CLI::ignore_case)); args = {"-s", "ONe"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); } #ifdef CLI11_CPP14 // zero copy constexpr array operation with transformer example and test -TEST_F(TApp, SimpleNumericalTransformFnconstexprArray) { +TEST_CASE_METHOD(TApp, "SimpleNumericalTransformFnconstexprArray", "[transform]") { constexpr std::pair<const char *, int> p1{"one", 1}; constexpr std::pair<const char *, int> p2{"two", 2}; constexpr std::array<std::pair<const char *, int>, 2> conversions_c{{p1, p2}}; @@ -218,128 +218,129 @@ TEST_F(TApp, SimpleNumericalTransformFnconstexprArray) { auto opt = app.add_option("-s", value)->transform(CLI::Transformer(&conversions_c, CLI::ignore_case)); args = {"-s", "ONe"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(1 == value); args = {"-s", "twO"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, 2); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(2 == value); } #endif -TEST_F(TApp, EnumTransformFn) { - enum class test : std::int16_t { val1 = 3, val2 = 4, val3 = 17 }; - 
test value{test::val2}; +TEST_CASE_METHOD(TApp, "EnumTransformFn", "[transform]") { + enum class test_cli : std::int16_t { val1 = 3, val2 = 4, val3 = 17 }; + test_cli value{test_cli::val2}; auto opt = app.add_option("-s", value) - ->transform(CLI::Transformer( - CLI::TransformPairs<test>{{"val1", test::val1}, {"val2", test::val2}, {"val3", test::val3}}, - CLI::ignore_case, - CLI::ignore_underscore)); + ->transform(CLI::Transformer(CLI::TransformPairs<test_cli>{{"val1", test_cli::val1}, + {"val2", test_cli::val2}, + {"val3", test_cli::val3}}, + CLI::ignore_case, + CLI::ignore_underscore)); args = {"-s", "val_1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, test::val1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(test_cli::val1 == value); args = {"-s", "VAL_2"}; run(); - EXPECT_EQ(value, test::val2); + CHECK(test_cli::val2 == value); args = {"-s", "VAL3"}; run(); - EXPECT_EQ(value, test::val3); + CHECK(test_cli::val3 == value); args = {"-s", "val_4"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, EnumTransformFnMap) { - enum class test : std::int16_t { val1 = 3, val2 = 4, val3 = 17 }; - std::map<std::string, test> map{{"val1", test::val1}, {"val2", test::val2}, {"val3", test::val3}}; - test value{test::val3}; +TEST_CASE_METHOD(TApp, "EnumTransformFnMap", "[transform]") { + enum class test_cli : std::int16_t { val1 = 3, val2 = 4, val3 = 17 }; + std::map<std::string, test_cli> map{{"val1", test_cli::val1}, {"val2", test_cli::val2}, {"val3", test_cli::val3}}; + test_cli value{test_cli::val3}; auto opt = app.add_option("-s", value)->transform(CLI::Transformer(map, CLI::ignore_case, CLI::ignore_underscore)); args = {"-s", "val_1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, test::val1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(test_cli::val1 == value); args = 
{"-s", "VAL_2"}; run(); - EXPECT_EQ(value, test::val2); + CHECK(test_cli::val2 == value); args = {"-s", "VAL3"}; run(); - EXPECT_EQ(value, test::val3); + CHECK(test_cli::val3 == value); args = {"-s", "val_4"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); } -TEST_F(TApp, EnumTransformFnPtrMap) { - enum class test : std::int16_t { val1 = 3, val2 = 4, val3 = 17, val4 = 37 }; - std::map<std::string, test> map{{"val1", test::val1}, {"val2", test::val2}, {"val3", test::val3}}; - test value{test::val2}; +TEST_CASE_METHOD(TApp, "EnumTransformFnPtrMap", "[transform]") { + enum class test_cli : std::int16_t { val1 = 3, val2 = 4, val3 = 17, val4 = 37 }; + std::map<std::string, test_cli> map{{"val1", test_cli::val1}, {"val2", test_cli::val2}, {"val3", test_cli::val3}}; + test_cli value{test_cli::val2}; auto opt = app.add_option("-s", value)->transform(CLI::Transformer(&map, CLI::ignore_case, CLI::ignore_underscore)); args = {"-s", "val_1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, test::val1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(test_cli::val1 == value); args = {"-s", "VAL_2"}; run(); - EXPECT_EQ(value, test::val2); + CHECK(test_cli::val2 == value); args = {"-s", "VAL3"}; run(); - EXPECT_EQ(value, test::val3); + CHECK(test_cli::val3 == value); args = {"-s", "val_4"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); - map["val4"] = test::val4; + map["val4"] = test_cli::val4; run(); - EXPECT_EQ(value, test::val4); + CHECK(test_cli::val4 == value); } -TEST_F(TApp, EnumTransformFnSharedPtrMap) { - enum class test : std::int16_t { val1 = 3, val2 = 4, val3 = 17, val4 = 37 }; - auto map = std::make_shared<std::unordered_map<std::string, test>>(); +TEST_CASE_METHOD(TApp, "EnumTransformFnSharedPtrMap", "[transform]") { + enum class test_cli : std::int16_t { val1 = 3, val2 = 4, val3 = 17, val4 = 37 }; + auto map = 
std::make_shared<std::unordered_map<std::string, test_cli>>(); auto &mp = *map; - mp["val1"] = test::val1; - mp["val2"] = test::val2; - mp["val3"] = test::val3; + mp["val1"] = test_cli::val1; + mp["val2"] = test_cli::val2; + mp["val3"] = test_cli::val3; - test value{test::val2}; + test_cli value{test_cli::val2}; auto opt = app.add_option("-s", value)->transform(CLI::Transformer(map, CLI::ignore_case, CLI::ignore_underscore)); args = {"-s", "val_1"}; run(); - EXPECT_EQ(1u, app.count("-s")); - EXPECT_EQ(1u, opt->count()); - EXPECT_EQ(value, test::val1); + CHECK(app.count("-s") == 1u); + CHECK(opt->count() == 1u); + CHECK(test_cli::val1 == value); args = {"-s", "VAL_2"}; run(); - EXPECT_EQ(value, test::val2); + CHECK(test_cli::val2 == value); args = {"-s", "VAL3"}; run(); - EXPECT_EQ(value, test::val3); + CHECK(test_cli::val3 == value); args = {"-s", "val_4"}; - EXPECT_THROW(run(), CLI::ConversionError); + CHECK_THROWS_AS(run(), CLI::ConversionError); - mp["val4"] = test::val4; + mp["val4"] = test_cli::val4; run(); - EXPECT_EQ(value, test::val4); + CHECK(test_cli::val4 == value); } // Test a cascade of transform functions -TEST_F(TApp, TransformCascade) { +TEST_CASE_METHOD(TApp, "TransformCascade", "[transform]") { std::string output; auto opt = app.add_option("-s", output); @@ -350,23 +351,23 @@ TEST_F(TApp, TransformCascade) { opt->check(CLI::IsMember({"abcd", "bbcd", "cbcd"})); args = {"-s", "abcd"}; run(); - EXPECT_EQ(output, "abcd"); + CHECK("abcd" == output); args = {"-s", "Bbc"}; run(); - EXPECT_EQ(output, "bbcd"); + CHECK("bbcd" == output); args = {"-s", "C_B"}; run(); - EXPECT_EQ(output, "cbcd"); + CHECK("cbcd" == output); args = {"-s", "A"}; run(); - EXPECT_EQ(output, "abcd"); + CHECK("abcd" == output); } // Test a cascade of transform functions -TEST_F(TApp, TransformCascadeDeactivate) { +TEST_CASE_METHOD(TApp, "TransformCascadeDeactivate", "[transform]") { std::string output; auto opt = app.add_option("-s", output); @@ -380,70 +381,70 @@ TEST_F(TApp, 
TransformCascadeDeactivate) { opt->check(CLI::IsMember({"abcd", "bbcd", "cbcd"}).name("check")); args = {"-s", "abcd"}; run(); - EXPECT_EQ(output, "abcd"); + CHECK("abcd" == output); args = {"-s", "Bbc"}; run(); - EXPECT_EQ(output, "bbcd"); + CHECK("bbcd" == output); args = {"-s", "C_B"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); auto validator = opt->get_validator("tform2"); - EXPECT_FALSE(validator->get_active()); - EXPECT_EQ(validator->get_name(), "tform2"); + CHECK(!validator->get_active()); + CHECK("tform2" == validator->get_name()); validator->active(); - EXPECT_TRUE(validator->get_active()); + CHECK(validator->get_active()); args = {"-s", "C_B"}; run(); - EXPECT_EQ(output, "cbcd"); + CHECK("cbcd" == output); opt->get_validator("check")->active(false); args = {"-s", "gsdgsgs"}; run(); - EXPECT_EQ(output, "gsdgsgs"); + CHECK("gsdgsgs" == output); - EXPECT_THROW(opt->get_validator("sdfsdf"), CLI::OptionNotFound); + CHECK_THROWS_AS(opt->get_validator("sdfsdf"), CLI::OptionNotFound); } -TEST_F(TApp, IntTransformFn) { +TEST_CASE_METHOD(TApp, "IntTransformFn", "[transform]") { std::string value; app.add_option("-s", value) ->transform( CLI::CheckedTransformer(std::map<int, int>{{15, 5}, {18, 6}, {21, 7}}, [](int in) { return in - 10; })); args = {"-s", "25"}; run(); - EXPECT_EQ(value, "5"); + CHECK("5" == value); args = {"-s", "6"}; run(); - EXPECT_EQ(value, "6"); + CHECK("6" == value); args = {"-s", "45"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-s", "val_4"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, IntTransformNonConvertible) { +TEST_CASE_METHOD(TApp, "IntTransformNonConvertible", "[transform]") { std::string value; app.add_option("-s", value)->transform(CLI::Transformer(std::map<int, int>{{15, 5}, {18, 6}, {21, 7}})); args = {"-s", "15"}; run(); - EXPECT_EQ(value, "5"); + CHECK("5" 
== value); args = {"-s", "18"}; run(); - EXPECT_EQ(value, "6"); + CHECK("6" == value); // value can't be converted to int so it is just ignored args = {"-s", "abcd"}; run(); - EXPECT_EQ(value, "abcd"); + CHECK("abcd" == value); } -TEST_F(TApp, IntTransformNonMerge) { +TEST_CASE_METHOD(TApp, "IntTransformNonMerge", "[transform]") { std::string value; app.add_option("-s", value) ->transform(CLI::Transformer(std::map<int, int>{{15, 5}, {18, 6}, {21, 7}}) & @@ -451,39 +452,39 @@ TEST_F(TApp, IntTransformNonMerge) { "merge"); args = {"-s", "15"}; run(); - EXPECT_EQ(value, "5"); + CHECK("5" == value); args = {"-s", "18"}; run(); - EXPECT_EQ(value, "6"); + CHECK("6" == value); // value can't be converted to int so it is just ignored args = {"-s", "abcd"}; run(); - EXPECT_EQ(value, "abcd"); + CHECK("abcd" == value); args = {"-s", "25"}; run(); - EXPECT_EQ(value, "5"); + CHECK("5" == value); args = {"-s", "31"}; run(); - EXPECT_EQ(value, "7"); + CHECK("7" == value); auto help = app.help(); - EXPECT_TRUE(help.find("15->5") != std::string::npos); - EXPECT_TRUE(help.find("25->5") != std::string::npos); + CHECK(help.find("15->5") != std::string::npos); + CHECK(help.find("25->5") != std::string::npos); auto validator = app.get_option("-s")->get_validator(); help = validator->get_description(); - EXPECT_TRUE(help.find("15->5") != std::string::npos); - EXPECT_TRUE(help.find("25->5") != std::string::npos); + CHECK(help.find("15->5") != std::string::npos); + CHECK(help.find("25->5") != std::string::npos); auto validator2 = app.get_option("-s")->get_validator("merge"); - EXPECT_EQ(validator2, validator); + CHECK(validator == validator2); } -TEST_F(TApp, IntTransformMergeWithCustomValidator) { +TEST_CASE_METHOD(TApp, "IntTransformMergeWithCustomValidator", "[transform]") { std::string value; auto opt = app.add_option("-s", value) ->transform(CLI::Transformer(std::map<int, int>{{15, 5}, {18, 6}, {21, 7}}) | @@ -498,57 +499,57 @@ TEST_F(TApp, IntTransformMergeWithCustomValidator) { 
"check"); args = {"-s", "15"}; run(); - EXPECT_EQ(value, "5"); + CHECK("5" == value); args = {"-s", "18"}; run(); - EXPECT_EQ(value, "6"); + CHECK("6" == value); // value can't be converted to int so it is just ignored args = {"-s", "frog"}; run(); - EXPECT_EQ(value, "hops"); + CHECK("hops" == value); args = {"-s", "25"}; run(); - EXPECT_EQ(value, "25"); + CHECK("25" == value); auto help = app.help(); - EXPECT_TRUE(help.find("15->5") != std::string::npos); - EXPECT_TRUE(help.find("OR") == std::string::npos); + CHECK(help.find("15->5") != std::string::npos); + CHECK(help.find("OR") == std::string::npos); auto validator = opt->get_validator("check"); - EXPECT_EQ(validator->get_name(), "check"); + CHECK("check" == validator->get_name()); validator->active(false); help = app.help(); - EXPECT_TRUE(help.find("15->5") == std::string::npos); + CHECK(help.find("15->5") == std::string::npos); } -TEST_F(TApp, BoundTests) { +TEST_CASE_METHOD(TApp, "BoundTests", "[transform]") { double value; app.add_option("-s", value)->transform(CLI::Bound(3.4, 5.9)); args = {"-s", "15"}; run(); - EXPECT_EQ(value, 5.9); + CHECK(5.9 == value); args = {"-s", "3.689"}; run(); - EXPECT_EQ(value, std::stod("3.689")); + CHECK(std::stod("3.689") == value); // value can't be converted to int so it is just ignored args = {"-s", "abcd"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-s", "2.5"}; run(); - EXPECT_EQ(value, 3.4); + CHECK(3.4 == value); auto help = app.help(); - EXPECT_TRUE(help.find("bounded to") != std::string::npos); - EXPECT_TRUE(help.find("[3.4 - 5.9]") != std::string::npos); + CHECK(help.find("bounded to") != std::string::npos); + CHECK(help.find("[3.4 - 5.9]") != std::string::npos); } -TEST_F(TApp, NumberWithUnitCorrectlySplitNumber) { +TEST_CASE_METHOD(TApp, "NumberWithUnitCorrectlySplitNumber", "[transform]") { std::map<std::string, int> mapping{{"a", 10}, {"b", 100}, {"cc", 1000}}; int value = 0; @@ -556,43 +557,43 @@ 
TEST_F(TApp, NumberWithUnitCorrectlySplitNumber) { args = {"-n", "42"}; run(); - EXPECT_EQ(value, 42); + CHECK(42 == value); args = {"-n", "42a"}; run(); - EXPECT_EQ(value, 420); + CHECK(420 == value); args = {"-n", " 42 cc "}; run(); - EXPECT_EQ(value, 42000); + CHECK(42000 == value); args = {"-n", " -42 cc "}; run(); - EXPECT_EQ(value, -42000); + CHECK(-42000 == value); } -TEST_F(TApp, NumberWithUnitFloatTest) { +TEST_CASE_METHOD(TApp, "NumberWithUnitFloatTest", "[transform]") { std::map<std::string, double> mapping{{"a", 10}, {"b", 100}, {"cc", 1000}}; double value{0.0}; app.add_option("-n", value)->transform(CLI::AsNumberWithUnit(mapping)); args = {"-n", "42"}; run(); - EXPECT_DOUBLE_EQ(value, 42); + CHECK(42 == Approx(value)); args = {"-n", ".5"}; run(); - EXPECT_DOUBLE_EQ(value, .5); + CHECK(.5 == Approx(value)); args = {"-n", "42.5 a"}; run(); - EXPECT_DOUBLE_EQ(value, 425); + CHECK(425 == Approx(value)); args = {"-n", "42.cc"}; run(); - EXPECT_DOUBLE_EQ(value, 42000); + CHECK(42000 == Approx(value)); } -TEST_F(TApp, NumberWithUnitCaseSensitive) { +TEST_CASE_METHOD(TApp, "NumberWithUnitCaseSensitive", "[transform]") { std::map<std::string, int> mapping{{"a", 10}, {"A", 100}}; int value{0}; @@ -600,14 +601,14 @@ TEST_F(TApp, NumberWithUnitCaseSensitive) { args = {"-n", "42a"}; run(); - EXPECT_EQ(value, 420); + CHECK(420 == value); args = {"-n", "42A"}; run(); - EXPECT_EQ(value, 4200); + CHECK(4200 == value); } -TEST_F(TApp, NumberWithUnitCaseInsensitive) { +TEST_CASE_METHOD(TApp, "NumberWithUnitCaseInsensitive", "[transform]") { std::map<std::string, int> mapping{{"a", 10}, {"B", 100}}; int value{0}; @@ -615,22 +616,22 @@ TEST_F(TApp, NumberWithUnitCaseInsensitive) { args = {"-n", "42a"}; run(); - EXPECT_EQ(value, 420); + CHECK(420 == value); args = {"-n", "42A"}; run(); - EXPECT_EQ(value, 420); + CHECK(420 == value); args = {"-n", "42b"}; run(); - EXPECT_EQ(value, 4200); + CHECK(4200 == value); args = {"-n", "42B"}; run(); - EXPECT_EQ(value, 4200); + 
CHECK(4200 == value); } -TEST_F(TApp, NumberWithUnitMandatoryUnit) { +TEST_CASE_METHOD(TApp, "NumberWithUnitMandatoryUnit", "[transform]") { std::map<std::string, int> mapping{{"a", 10}, {"A", 100}}; int value{0}; @@ -641,17 +642,17 @@ TEST_F(TApp, NumberWithUnitMandatoryUnit) { args = {"-n", "42a"}; run(); - EXPECT_EQ(value, 420); + CHECK(420 == value); args = {"-n", "42A"}; run(); - EXPECT_EQ(value, 4200); + CHECK(4200 == value); args = {"-n", "42"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, NumberWithUnitMandatoryUnit2) { +TEST_CASE_METHOD(TApp, "NumberWithUnitMandatoryUnit2", "[transform]") { std::map<std::string, int> mapping{{"a", 10}, {"B", 100}}; int value{0}; @@ -662,49 +663,49 @@ TEST_F(TApp, NumberWithUnitMandatoryUnit2) { args = {"-n", "42A"}; run(); - EXPECT_EQ(value, 420); + CHECK(420 == value); args = {"-n", "42b"}; run(); - EXPECT_EQ(value, 4200); + CHECK(4200 == value); args = {"-n", "42"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, NumberWithUnitBadMapping) { - EXPECT_THROW(CLI::AsNumberWithUnit(std::map<std::string, int>{{"a", 10}, {"A", 100}}, - CLI::AsNumberWithUnit::CASE_INSENSITIVE), - CLI::ValidationError); - EXPECT_THROW(CLI::AsNumberWithUnit(std::map<std::string, int>{{"a", 10}, {"9", 100}}), CLI::ValidationError); - EXPECT_THROW(CLI::AsNumberWithUnit(std::map<std::string, int>{{"a", 10}, {"AA A", 100}}), CLI::ValidationError); - EXPECT_THROW(CLI::AsNumberWithUnit(std::map<std::string, int>{{"a", 10}, {"", 100}}), CLI::ValidationError); +TEST_CASE_METHOD(TApp, "NumberWithUnitBadMapping", "[transform]") { + CHECK_THROWS_AS(CLI::AsNumberWithUnit(std::map<std::string, int>{{"a", 10}, {"A", 100}}, + CLI::AsNumberWithUnit::CASE_INSENSITIVE), + CLI::ValidationError); + CHECK_THROWS_AS(CLI::AsNumberWithUnit(std::map<std::string, int>{{"a", 10}, {"9", 100}}), CLI::ValidationError); + 
CHECK_THROWS_AS(CLI::AsNumberWithUnit(std::map<std::string, int>{{"a", 10}, {"AA A", 100}}), CLI::ValidationError); + CHECK_THROWS_AS(CLI::AsNumberWithUnit(std::map<std::string, int>{{"a", 10}, {"", 100}}), CLI::ValidationError); } -TEST_F(TApp, NumberWithUnitBadInput) { +TEST_CASE_METHOD(TApp, "NumberWithUnitBadInput", "[transform]") { std::map<std::string, int> mapping{{"a", 10}, {"b", 100}}; int value{0}; app.add_option("-n", value)->transform(CLI::AsNumberWithUnit(mapping)); args = {"-n", "13 a b"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-n", "13 c"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-n", "a"}; // Assume 1.0 unit - EXPECT_NO_THROW(run()); + CHECK_NOTHROW(run()); args = {"-n", "12.0a"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-n", "a5"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-n", ""}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-n", "13 a-"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, NumberWithUnitIntOverflow) { +TEST_CASE_METHOD(TApp, "NumberWithUnitIntOverflow", "[transform]") { std::map<std::string, int> mapping{{"a", 1000000}, {"b", 100}, {"c", 101}}; std::int32_t value; @@ -712,147 +713,147 @@ TEST_F(TApp, NumberWithUnitIntOverflow) { args = {"-n", "1000 a"}; run(); - EXPECT_EQ(value, 1000000000); + CHECK(1000000000 == value); args = {"-n", "1000000 a"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-n", "-1000000 a"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-n", "21474836 b"}; run(); - EXPECT_EQ(value, 2147483600); + CHECK(2147483600 == value); args = {"-n", 
"21474836 c"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); } -TEST_F(TApp, NumberWithUnitFloatOverflow) { +TEST_CASE_METHOD(TApp, "NumberWithUnitFloatOverflow", "[transform]") { std::map<std::string, float> mapping{{"a", 2.f}, {"b", 1.f}, {"c", 0.f}}; float value{0.0F}; app.add_option("-n", value)->transform(CLI::AsNumberWithUnit(mapping)); args = {"-n", "3e+38 a"}; - EXPECT_THROW(run(), CLI::ValidationError); + CHECK_THROWS_AS(run(), CLI::ValidationError); args = {"-n", "3e+38 b"}; run(); - EXPECT_FLOAT_EQ(value, 3e+38f); + CHECK(3e+38f == Approx(value)); args = {"-n", "3e+38 c"}; run(); - EXPECT_FLOAT_EQ(value, 0.f); + CHECK(0.f == Approx(value)); } -TEST_F(TApp, AsSizeValue1000_1024) { +TEST_CASE_METHOD(TApp, "AsSizeValue1000_1024", "[transform]") { std::uint64_t value{0}; app.add_option("-s", value)->transform(CLI::AsSizeValue(true)); args = {"-s", "10240"}; run(); - EXPECT_EQ(value, 10240u); + CHECK(10240u == value); args = {"-s", "1b"}; run(); - EXPECT_EQ(value, 1u); + CHECK(1u == value); std::uint64_t k_value{1000u}; std::uint64_t ki_value{1024u}; args = {"-s", "1k"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1kb"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1 Kb"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1ki"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1kib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); k_value = 1000ull * 1000u; ki_value = 1024ull * 1024u; args = {"-s", "1m"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1mb"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1mi"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1mib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); k_value = 1000ull * 1000u * 1000u; ki_value = 
1024ull * 1024u * 1024u; args = {"-s", "1g"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1gb"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1gi"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1gib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); k_value = 1000ull * 1000u * 1000u * 1000u; ki_value = 1024ull * 1024u * 1024u * 1024u; args = {"-s", "1t"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1tb"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1ti"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1tib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); k_value = 1000ull * 1000u * 1000u * 1000u * 1000u; ki_value = 1024ull * 1024u * 1024u * 1024u * 1024u; args = {"-s", "1p"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1pb"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1pi"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1pib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); k_value = 1000ull * 1000u * 1000u * 1000u * 1000u * 1000u; ki_value = 1024ull * 1024u * 1024u * 1024u * 1024u * 1024u; args = {"-s", "1e"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1eb"}; run(); - EXPECT_EQ(value, k_value); + CHECK(k_value == value); args = {"-s", "1ei"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1eib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); } -TEST_F(TApp, duration_test) { +TEST_CASE_METHOD(TApp, "duration_test", "[transform]") { std::chrono::seconds duration{1}; app.option_defaults()->ignore_case(); @@ -863,107 +864,107 @@ TEST_F(TApp, duration_test) { ->capture_default_str() ->transform(CLI::AsNumberWithUnit( 
std::map<std::string, std::size_t>{{"sec", 1}, {"min", 60}, {"h", 3600}, {"day", 24 * 3600}})); - EXPECT_NO_THROW(app.parse(std::vector<std::string>{"1 day", "--duration"})); + CHECK_NOTHROW(app.parse(std::vector<std::string>{"1 day", "--duration"})); - EXPECT_EQ(duration, std::chrono::seconds(86400)); + CHECK(std::chrono::seconds(86400) == duration); } -TEST_F(TApp, AsSizeValue1024) { +TEST_CASE_METHOD(TApp, "AsSizeValue1024", "[transform]") { std::uint64_t value{0}; app.add_option("-s", value)->transform(CLI::AsSizeValue(false)); args = {"-s", "10240"}; run(); - EXPECT_EQ(value, 10240u); + CHECK(10240u == value); args = {"-s", "1b"}; run(); - EXPECT_EQ(value, 1u); + CHECK(1u == value); std::uint64_t ki_value{1024u}; args = {"-s", "1k"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1kb"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1 Kb"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1ki"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1kib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); ki_value = 1024ull * 1024u; args = {"-s", "1m"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1mb"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1mi"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1mib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); ki_value = 1024ull * 1024u * 1024u; args = {"-s", "1g"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1gb"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1gi"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1gib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); ki_value = 1024ull * 1024u * 1024u * 1024u; args = {"-s", "1t"}; run(); - 
EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1tb"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1ti"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1tib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); ki_value = 1024ull * 1024u * 1024u * 1024u * 1024u; args = {"-s", "1p"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1pb"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1pi"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1pib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); ki_value = 1024ull * 1024u * 1024u * 1024u * 1024u * 1024u; args = {"-s", "1e"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1eb"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1ei"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); args = {"-s", "1eib"}; run(); - EXPECT_EQ(value, ki_value); + CHECK(ki_value == value); } diff --git a/packages/CLI11/tests/TrueFalseTest.cpp b/packages/CLI11/tests/TrueFalseTest.cpp index 7c37d456dc79c9cacb409573a3ce184e02a90a8c..aa8886c092ab21c1b464ec833c3fd00924378924 100644 --- a/packages/CLI11/tests/TrueFalseTest.cpp +++ b/packages/CLI11/tests/TrueFalseTest.cpp @@ -6,31 +6,24 @@ #include "app_helper.hpp" -/// This allows a set of strings to be run over by a test -struct TApp_TBO : public TApp_base, testing::TestWithParam<const char *> {}; - -TEST_P(TApp_TBO, TrueBoolOption) { +TEST_CASE_METHOD(TApp, "True Bool Option", "[bool][flag]") { + // Strings needed here due to MSVC 2015. 
+ auto param = GENERATE(as<std::string>{}, "true", "on", "True", "ON"); bool value{false}; // Not used, but set just in case app.add_option("-b,--bool", value); - args = {"--bool", GetParam()}; + args = {"--bool", param}; run(); - EXPECT_EQ(1u, app.count("--bool")); - EXPECT_TRUE(value); + CHECK(app.count("--bool") == 1u); + CHECK(value); } -// Change to INSTANTIATE_TEST_SUITE_P in GTest master -INSTANTIATE_TEST_SUITE_P(TrueBoolOptions_test, TApp_TBO, testing::Values("true", "on", "True", "ON")); - -/// This allows a set of strings to be run over by a test -struct TApp_FBO : public TApp_base, public ::testing::TestWithParam<const char *> {}; +TEST_CASE_METHOD(TApp, "False Bool Option", "[bool][flag]") { + auto param = GENERATE(as<std::string>{}, "false", "off", "False", "OFF"); -TEST_P(TApp_FBO, FalseBoolOptions) { bool value{true}; // Not used, but set just in case app.add_option("-b,--bool", value); - args = {"--bool", GetParam()}; + args = {"--bool", param}; run(); - EXPECT_EQ(1u, app.count("--bool")); - EXPECT_FALSE(value); + CHECK(app.count("--bool") == 1u); + CHECK_FALSE(value); } - -INSTANTIATE_TEST_SUITE_P(FalseBoolOptions_test, TApp_FBO, ::testing::Values("false", "off", "False", "OFF")); diff --git a/packages/CLI11/tests/WindowsTest.cpp b/packages/CLI11/tests/WindowsTest.cpp index 41053bbe9212c6c1f6134c9e230d22c77a04cc3f..847266649965e6fc03176cda5862d46d07dbe103 100644 --- a/packages/CLI11/tests/WindowsTest.cpp +++ b/packages/CLI11/tests/WindowsTest.cpp @@ -10,10 +10,10 @@ // This test verifies that CLI11 still works if // Windows.h is included. 
#145 -TEST_F(TApp, WindowsTestSimple) { +TEST_CASE_METHOD(TApp, "WindowsTestSimple", "[windows]") { app.add_flag("-c,--count"); args = {"-c"}; run(); - EXPECT_EQ(1u, app.count("-c")); - EXPECT_EQ(1u, app.count("--count")); + CHECK(app.count("-c") == 1u); + CHECK(app.count("--count") == 1u); } diff --git a/packages/CLI11/tests/app_helper.hpp b/packages/CLI11/tests/app_helper.hpp index 6b250a422cb5f13eb549d9a5bf9d91f58f029953..0f72adda27064e17d3cb97f242526e5b1b2b4501 100644 --- a/packages/CLI11/tests/app_helper.hpp +++ b/packages/CLI11/tests/app_helper.hpp @@ -12,7 +12,7 @@ #include "CLI/CLI.hpp" #endif -#include "gtest/gtest.h" +#include "catch.hpp" #include <iostream> #include <string> #include <utility> @@ -20,11 +20,11 @@ using input_t = std::vector<std::string>; -class TApp_base { +class TApp { public: CLI::App app{"My Test Program"}; input_t args{}; - virtual ~TApp_base() = default; + virtual ~TApp() = default; void run() { // It is okay to re-parse - clear is called automatically before a parse. 
input_t newargs = args; @@ -33,8 +33,6 @@ class TApp_base { } }; -class TApp : public TApp_base, public ::testing::Test {}; - class TempFile { std::string _name{}; diff --git a/packages/CLI11/tests/link_test_2.cpp b/packages/CLI11/tests/link_test_2.cpp index ba4cc8fe1d01be7bce5276c8b3fcd5349afb7790..b8544ab47a8104b831b66cd409490c200261bcb7 100644 --- a/packages/CLI11/tests/link_test_2.cpp +++ b/packages/CLI11/tests/link_test_2.cpp @@ -6,12 +6,12 @@ #include "CLI/CLI.hpp" #include "CLI/Timer.hpp" -#include <gtest/gtest.h> +#include "catch.hpp" int do_nothing(); // Verifies there are no unguarded inlines -TEST(Link, DoNothing) { +TEST_CASE("Link: DoNothing", "[link]") { int a = do_nothing(); - EXPECT_EQ(7, a); + CHECK(a == 7); } diff --git a/packages/CLI11/tests/main.cpp b/packages/CLI11/tests/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f1c2725f53e260069f3cc867bb1398ac21cb4b2f --- /dev/null +++ b/packages/CLI11/tests/main.cpp @@ -0,0 +1,8 @@ +// Copyright (c) 2017-2020, University of Cincinnati, developed by Henry Schreiner +// under NSF AWARD 1414736 and by the respective contributors. +// All rights reserved. 
+// +// SPDX-License-Identifier: BSD-3-Clause + +#define CATCH_CONFIG_MAIN +#include "catch.hpp" diff --git a/packages/PEGTL/.github/workflows/clang-analyze.yml b/packages/PEGTL/.github/workflows/clang-analyze.yml index 2226ea7b00338b26996a6ea8bc19d93eb10e4a8e..523b105564db92155b5cac93281e9e5d36776868 100644 --- a/packages/PEGTL/.github/workflows/clang-analyze.yml +++ b/packages/PEGTL/.github/workflows/clang-analyze.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: clang-analyze: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 diff --git a/packages/PEGTL/.github/workflows/clang-tidy.yml b/packages/PEGTL/.github/workflows/clang-tidy.yml index 41e1003c48514127943e3f2da59e119b7c75e670..4da8af6ff8b1695835e65404a825f51d673bc49a 100644 --- a/packages/PEGTL/.github/workflows/clang-tidy.yml +++ b/packages/PEGTL/.github/workflows/clang-tidy.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: clang-tidy: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 diff --git a/packages/PEGTL/.github/workflows/linux.yml b/packages/PEGTL/.github/workflows/linux.yml index ba30d3d98d9b4753649bd71fc5bdfa984c05971e..139982af19acf195bf688fc94db6ac22e85b841e 100644 --- a/packages/PEGTL/.github/workflows/linux.yml +++ b/packages/PEGTL/.github/workflows/linux.yml @@ -8,16 +8,14 @@ jobs: fail-fast: false matrix: compiler: - - g++-7 - - g++-8 - g++-9 - g++-10 - - clang++-8 - clang++-9 - clang++-10 + - clang++-11 build_type: [Debug, Release] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: CXX: ${{ matrix.compiler }} @@ -36,22 +34,34 @@ jobs: - working-directory: build/ run: ctest --output-on-failure - linux-gcc-extra: + linux-old: strategy: fail-fast: false matrix: - flags: ["-fno-rtti"] + compiler: + - g++-7 + - g++-8 + - clang++-6.0 + - clang++-7 + - clang++-8 build_type: [Debug, Release] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest + + env: + CXX: ${{ matrix.compiler }} steps: - uses: actions/checkout@v2 + - run: 
sudo apt-get update + + - run: sudo apt-get install -y ${{ matrix.compiler }} + - run: cmake -E make_directory build - working-directory: build/ - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_FLAGS="${{ matrix.flags }}" + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - working-directory: build/ run: cmake --build . @@ -59,17 +69,14 @@ jobs: - working-directory: build/ run: ctest --output-on-failure - linux-clang-extra: + linux-gcc-extra: strategy: fail-fast: false matrix: - flags: ["-fno-rtti", "-fms-extensions"] + flags: ["-fno-rtti"] build_type: [Debug, Release] - runs-on: ubuntu-20.04 - - env: - CXX: clang++ + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -85,32 +92,25 @@ jobs: - working-directory: build/ run: ctest --output-on-failure - clang-conanio: + linux-clang-extra: strategy: fail-fast: false matrix: - image: - # List: https://github.com/conan-io/conan-docker-tools - - clang50 - - clang60 - - clang7 - - clang9-x86 - - clang11 + flags: ["-fno-rtti", "-fms-extensions"] build_type: [Debug, Release] - container: - image: conanio/${{ matrix.image }} - options: --user root - runs-on: ubuntu-latest + env: + CXX: clang++ + steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - run: cmake -E make_directory build - working-directory: build/ - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_FLAGS="${{ matrix.flags }}" - working-directory: build/ run: cmake --build . 
diff --git a/packages/PEGTL/.github/workflows/no-exceptions.yml b/packages/PEGTL/.github/workflows/no-exceptions.yml index 3dd3d74b835d02a9b06dfa404b44793ac44900a3..eeb964f2540b1a6711afc9c689f155c7ee63b827 100644 --- a/packages/PEGTL/.github/workflows/no-exceptions.yml +++ b/packages/PEGTL/.github/workflows/no-exceptions.yml @@ -7,10 +7,10 @@ jobs: strategy: fail-fast: false matrix: - compiler: [g++-10, clang++-10] + compiler: [g++, clang++] build_type: [Debug, Release] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: CXX: ${{ matrix.compiler }} diff --git a/packages/PEGTL/.github/workflows/sanitizer.yml b/packages/PEGTL/.github/workflows/sanitizer.yml index 1f478cb067072b19416fde873c39725e1a3f03d9..61d61aebdfa1b6640934ed95dfeeb4581a6e8b4a 100644 --- a/packages/PEGTL/.github/workflows/sanitizer.yml +++ b/packages/PEGTL/.github/workflows/sanitizer.yml @@ -10,7 +10,7 @@ jobs: cxx: [g++, clang++] sanitizer: [address, undefined] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: CXX: ${{ matrix.cxx }} diff --git a/packages/PEGTL/.gitrepo b/packages/PEGTL/.gitrepo index 393af709f67ffe786ffbcab99ba7af6772acfce4..d60e16e83cde84179e4ec27115e58d51dd0cd242 100644 --- a/packages/PEGTL/.gitrepo +++ b/packages/PEGTL/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:taocpp/PEGTL.git branch = master - commit = 57f8ebe0045d7e35cbb251536146a57bc0cf9db5 - parent = 0a259f7e3e4fe2364b8d45b641c7f48ff3bc7341 - cmdver = 0.4.3 + commit = c131c2e2aad67037285ef39d11ec4f1d28d4fc73 + parent = 2f2fa0e22bd114f44f78c5bee89bc13bd0959d1d method = merge + cmdver = 0.4.3 diff --git a/packages/PEGTL/README.md b/packages/PEGTL/README.md index 64d090c994d932cb813415879576ff82443506ed..e613c97f9b5a542d542103b3251bf9067806bdc6 100644 --- a/packages/PEGTL/README.md +++ b/packages/PEGTL/README.md @@ -112,6 +112,7 @@ In appreciation of all contributions here are the people that have [directly con [<img alt="pauloscustodio" src="https://avatars.githubusercontent.com/u/70773" 
width="120">](https://github.com/pauloscustodio) [<img alt="pleroux0" src="https://avatars.githubusercontent.com/u/39619854" width="120">](https://github.com/pleroux0) [<img alt="quadfault" src="https://avatars.githubusercontent.com/u/30195320" width="120">](https://github.com/quadfault) +[<img alt="quarticcat" src="https://avatars.githubusercontent.com/u/70888415" width="120">](https://github.com/quarticcat) [<img alt="ras0219" src="https://avatars.githubusercontent.com/u/533828" width="120">](https://github.com/ras0219) [<img alt="redmercury" src="https://avatars.githubusercontent.com/u/4424222" width="120">](https://github.com/redmercury) [<img alt="robertcampion" src="https://avatars.githubusercontent.com/u/4220569" width="120">](https://github.com/robertcampion) diff --git a/packages/PEGTL/doc/Actions-and-States.md b/packages/PEGTL/doc/Actions-and-States.md index 951720b7d7fcb073b47a94ac797e4b5c684a26d6..4c8101682972bde687cad06b2a1d96024e7afd79 100644 --- a/packages/PEGTL/doc/Actions-and-States.md +++ b/packages/PEGTL/doc/Actions-and-States.md @@ -24,6 +24,7 @@ When an action is *applied*, the corresponding function receives the *states*, a * [Changing Actions and States](#changing-actions-and-states) * [Match](#match) * [Nothing](#nothing) +* [Backtracking](#backtracking) * [Troubleshooting](#troubleshooting) * [Boolean Return](#boolean-return) * [State Mismatch](#state-mismatch) @@ -285,7 +286,7 @@ std::string unescape( const std::string& escaped ) At the end of the parsing run, the complete unescaped string can be found in the aptly named variable `unescaped`. -A more complete example of how to unescape strings can be found in `src/examples/pegtl/unescape.cpp`. +A more complete example of how to unescape strings can be found in `src/example/pegtl/unescape.cpp`. 
## Specialising @@ -506,8 +507,72 @@ For example when a class `b` is derived from `change_state`, it also gains that At this point `b` is allowed to either have or not have an `apply()` or `apply0()`. By letting `b` also derive from one of the three mentioned classes, the `maybe_nothing` will be ignored and `b` will be checked to have or not have the functions as dictated by the respective additional base class. +## Backtracking + +Sometimes there can be *backtracking* during a parsing run which can lead to Actions being called in places where their effects are undesired. +While it might be intuitively clear what backtracking is, for the purpose of the following discussion we give a slightly more formal definition. + +We speak of *backtracking* across a rule `S` when there is a rule `R` of which `S` is a (direct or indirect) sub-rule and during a parsing run +1. `R` returns local failure after +2. `S` succeeded and its success is a requirement for the success of `R` and +3. it is "still possible" for the top-level grammar rule of the parsing run to succeed. + +In this case the input will have been rewound to the point at which `R` was attempted to match and all effects of `S` on the Input will have been undone, however, and this is the subject of this section, any action attached to `S` will have been already performed without there being an automatic "undo". + +#### The AAC-Problem + +In some cases it is easy to rewrite the grammar in a way that prevents backtracking. +This simultaneously removes the issue of having to undo actions and improves parsing performance. + +The prototypical case for which such a rewrite can be done is `R = sor< seq< A, B >, seq< A, C > >` where `A`, `B` and `C` are arbitrary rules. 
+ +If during a parsing run there are actions attached to `A` and `C`, and the input matches `seq< A, C >` but not `seq< A, B >`, then the action for `A` will be called *twice* before the action for `C`, which gives this problem its "AAC" name, given that what happens is: + +* Begin `sor< seq< A, B >, seq< A, C > >` +* Begin `seq< A, B >` +* Begin `A` +* Success `A` with action called +* Begin `B` +* Failure `B` +* Failure `seq< A, B >` +* Begin `seq< A, C >` +* Begin `A` at the same position as the begin `A` above +* Success `A` with action called again on the same input +* Begin `C` +* Success `C` +* Success `seq< A, C >` +* Success `sor< seq< A, B >, seq< A, C > >` + +#### Rewriting + +In practice the structure of the rule might be more complicated than the pure AAC-problem which will make it harder to recognise the pattern. +One solution is to rewrite `R` as `R' = seq< A, sor< B, C > >` where of course any action for `A` will be called at most once for every successful match of `R'`. + +#### Manual Undo + +Another solution is to undo the effects of the Action attached to `A` in case the encompassing `seq< A, B >` (or `seq< A, C >`) fails. + +The advantage of this approach is that the implementation of the Action for `A` can pretend that it is only called when really needed. +The disadvantage is that there is no function on the Action that is called in the case of failure which requires the user to either write a custom `match()` function in the Action for `seq< A, B >` or to implement the `failure()` function in a custom [Control class](Control-and-Debug.md). + +#### Manual Commit + +A further solution is to let the Action for `A` perform its job "to the side", and only "commit" the effects to the target data structure in the Action for `seq< A, B >`.
+ +For example if the Action attached to `A` takes the matched portion of the Input as `std::string` and appends it to `std::vector< std::string >` one could change said Action for `A` to only fill some temporary string in one of the States, and create an Action for `seq< A, B >` that, after it is called on success of that rule, appends the aforementioned temporary string to the target vector. + +#### Looking Ahead + +When everything else fails and a quick-and-dirty solution to Actions being called too often in the presence of backtracking is required and/or performance is not of prime importance it is relatively easy to solve the problem by employing the infinite look-ahead capability of PEGs. + +When backtracking across `S` is a problem because an Action attached to `S` can be called when `S` succeeds even though there is a higher-up rule `R` that can still fail then simply replace `R` with `seq< at< R >, R >` in the grammar. + +Remembering that `at` disables all Actions explains how this solves the problem; we first verify without Actions that `R` will indeed match at this point and only then match `R` again with Actions enabled. + ## Troubleshooting +The following lists a couple of frequently encountered Action-related errors and how to fix them. + ### Boolean Return Actions returning `bool` are an advanced use case that should be used with caution. diff --git a/packages/PEGTL/doc/Changelog.md b/packages/PEGTL/doc/Changelog.md index d1b57f131b5aa72f8221d024e3a2c705ed8bc77d..cc194318f8cd00dc5a894e713003369036b02506 100644 --- a/packages/PEGTL/doc/Changelog.md +++ b/packages/PEGTL/doc/Changelog.md @@ -1,5 +1,11 @@ # Changelog +## 3.2.1 + +**Not yet released** + +* Added an optional limiter to guard against infinite recursion. 
+ ## 3.2.0 Released 2021-01-15 diff --git a/packages/PEGTL/doc/Errors-and-Exceptions.md b/packages/PEGTL/doc/Errors-and-Exceptions.md index f080c0917885c1f9b1793e81acaf4222f89dee3d..7b8f018d84e757c919c2eae8f91864ccac87a060 100644 --- a/packages/PEGTL/doc/Errors-and-Exceptions.md +++ b/packages/PEGTL/doc/Errors-and-Exceptions.md @@ -162,7 +162,7 @@ This is often insufficient and one would like to provide more meaningful error m A practical technique to provide customised error messages for all `must<>` error points uses the `must_if<>` helper. -For an example of this method see `src/examples/pegtl/json_errors.hpp`, where all errors that might occur in the supplied JSON grammar are customised like this: +For an example of this method see `src/example/pegtl/json_errors.hpp`, where all errors that might occur in the supplied JSON grammar are customised like this: ```c++ template< typename > inline constexpr const char* error_message = nullptr; diff --git a/packages/PEGTL/doc/Grammar-Analysis.md b/packages/PEGTL/doc/Grammar-Analysis.md index 964adb9c19e27963a9ec98202208e98b086e7894..629982d8ec624f154c86fb95e16edacec6704a5a 100644 --- a/packages/PEGTL/doc/Grammar-Analysis.md +++ b/packages/PEGTL/doc/Grammar-Analysis.md @@ -56,7 +56,7 @@ This support automatically extends to all custom rules built "the usual way" via For true custom rules, i.e. rules that implement their own `match()` function, the following steps need to be taken for them to work with the grammar analysis. -1. The rule needs a `rule_t` that, usually for true custom rules, is a type alias for the grammar rule itself. +1. The rule needs a [`rule_t`](Meta-Data-and-Visit.md#rule-type) that, usually for true custom rules, is a type alias for the grammar rule itself. 2. 
There needs to be a specialisation of the `analyze_traits<>` for the custom rule, with an additional first template parameter: Assuming a custom rule like the following diff --git a/packages/PEGTL/doc/Parse-Tree.md b/packages/PEGTL/doc/Parse-Tree.md index 039208294c18c3a201492e6f8c9da75affcc5bd5..baa3b24a7fc1e4e54daac1e1e56d0f461900f336 100644 --- a/packages/PEGTL/doc/Parse-Tree.md +++ b/packages/PEGTL/doc/Parse-Tree.md @@ -18,6 +18,7 @@ It provides the basic infrastructure to build a parse tree that * [Transformers](#transformers) * [`tao::pegtl::parse_tree::node`](#taopegtlparse_treenode) * [Custom Node Class](#custom-node-class) +* [Requirements](#requirements) ## Full Parse Tree @@ -235,4 +236,8 @@ struct my_node }; ``` +## Requirements + +The parse tree uses a rule's meta data supplied by [`subs_t`](Meta-Data-and-Visit.md#sub-rules) for internal optimizations. + Copyright (c) 2018-2021 Dr. Colin Hirsch and Daniel Frey diff --git a/packages/PEGTL/doc/README.md b/packages/PEGTL/doc/README.md index 09b6fbcf15a32a3202faaa073dd9c96f1fbde813..7c028ccae0f67e1a8581605c48f4fb5f44491f1a 100644 --- a/packages/PEGTL/doc/README.md +++ b/packages/PEGTL/doc/README.md @@ -44,6 +44,7 @@ * [Changing Actions and States](Actions-and-States.md#changing-actions-and-states) * [Match](Actions-and-States.md#match) * [Nothing](Actions-and-States.md#nothing) + * [Backtracking](Actions-and-States.md#backtracking) * [Troubleshooting](Actions-and-States.md#troubleshooting) * [Boolean Return](Actions-and-States.md#boolean-return) * [State Mismatch](Actions-and-States.md#state-mismatch) @@ -103,6 +104,7 @@ * [Transformer](Parse-Tree.md#transformer) * [`tao::pegtl::parse_tree::node`](Parse-Tree.md#taopegtlparse_treenode) * [Custom Node Class](Parse-Tree.md#custom-node-class) + * [Requirements](Parse-Tree.md#requirements) * [Meta Data and Visit](Meta-Data-and-Visit.md) * [Internals](Meta-Data-and-Visit.md#internals) * [Rule Type](Meta-Data-and-Visit.md#rule-type) diff --git 
a/packages/PEGTL/doc/Rule-Reference.md b/packages/PEGTL/doc/Rule-Reference.md index a39700edf41e97cddcce989d0d67cd0c5cefe1eb..79dd46ccf969050b4f40fbafc9ebe56bee2c5699 100644 --- a/packages/PEGTL/doc/Rule-Reference.md +++ b/packages/PEGTL/doc/Rule-Reference.md @@ -100,7 +100,7 @@ These rules are in namespace `tao::pegtl`. * Enables all actions (if any). * [Meta data] and [implementation] mapping: - `enable<>::rule_t` is `internal::success` - - `enable< R >::rule_t` is `internal::enable<, R >` + - `enable< R >::rule_t` is `internal::enable< R >` - `enable< R >::subs_t` is `type_list< R >` - `enable< R... >::rule_t` is `internal::enable< internal::seq< R... > >` - `enable< R... >::subs_t` is `type_list< internal::seq< R... > >` @@ -368,6 +368,8 @@ Note that the `true` template parameter to `internal::if_must` corresponds to th - `rematch< R, S... >::rule_t` is `internal::rematch< R, S... >` - `rematch< R, S... >::subs_t` is `type_list< R, S... >` +Note that the `S` do *not* need to match *all* of the input matched by `R` (which is why `minus` uses `eof` in its implementation). + ###### `rep< Num, R... >` * Matches `seq< R... >` for `Num` times without checking for further matches. 
diff --git a/packages/PEGTL/include/tao/pegtl/buffer_input.hpp b/packages/PEGTL/include/tao/pegtl/buffer_input.hpp index 0003442b0a07c0953ded634e27819f0331a14157..60d39d34935d21fe33be102ac7146ff4aa5e3a84 100644 --- a/packages/PEGTL/include/tao/pegtl/buffer_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/buffer_input.hpp @@ -66,8 +66,8 @@ namespace TAO_PEGTL_NAMESPACE ~buffer_input() = default; - void operator=( const buffer_input& ) = delete; - void operator=( buffer_input&& ) = delete; + buffer_input& operator=( const buffer_input& ) = delete; + buffer_input& operator=( buffer_input&& ) = delete; [[nodiscard]] bool empty() { @@ -216,6 +216,9 @@ namespace TAO_PEGTL_NAMESPACE iterator_t m_current; char* m_end; const Source m_source; + + public: + std::size_t private_depth = 0; }; } // namespace TAO_PEGTL_NAMESPACE diff --git a/packages/PEGTL/include/tao/pegtl/contrib/analyze.hpp b/packages/PEGTL/include/tao/pegtl/contrib/analyze.hpp index 586710735f9225d65381cf547af2c56d929bd8ed..0bfbd18c2fd77615ab5ff3eb7d1aec8914221807 100644 --- a/packages/PEGTL/include/tao/pegtl/contrib/analyze.hpp +++ b/packages/PEGTL/include/tao/pegtl/contrib/analyze.hpp @@ -44,8 +44,8 @@ namespace TAO_PEGTL_NAMESPACE ~analyze_cycles_impl() = default; - void operator=( analyze_cycles_impl&& ) = delete; - void operator=( const analyze_cycles_impl& ) = delete; + analyze_cycles_impl& operator=( analyze_cycles_impl&& ) = delete; + analyze_cycles_impl& operator=( const analyze_cycles_impl& ) = delete; [[nodiscard]] std::size_t problems() { diff --git a/packages/PEGTL/include/tao/pegtl/contrib/check_bytes.hpp b/packages/PEGTL/include/tao/pegtl/contrib/check_bytes.hpp new file mode 100644 index 0000000000000000000000000000000000000000..84333c7d7ab6425ab0b923b88eb68d9046cc5668 --- /dev/null +++ b/packages/PEGTL/include/tao/pegtl/contrib/check_bytes.hpp @@ -0,0 +1,55 @@ +// Copyright (c) 2021 Dr. 
Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#ifndef TAO_PEGTL_CONTRIB_CHECK_BYTES_HPP +#define TAO_PEGTL_CONTRIB_CHECK_BYTES_HPP + +#include "../apply_mode.hpp" +#include "../config.hpp" +#include "../match.hpp" +#include "../nothing.hpp" +#include "../rewind_mode.hpp" + +#if defined( __cpp_exceptions ) +#include "../parse_error.hpp" +#else +#include <cstdio> +#include <exception> +#endif + +namespace TAO_PEGTL_NAMESPACE +{ + template< std::size_t Maximum > + struct check_bytes + : maybe_nothing + { + template< typename Rule, + pegtl::apply_mode A, + pegtl::rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename... States > + static bool match( ParseInput& in, States&&... st ) + { + const auto* start = in.current(); + if( TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... ) ) { + if( std::size_t( in.current() - start ) > Maximum ) { +#if defined( __cpp_exceptions ) + throw TAO_PEGTL_NAMESPACE::parse_error( "maximum allowed rule consumption exceeded", in ); +#else + std::fputs( "maximum allowed rule consumption exceeded\n", stderr ); + std::terminate(); +#endif + } + return true; + } + return false; + } + }; + +} // namespace TAO_PEGTL_NAMESPACE + +#endif diff --git a/packages/PEGTL/include/tao/pegtl/contrib/instantiate.hpp b/packages/PEGTL/include/tao/pegtl/contrib/instantiate.hpp index c04db3f541138c79fe50813a2b6f6e133ffd8251..ebf6e7f1ca1730a47617ccaea29d565270b5a888 100644 --- a/packages/PEGTL/include/tao/pegtl/contrib/instantiate.hpp +++ b/packages/PEGTL/include/tao/pegtl/contrib/instantiate.hpp @@ -26,7 +26,7 @@ namespace TAO_PEGTL_NAMESPACE class Control, typename ParseInput, typename... States > - [[nodiscard]] static bool match( ParseInput& in, States&... st ) + [[nodiscard]] static bool match( ParseInput& in, States&&... st ) { const T t( static_cast< const ParseInput& >( in ), st... 
); return TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... ); diff --git a/packages/PEGTL/include/tao/pegtl/contrib/internal/set_stack_guard.hpp b/packages/PEGTL/include/tao/pegtl/contrib/internal/set_stack_guard.hpp index 94df74cab2edf16ab5ebed16a5eeb6af4db21873..c0071e959d0c3bd52f405b5800e608107716a91c 100644 --- a/packages/PEGTL/include/tao/pegtl/contrib/internal/set_stack_guard.hpp +++ b/packages/PEGTL/include/tao/pegtl/contrib/internal/set_stack_guard.hpp @@ -12,7 +12,7 @@ namespace TAO_PEGTL_NAMESPACE::internal { template< typename... Cs > - class set_stack_guard + class [[nodiscard]] set_stack_guard { public: template< typename... Ts > @@ -24,8 +24,8 @@ namespace TAO_PEGTL_NAMESPACE::internal set_stack_guard( set_stack_guard&& ) = delete; set_stack_guard( const set_stack_guard& ) = delete; - void operator=( set_stack_guard&& ) = delete; - void operator=( const set_stack_guard& ) = delete; + set_stack_guard& operator=( set_stack_guard&& ) = delete; + set_stack_guard& operator=( const set_stack_guard& ) = delete; ~set_stack_guard() { diff --git a/packages/PEGTL/include/tao/pegtl/contrib/internal/vector_stack_guard.hpp b/packages/PEGTL/include/tao/pegtl/contrib/internal/vector_stack_guard.hpp index 146da0f1afe49f72005fc05389da261d144457f4..4b0cfbe8bf730bdac56b39c6e09cd627b5f928b5 100644 --- a/packages/PEGTL/include/tao/pegtl/contrib/internal/vector_stack_guard.hpp +++ b/packages/PEGTL/include/tao/pegtl/contrib/internal/vector_stack_guard.hpp @@ -12,7 +12,7 @@ namespace TAO_PEGTL_NAMESPACE::internal { template< typename... Cs > - class vector_stack_guard + class [[nodiscard]] vector_stack_guard { public: template< typename... 
Ts > @@ -25,8 +25,8 @@ namespace TAO_PEGTL_NAMESPACE::internal vector_stack_guard( vector_stack_guard&& ) = delete; vector_stack_guard( const vector_stack_guard& ) = delete; - void operator=( vector_stack_guard&& ) = delete; - void operator=( const vector_stack_guard& ) = delete; + vector_stack_guard& operator=( vector_stack_guard&& ) = delete; + vector_stack_guard& operator=( const vector_stack_guard& ) = delete; ~vector_stack_guard() { diff --git a/packages/PEGTL/include/tao/pegtl/contrib/limit_bytes.hpp b/packages/PEGTL/include/tao/pegtl/contrib/limit_bytes.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cea99fc6fef85375352e6a377c9db42d19201ff5 --- /dev/null +++ b/packages/PEGTL/include/tao/pegtl/contrib/limit_bytes.hpp @@ -0,0 +1,88 @@ +// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#ifndef TAO_PEGTL_CONTRIB_LIMIT_BYTES_HPP +#define TAO_PEGTL_CONTRIB_LIMIT_BYTES_HPP + +#include <algorithm> + +#include "../apply_mode.hpp" +#include "../config.hpp" +#include "../match.hpp" +#include "../nothing.hpp" +#include "../rewind_mode.hpp" + +#if defined( __cpp_exceptions ) +#include "../parse_error.hpp" +#else +#include <cstdio> +#include <exception> +#endif + +namespace TAO_PEGTL_NAMESPACE +{ + namespace internal + { + template< std::size_t Maximum, typename MemoryInput > + struct [[nodiscard]] bytes_guard + { + MemoryInput& m_in; + const char* m_end; + + explicit bytes_guard( MemoryInput& in_in ) noexcept + : m_in( in_in ), + m_end( in_in.end() ) + { + m_in.private_set_end( m_in.begin() + std::min( m_in.size(), Maximum ) ); + } + + bytes_guard( bytes_guard&& ) = delete; + bytes_guard( const bytes_guard& ) = delete; + + ~bytes_guard() + { + m_in.private_set_end( m_end ); + } + + bytes_guard& operator=( bytes_guard&& ) = delete; + bytes_guard& operator=( const bytes_guard& ) = delete; + }; + + // C++17 does not allow for partial deduction guides. 
+ + } // namespace internal + + template< std::size_t Maximum > + struct limit_bytes + : maybe_nothing + { + template< typename Rule, + apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename... States > + [[nodiscard]] static bool match( ParseInput& in, States&&... st ) + { + internal::bytes_guard< Maximum, ParseInput > bg( in ); + if( TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... ) ) { + if( in.empty() && ( bg.m_end != in.current() ) ) { +#if defined( __cpp_exceptions ) + throw TAO_PEGTL_NAMESPACE::parse_error( "maximum allowed rule consumption reached", in ); +#else + std::fputs( "maximum allowed rule consumption reached\n", stderr ); + std::terminate(); +#endif + } + return true; + } + return false; + } + }; + +} // namespace TAO_PEGTL_NAMESPACE + +#endif diff --git a/packages/PEGTL/include/tao/pegtl/contrib/limit_depth.hpp b/packages/PEGTL/include/tao/pegtl/contrib/limit_depth.hpp new file mode 100644 index 0000000000000000000000000000000000000000..447bebff7cc21a78f631e37ac23800a2741fd671 --- /dev/null +++ b/packages/PEGTL/include/tao/pegtl/contrib/limit_depth.hpp @@ -0,0 +1,83 @@ +// Copyright (c) 2021 Dr. 
Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#ifndef TAO_PEGTL_CONTRIB_LIMIT_DEPTH_HPP +#define TAO_PEGTL_CONTRIB_LIMIT_DEPTH_HPP + +#include "../apply_mode.hpp" +#include "../config.hpp" +#include "../match.hpp" +#include "../nothing.hpp" +#include "../rewind_mode.hpp" + +#if defined( __cpp_exceptions ) +#include "../parse_error.hpp" +#else +#include <cstdio> +#include <exception> +#endif + +namespace TAO_PEGTL_NAMESPACE +{ + namespace internal + { + struct [[nodiscard]] depth_guard + { + std::size_t& m_depth; + + explicit depth_guard( std::size_t& depth ) noexcept + : m_depth( depth ) + { + ++m_depth; + } + + depth_guard( depth_guard&& ) = delete; + depth_guard( const depth_guard& ) = delete; + + ~depth_guard() + { + --m_depth; + } + + depth_guard& operator=( depth_guard&& ) = delete; + depth_guard& operator=( const depth_guard& ) = delete; + }; + + } // namespace internal + + template< std::size_t Maximum > + struct limit_depth + : maybe_nothing + { + template< typename Rule, + apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename... States > + [[nodiscard]] static bool match( ParseInput& in, States&&... st ) + { + if constexpr( Control< Rule >::enable ) { + const internal::depth_guard dg( in.private_depth ); + if( in.private_depth > Maximum ) { +#if defined( __cpp_exceptions ) + throw TAO_PEGTL_NAMESPACE::parse_error( "maximum parser rule nesting depth exceeded", in ); +#else + std::fputs( "maximum parser rule nesting depth exceeded\n", stderr ); + std::terminate(); +#endif + } + return TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... ); + } + else { + return TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... 
); + } + } + }; + +} // namespace TAO_PEGTL_NAMESPACE + +#endif diff --git a/packages/PEGTL/include/tao/pegtl/contrib/skip.hpp b/packages/PEGTL/include/tao/pegtl/contrib/skip.hpp deleted file mode 100644 index 8aea252e83710abc24afb6319d73a67da4c9fca4..0000000000000000000000000000000000000000 --- a/packages/PEGTL/include/tao/pegtl/contrib/skip.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey -// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ - -#ifndef TAO_PEGTL_CONTRIB_SKIP_HPP -#define TAO_PEGTL_CONTRIB_SKIP_HPP - -#include "../apply_mode.hpp" -#include "../config.hpp" -#include "../match.hpp" -#include "../normal.hpp" -#include "../nothing.hpp" -#include "../rewind_mode.hpp" - -namespace TAO_PEGTL_NAMESPACE -{ - // this is currently experimental and may change at any time - template< typename How, typename Where, template< typename... > class Base = normal > - struct skip - { - template< typename Rule > - struct control - : Base< Rule > - { - template< apply_mode A, - rewind_mode M, - template< typename... > - class Action, - template< typename... > - class Control, - typename ParseInput, - typename... States > - [[nodiscard]] static bool match( ParseInput& in, States&&... st ) - { - // TODO: if we only skip after but not before the actual rule, - // we would not need this marker. - auto m = in.template mark< M >(); - - // TODO: different conditions for before/after skipping? - if( Where::template value< Rule > ) { - // TODO: assert on result to be successful? - (void)TAO_PEGTL_NAMESPACE::match< How, apply_mode::nothing, M, nothing, normal >( in ); - } - - const bool result = Base< Rule >::template match< A, M, Action, Control >( in, st... ); - - // TODO: different conditions for before/after skipping? - if( result && Where::template value< Rule > ) { - // TODO: assert on result to be successful? 
- (void)TAO_PEGTL_NAMESPACE::match< How, apply_mode::nothing, M, nothing, normal >( in ); - } - - return m( result ); - } - }; - }; - -} // namespace TAO_PEGTL_NAMESPACE - -#endif diff --git a/packages/PEGTL/include/tao/pegtl/internal/file_mapper_posix.hpp b/packages/PEGTL/include/tao/pegtl/internal/file_mapper_posix.hpp index ad8147a7dbf0d47ee8c3597735d78000d8c2b6b0..0c60e3a4a6a85eb28e419826072e36e6ffb62cc5 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/file_mapper_posix.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/file_mapper_posix.hpp @@ -38,8 +38,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::close( m_fd ); } - void operator=( const file_opener& ) = delete; - void operator=( file_opener&& ) = delete; + file_opener& operator=( const file_opener& ) = delete; + file_opener& operator=( file_opener&& ) = delete; [[nodiscard]] std::size_t size() const { @@ -118,8 +118,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::munmap( const_cast< char* >( m_data ), m_size ); } - void operator=( const file_mapper& ) = delete; - void operator=( file_mapper&& ) = delete; + file_mapper& operator=( const file_mapper& ) = delete; + file_mapper& operator=( file_mapper&& ) = delete; [[nodiscard]] bool empty() const noexcept { diff --git a/packages/PEGTL/include/tao/pegtl/internal/file_mapper_win32.hpp b/packages/PEGTL/include/tao/pegtl/internal/file_mapper_win32.hpp index a8a471fd9ed881c2f36ca09517d07ec971517735..73c27dec90556f0b091ed191a28ca6c4557e3d95 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/file_mapper_win32.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/file_mapper_win32.hpp @@ -52,8 +52,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::CloseHandle( m_handle ); } - void operator=( const file_opener& ) = delete; - void operator=( file_opener&& ) = delete; + file_opener& operator=( const file_opener& ) = delete; + file_opener& operator=( file_opener&& ) = delete; [[nodiscard]] std::size_t size() const { @@ -134,8 +134,8 @@ namespace 
TAO_PEGTL_NAMESPACE::internal ::CloseHandle( m_handle ); } - void operator=( const win32_file_mapper& ) = delete; - void operator=( win32_file_mapper&& ) = delete; + win32_file_mapper& operator=( const win32_file_mapper& ) = delete; + win32_file_mapper& operator=( win32_file_mapper&& ) = delete; const size_t m_size; const HANDLE m_handle; @@ -202,8 +202,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::UnmapViewOfFile( LPCVOID( m_data ) ); } - void operator=( const file_mapper& ) = delete; - void operator=( file_mapper&& ) = delete; + file_mapper& operator=( const file_mapper& ) = delete; + file_mapper& operator=( file_mapper&& ) = delete; [[nodiscard]] bool empty() const noexcept { diff --git a/packages/PEGTL/include/tao/pegtl/internal/file_reader.hpp b/packages/PEGTL/include/tao/pegtl/internal/file_reader.hpp index 7b846e1bf246cec4086d6333a9021f810928e865..7c2b054942877d96d308ea4be0a27e49e1917f00 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/file_reader.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/file_reader.hpp @@ -78,8 +78,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ~file_reader() = default; - void operator=( const file_reader& ) = delete; - void operator=( file_reader&& ) = delete; + file_reader& operator=( const file_reader& ) = delete; + file_reader& operator=( file_reader&& ) = delete; [[nodiscard]] std::size_t size() const { diff --git a/packages/PEGTL/include/tao/pegtl/internal/marker.hpp b/packages/PEGTL/include/tao/pegtl/internal/marker.hpp index 4beb9293a17f18e6d9eac6370ead5d0a53866746..a9ffa824f356e85addebb0c99ddab3373c62b3df 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/marker.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/marker.hpp @@ -10,7 +10,7 @@ namespace TAO_PEGTL_NAMESPACE::internal { template< typename Iterator, rewind_mode M > - class marker + class [[nodiscard]] marker { public: static constexpr rewind_mode next_rewind_mode = M; @@ -23,8 +23,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ~marker() = default; - 
void operator=( const marker& ) = delete; - void operator=( marker&& ) = delete; + marker& operator=( const marker& ) = delete; + marker& operator=( marker&& ) = delete; [[nodiscard]] bool operator()( const bool result ) const noexcept { @@ -33,7 +33,7 @@ namespace TAO_PEGTL_NAMESPACE::internal }; template< typename Iterator > - class marker< Iterator, rewind_mode::required > + class [[nodiscard]] marker< Iterator, rewind_mode::required > { public: static constexpr rewind_mode next_rewind_mode = rewind_mode::active; @@ -53,8 +53,8 @@ namespace TAO_PEGTL_NAMESPACE::internal } } - void operator=( const marker& ) = delete; - void operator=( marker&& ) = delete; + marker& operator=( const marker& ) = delete; + marker& operator=( marker&& ) = delete; [[nodiscard]] bool operator()( const bool result ) noexcept { diff --git a/packages/PEGTL/include/tao/pegtl/internal/ranges.hpp b/packages/PEGTL/include/tao/pegtl/internal/ranges.hpp index 1af27d7552ac0df06d72b882e47ff35a3a1e1c97..bf57db611e00058ae681096676cb5893efe7b355 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/ranges.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/ranges.hpp @@ -6,6 +6,8 @@ #include "../config.hpp" +#include <utility> + #include "bump_help.hpp" #include "enable_control.hpp" #include "failure.hpp" @@ -16,37 +18,12 @@ namespace TAO_PEGTL_NAMESPACE::internal { - template< typename Char, Char... Cs > - struct ranges_impl; - - template< typename Char > - struct ranges_impl< Char > - { - [[nodiscard]] static constexpr bool test( const Char /*unused*/ ) noexcept - { - return false; - } - }; - - template< typename Char, Char Eq > - struct ranges_impl< Char, Eq > - { - [[nodiscard]] static constexpr bool test( const Char c ) noexcept - { - return c == Eq; - } - }; - - template< typename Char, Char Lo, Char Hi, Char... Cs > - struct ranges_impl< Char, Lo, Hi, Cs... 
> + template< typename Char, Char Lo, Char Hi > + constexpr bool validate_range( Char c ) noexcept { - static_assert( Lo <= Hi, "invalid range detected" ); - - [[nodiscard]] static constexpr bool test( const Char c ) noexcept - { - return ( ( Lo <= c ) && ( c <= Hi ) ) || ranges_impl< Char, Cs... >::test( c ); - } - }; + static_assert( Lo <= Hi, "invalid range" ); + return ( Lo <= c ) && ( c <= Hi ); + } template< typename Peek, typename Peek::data_t... Cs > struct ranges @@ -57,9 +34,21 @@ namespace TAO_PEGTL_NAMESPACE::internal using rule_t = ranges; using subs_t = empty_list; + template< std::size_t... Is > + [[nodiscard]] static constexpr bool test( std::index_sequence< Is... > /*unused*/, const data_t c ) noexcept + { + constexpr const data_t cs[] = { Cs... }; + if constexpr( sizeof...( Cs ) % 2 == 0 ) { + return ( validate_range< data_t, cs[ 2 * Is ], cs[ 2 * Is + 1 ] >( c ) || ... ); + } + else { + return ( validate_range< data_t, cs[ 2 * Is ], cs[ 2 * Is + 1 ] >( c ) || ... ) || ( c == cs[ sizeof...( Cs ) - 1 ] ); + } + } + [[nodiscard]] static constexpr bool test( const data_t c ) noexcept { - return ranges_impl< data_t, Cs... 
>::test( c ); + return test( std::make_index_sequence< sizeof...( Cs ) / 2 >(), c ); } template< int Eol > diff --git a/packages/PEGTL/include/tao/pegtl/memory_input.hpp b/packages/PEGTL/include/tao/pegtl/memory_input.hpp index 675e8b32fdee7203df98f20622ca50f934fc7137..7cdc9f9f88aa5a2adbaf3158730217d3a2176334 100644 --- a/packages/PEGTL/include/tao/pegtl/memory_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/memory_input.hpp @@ -62,8 +62,8 @@ namespace TAO_PEGTL_NAMESPACE ~memory_input_base() = default; - memory_input_base operator=( const memory_input_base& ) = delete; - memory_input_base operator=( memory_input_base&& ) = delete; + memory_input_base& operator=( const memory_input_base& ) = delete; + memory_input_base& operator=( memory_input_base&& ) = delete; [[nodiscard]] const char* current() const noexcept { @@ -124,13 +124,17 @@ namespace TAO_PEGTL_NAMESPACE m_current.byte = in_byte; m_current.line = in_line; m_current.column = in_column; + private_depth = 0; } protected: const char* const m_begin; iterator_t m_current; - const char* const m_end; + const char* m_end; const Source m_source; + + public: + std::size_t private_depth = 0; }; template< typename Eol, typename Source > @@ -160,8 +164,8 @@ namespace TAO_PEGTL_NAMESPACE ~memory_input_base() = default; - memory_input_base operator=( const memory_input_base& ) = delete; - memory_input_base operator=( memory_input_base&& ) = delete; + memory_input_base& operator=( const memory_input_base& ) = delete; + memory_input_base& operator=( memory_input_base&& ) = delete; [[nodiscard]] const char* current() const noexcept { @@ -208,13 +212,17 @@ namespace TAO_PEGTL_NAMESPACE void restart() { m_current = m_begin.data; + private_depth = 0; } protected: const internal::iterator m_begin; iterator_t m_current; - const char* const m_end; + const char* m_end; const Source m_source; + + public: + std::size_t private_depth = 0; }; } // namespace internal @@ -268,8 +276,8 @@ namespace TAO_PEGTL_NAMESPACE ~memory_input() = 
default; - memory_input operator=( const memory_input& ) = delete; - memory_input operator=( memory_input&& ) = delete; + memory_input& operator=( const memory_input& ) = delete; + memory_input& operator=( memory_input&& ) = delete; [[nodiscard]] const Source& source() const noexcept { @@ -355,6 +363,11 @@ namespace TAO_PEGTL_NAMESPACE const char* b = begin_of_line( p ); return std::string_view( b, static_cast< std::size_t >( end_of_line( p ) - b ) ); } + + void private_set_end( const char* new_end ) noexcept + { + this->m_end = new_end; + } }; template< typename... Ts > diff --git a/packages/PEGTL/include/tao/pegtl/mmap_input.hpp b/packages/PEGTL/include/tao/pegtl/mmap_input.hpp index 1bd4dce191141af073cf5a0d7eba3c00298e9334..2362b6c765747a6ccbe7964417434f32fe2bf04f 100644 --- a/packages/PEGTL/include/tao/pegtl/mmap_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/mmap_input.hpp @@ -42,8 +42,8 @@ namespace TAO_PEGTL_NAMESPACE ~mmap_holder() = default; - void operator=( const mmap_holder& ) = delete; - void operator=( mmap_holder&& ) = delete; + mmap_holder& operator=( const mmap_holder& ) = delete; + mmap_holder& operator=( mmap_holder&& ) = delete; }; } // namespace internal @@ -67,8 +67,8 @@ namespace TAO_PEGTL_NAMESPACE ~mmap_input() = default; - void operator=( const mmap_input& ) = delete; - void operator=( mmap_input&& ) = delete; + mmap_input& operator=( const mmap_input& ) = delete; + mmap_input& operator=( mmap_input&& ) = delete; }; template< typename... 
Ts > diff --git a/packages/PEGTL/include/tao/pegtl/parse.hpp b/packages/PEGTL/include/tao/pegtl/parse.hpp index 9634655c56791406a8d93101514335dc5b044312..3888fb3fc3ed619423ad4a3af5abaa2510fa9e3c 100644 --- a/packages/PEGTL/include/tao/pegtl/parse.hpp +++ b/packages/PEGTL/include/tao/pegtl/parse.hpp @@ -4,7 +4,7 @@ #ifndef TAO_PEGTL_PARSE_HPP #define TAO_PEGTL_PARSE_HPP -#include <utility> +#include <type_traits> #include "apply_mode.hpp" #include "config.hpp" @@ -16,17 +16,20 @@ namespace TAO_PEGTL_NAMESPACE { - template< typename Rule, - template< typename... > class Action = nothing, - template< typename... > class Control = normal, - apply_mode A = apply_mode::action, - rewind_mode M = rewind_mode::required, - typename ParseInput, - typename... States > - auto parse( ParseInput&& in, States&&... st ) + namespace internal { - return Control< Rule >::template match< A, M, Action, Control >( in, st... ); - } + [[nodiscard]] inline auto get_position( const position& p ) noexcept( std::is_nothrow_copy_constructible_v< position > ) + { + return p; + } + + template< typename ParseInput > + [[nodiscard]] position get_position( const ParseInput& in ) noexcept( noexcept( position( in.position() ) ) ) + { + return in.position(); + } + + } // namespace internal template< typename Rule, template< typename... > class Action = nothing, @@ -35,46 +38,31 @@ namespace TAO_PEGTL_NAMESPACE rewind_mode M = rewind_mode::required, typename ParseInput, typename... States > - auto parse_nested( position op, ParseInput&& in, States&&... st ) + auto parse( ParseInput&& in, States&&... st ) { -#if defined( __cpp_exceptions ) - try { - return parse< Rule, Action, Control, A, M >( in, st... ); - } - catch( parse_error& e ) { - e.add_position( std::move( op ) ); - throw; - } -#else - (void)op; - return parse< Rule, Action, Control, A, M >( in, st... ); -#endif + return Control< Rule >::template match< A, M, Action, Control >( in, st... 
); } - // NOTE: The oi.position() in the version below can be expensive for lazy - // inputs, which is why the version below does not simply call the version - // above with said oi.position() as first parameter. - template< typename Rule, template< typename... > class Action = nothing, template< typename... > class Control = normal, apply_mode A = apply_mode::action, rewind_mode M = rewind_mode::required, - typename OuterInput, + typename Outer, typename ParseInput, typename... States > - auto parse_nested( const OuterInput& oi, ParseInput&& in, States&&... st ) + auto parse_nested( const Outer& o, ParseInput&& in, States&&... st ) { #if defined( __cpp_exceptions ) try { return parse< Rule, Action, Control, A, M >( in, st... ); } catch( parse_error& e ) { - e.add_position( oi.position() ); + e.add_position( internal::get_position( o ) ); throw; } #else - (void)oi; + (void)o; return parse< Rule, Action, Control, A, M >( in, st... ); #endif } diff --git a/packages/PEGTL/include/tao/pegtl/parse_error.hpp b/packages/PEGTL/include/tao/pegtl/parse_error.hpp index f471fb4205a64f3fd1e228af750e65f5c7677638..df62ce6811129c929d2bd2edec924c08fa24fc8a 100644 --- a/packages/PEGTL/include/tao/pegtl/parse_error.hpp +++ b/packages/PEGTL/include/tao/pegtl/parse_error.hpp @@ -107,6 +107,11 @@ namespace TAO_PEGTL_NAMESPACE } m_impl->add_position( std::move( p ) ); } + + void add_position( const position& p ) + { + add_position( position( p ) ); + } }; } // namespace TAO_PEGTL_NAMESPACE diff --git a/packages/PEGTL/include/tao/pegtl/read_input.hpp b/packages/PEGTL/include/tao/pegtl/read_input.hpp index 7b748d0930c3feabcd4ac64f60d2d2df47b78c33..019c9cd20b806f28772825a1bd9fa319b4c818a8 100644 --- a/packages/PEGTL/include/tao/pegtl/read_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/read_input.hpp @@ -42,8 +42,8 @@ namespace TAO_PEGTL_NAMESPACE ~read_input() = default; - void operator=( const read_input& ) = delete; - void operator=( read_input&& ) = delete; + read_input& operator=( 
const read_input& ) = delete; + read_input& operator=( read_input&& ) = delete; }; template< typename... Ts > diff --git a/packages/PEGTL/include/tao/pegtl/string_input.hpp b/packages/PEGTL/include/tao/pegtl/string_input.hpp index 2d83a6334e887b7b429aa420046abb3dbb488f8b..43349eb6f0e76480ff879cba04eb79eecda54152 100644 --- a/packages/PEGTL/include/tao/pegtl/string_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/string_input.hpp @@ -30,8 +30,8 @@ namespace TAO_PEGTL_NAMESPACE ~string_holder() = default; - void operator=( const string_holder& ) = delete; - void operator=( string_holder&& ) = delete; + string_holder& operator=( const string_holder& ) = delete; + string_holder& operator=( string_holder&& ) = delete; }; } // namespace internal @@ -52,8 +52,8 @@ namespace TAO_PEGTL_NAMESPACE ~string_input() = default; - void operator=( const string_input& ) = delete; - void operator=( string_input&& ) = delete; + string_input& operator=( const string_input& ) = delete; + string_input& operator=( string_input&& ) = delete; }; template< typename... 
Ts > diff --git a/packages/PEGTL/include/tao/pegtl/version.hpp b/packages/PEGTL/include/tao/pegtl/version.hpp index 4d460c536ae80908476227a9aef03d48929d3bea..2ffc6b953b1869b690588655bf26281f84fb68d5 100644 --- a/packages/PEGTL/include/tao/pegtl/version.hpp +++ b/packages/PEGTL/include/tao/pegtl/version.hpp @@ -4,10 +4,10 @@ #ifndef TAO_PEGTL_VERSION_HPP #define TAO_PEGTL_VERSION_HPP -#define TAO_PEGTL_VERSION "3.2.0" +#define TAO_PEGTL_VERSION "3.2.1" #define TAO_PEGTL_VERSION_MAJOR 3 #define TAO_PEGTL_VERSION_MINOR 2 -#define TAO_PEGTL_VERSION_PATCH 0 +#define TAO_PEGTL_VERSION_PATCH 1 #endif diff --git a/packages/PEGTL/src/example/pegtl/CMakeLists.txt b/packages/PEGTL/src/example/pegtl/CMakeLists.txt index 46ffeb0685ed9dc45b9022c90460bb68e0fcc00c..64bb7380dfb645e72c027fb5c7cc511f62ee03a3 100644 --- a/packages/PEGTL/src/example/pegtl/CMakeLists.txt +++ b/packages/PEGTL/src/example/pegtl/CMakeLists.txt @@ -8,6 +8,7 @@ set(example_sources csv1.cpp csv2.cpp dynamic_match.cpp + expression.cpp hello_world.cpp indent_aware.cpp json_analyze.cpp @@ -29,7 +30,6 @@ set(example_sources peg2pegtl.cpp proto3.cpp recover.cpp - skipper.cpp s_expression.cpp sum.cpp symbol_table.cpp diff --git a/packages/PEGTL/src/example/pegtl/expression.cpp b/packages/PEGTL/src/example/pegtl/expression.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5d490f93f70fa150298f700ad48a416d97842ab6 --- /dev/null +++ b/packages/PEGTL/src/example/pegtl/expression.cpp @@ -0,0 +1,613 @@ +// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#if !defined( __cpp_exceptions ) +#include <iostream> +int main() +{ + std::cerr << "Exception support required, example unavailable." 
<< std::endl; + return 1; +} +#else + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iomanip> +#include <iostream> +#include <stdexcept> +#include <tuple> +#include <variant> +#include <vector> + +#include <tao/pegtl.hpp> + +namespace TAO_PEGTL_NAMESPACE::expression +{ + // Expression parsing with prefix, postfix and infix operators, ternary + // operator and a couple of other special cases supported. + + // The handling of operator precedences with left and right binding power is + // based on https://github.com/matklad/minipratt/blob/master/src/bin/pratt.rs + + // It correctly recognises all operators with their precedence and associativity, + // however is still very much work-in-progress regarding a lot of details... + + // TODO: Fix missing whitespace-skip before infix/postfix operators. + // TODO: Decide whether to use must everywhere or nowhere? + // TODO: Decide whether to suppress actions for sub-rules. + // TODO: Finalise the event-style interface or change to fake actions or actions with ops? + // TODO: Decide on where to use config vs. where to use grammar template parameters. + // TODO: Choose customisation points vs. copy-n-paste customisation. + // TODO: Constexpr-ify where possible with C++20. 
+ + namespace internal + { + struct prefix_info + { + prefix_info( const std::string_view n, const std::uint8_t pbp ) noexcept + : name( n ), + prefix_binding_power( pbp ) + { + assert( pbp ); + } + + std::string name; + + std::uint8_t prefix_binding_power; + }; + + struct infix_postfix_info + { + infix_postfix_info( const std::string_view n, const std::uint8_t lbp, const std::uint8_t rbp = 0 ) noexcept + : infix_postfix_info( n, std::string_view(), lbp, rbp ) + {} + + infix_postfix_info( const std::string_view n, const std::string_view o, const std::uint8_t lbp, const std::uint8_t rbp = 0 ) noexcept + : name( n ), + other( o ), + left_binding_power( lbp ), + right_binding_power( rbp ) + { + if( right_binding_power > 0 ) { + assert( std::min( left_binding_power, right_binding_power ) & 1 ); + assert( 2 * std::min( left_binding_power, right_binding_power ) + 1 == left_binding_power + right_binding_power ); + } + assert( left_binding_power > 0 ); + } + + [[nodiscard]] bool is_infix() const noexcept + { + return right_binding_power != 0; + } + + [[nodiscard]] bool is_postfix() const noexcept + { + return right_binding_power == 0; + } + + std::string name; + std::string other; // Used for the ':' of the ternary operator etc. 
+ + std::uint8_t left_binding_power; + std::uint8_t right_binding_power; + }; + + template< typename ParseInput > + [[nodiscard]] bool match_string_view( ParseInput& in, const std::string_view sv ) + { + if( in.size( sv.size() ) >= sv.size() ) { + if( std::memcmp( in.current(), sv.data(), sv.size() ) == 0 ) { + in.bump( sv.size() ); + return true; + } + } + return false; + } + + template< typename ParseInput, typename OperatorInfo > + [[nodiscard]] const OperatorInfo* match_prefix( ParseInput& in, const std::size_t max_length, const std::vector< OperatorInfo >& ops ) + { + const std::size_t max = std::min( max_length, in.size( max_length ) ); + for( std::string op( in.current(), max ); !op.empty(); op.pop_back() ) { + if( const auto i = std::find_if( ops.begin(), ops.end(), [ = ]( const OperatorInfo& info ) { return info.name == op; } ); i != ops.end() ) { + in.bump( op.size() ); + return &*i; + } + } + return nullptr; + } + + template< typename ParseInput, typename OperatorInfo > + [[nodiscard]] const OperatorInfo* match_infix_postfix( ParseInput& in, const std::size_t max_length, const std::vector< OperatorInfo >& ops, const std::uint8_t min_precedence ) + { + const std::size_t max = std::min( max_length, in.size( max_length ) ); + for( std::string op( in.current(), max ); !op.empty(); op.pop_back() ) { + if( const auto i = std::find_if( ops.begin(), ops.end(), [ = ]( const OperatorInfo& info ) { return info.name == op; } ); ( i != ops.end() ) && ( i->left_binding_power >= min_precedence ) ) { + in.bump( op.size() ); + return &*i; + } + } + return nullptr; + } + + template< typename T > + [[nodiscard]] std::vector< T > sorted_operator_vector( const std::initializer_list< T >& t ) + { + std::vector< T > v{ t }; + const auto less = []( const auto& l, const auto& r ) { return l.name < r.name; }; + std::sort( v.begin(), v.end(), less ); + return v; + } + + struct operator_maps + { + // clang-format off + operator_maps() + : prefix( sorted_operator_vector( { + 
prefix_info( "!", 80 ), + prefix_info( "+", 80 ), + prefix_info( "-", 80 ), + prefix_info( "~", 80 ), + prefix_info( "*", 80 ), + prefix_info( "&", 80 ), + prefix_info( "++", 80 ), + prefix_info( "--", 80 ) + } ) ), + infix_postfix( sorted_operator_vector( { + infix_postfix_info( "::", 99, 100 ), // Special: Followed by identifier (or template-space-identifer, which we don't support yet). + infix_postfix_info( ".*", 37, 38 ), + infix_postfix_info( "->*", 37, 38 ), + infix_postfix_info( "*", 35, 36 ), + infix_postfix_info( "/", 35, 36 ), + infix_postfix_info( "%", 35, 36 ), + infix_postfix_info( "+", 33, 34 ), + infix_postfix_info( "-", 33, 34 ), + infix_postfix_info( "<<", 31, 32 ), + infix_postfix_info( ">>", 31, 32 ), + infix_postfix_info( "<=>", 29, 30 ), + infix_postfix_info( "<", 27, 28 ), + infix_postfix_info( "<=", 27, 28 ), + infix_postfix_info( ">", 27, 28 ), + infix_postfix_info( ">=", 27, 28 ), + infix_postfix_info( "==", 25, 26 ), + infix_postfix_info( "!=", 25, 26 ), + infix_postfix_info( "&", 23, 24 ), + infix_postfix_info( "^", 21, 22 ), + infix_postfix_info( "|", 19, 20 ), + infix_postfix_info( "&&", 17, 18 ), + infix_postfix_info( "||", 15, 16 ), + infix_postfix_info( "?", ":", 14, 13 ), // Special: Ternary operator. + infix_postfix_info( "=", 12, 11 ), + infix_postfix_info( "+=", 12, 11 ), + infix_postfix_info( "-=", 12, 11 ), + infix_postfix_info( "*=", 12, 11 ), + infix_postfix_info( "/=", 12, 11 ), + infix_postfix_info( "%=", 12, 11 ), + infix_postfix_info( "<<=", 12, 11 ), + infix_postfix_info( ">>=", 12, 11 ), + infix_postfix_info( "&=", 12, 11 ), + infix_postfix_info( "^=", 12, 11 ), + infix_postfix_info( "|=", 12, 11 ), + // infix_postfix_info( ",", 9, 10 ), // TODO: Enable, but forbid in function argument list. + infix_postfix_info( "[", "]", 90 ), // Special: Argument list. + infix_postfix_info( "(", ")", 90 ), // Special: Argument list. + infix_postfix_info( ".", 90 ), // Special: Followed by identifier. 
+ infix_postfix_info( "->", 90 ), // Special: Followed by identifier. + infix_postfix_info( "++", 90 ), + infix_postfix_info( "--", 90 ) + } ) ), + max_prefix_length( std::max_element( prefix.begin(), prefix.end(), []( const auto& l, const auto& r ) { return l.name.size() < r.name.size(); } )->name.size() ), + max_infix_postfix_length( std::max_element( infix_postfix.begin(), infix_postfix.end(), []( const auto& l, const auto& r ) { return l.name.size() < r.name.size(); } )->name.size() ) + { + // These are C++20 operators with the correct associativity and relative precedence, however some are still missing: + // TODO: Compound literal (C99), _Alignof (C11), Functional cast, sizeof, co_await, co_yield, throw, new, new[], delete, delete[], C-style casts. + } + // clang-format on + + const std::vector< prefix_info > prefix; + const std::vector< infix_postfix_info > infix_postfix; + + const std::size_t max_prefix_length; + const std::size_t max_infix_postfix_length; + }; + + struct string_view_rule + { + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput > + [[nodiscard]] static bool match( ParseInput& in, const std::string_view sv ) noexcept( noexcept( match_string_view( in, sv ) ) ) + { + return match_string_view( in, sv ); + } + }; + + struct comment + : seq< one< '#' >, until< eolf > > + {}; + + struct ignored + : sor< space, comment > + {}; + + template< typename Literal, typename Identifier > + struct expression; + + template< typename Literal, typename Identifier > + struct bracket_expression + { + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... 
> + class Control, + typename ParseInput, + typename Result, + typename Config > + [[nodiscard]] static bool match( ParseInput& in, Result& res, const Config& cfg, const std::uint8_t /*unused*/ ) + { + return Control< if_must< one< '(' >, star< ignored >, expression< Literal, Identifier >, star< ignored >, one< ')' > > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + } + }; + + template< typename Literal, typename Identifier > + struct prefix_expression + { + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename Result, + typename Config > + [[nodiscard]] static bool match( ParseInput& in, Result& res, const Config& cfg, const std::uint8_t /*unused*/ ) + { + if( const auto* info = match_prefix( in, cfg.max_prefix_length, cfg.prefix ) ) { + (void)Control< must< star< ignored >, expression< Literal, Identifier > > >::template match< A, M, Action, Control >( in, res, cfg, info->prefix_binding_power ); + if constexpr( A == apply_mode::action ) { + res.prefix( info->name ); + } + return true; + } + return false; + } + }; + + template< typename Literal, typename Identifier > + struct infix_postfix_expression + { + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename Result, + typename Config > + [[nodiscard]] static bool match( ParseInput& in, Result& res, const Config& cfg, const std::uint8_t min ) + { + if( const auto* info = match_infix_postfix( in, cfg.max_infix_postfix_length, cfg.infix_postfix, min ) ) { + if( info->name == "?" 
) { + (void)Control< must< star< ignored >, expression< Literal, Identifier > > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + (void)Control< must< star< ignored >, string_view_rule > >::template match< A, M, Action, Control >( in, info->other ); + (void)Control< must< star< ignored >, expression< Literal, Identifier > > >::template match< A, M, Action, Control >( in, res, cfg, info->right_binding_power ); + if constexpr( A == apply_mode::action ) { + res.ternary( info->name, info->other ); + } + return true; + } + if( ( info->name == "." ) || ( info->name == "::" ) || ( info->name == "->" ) ) { + (void)Control< must< star< ignored >, Identifier > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + if constexpr( A == apply_mode::action ) { + res.infix( info->name ); + } + return true; + } + if( ( info->name == "(" ) || ( info->name == "[" ) ) { + const std::size_t size = res.term_stack.size(); // TODO: Determine number of arguments without relying on res!? 
+ (void)Control< must< star< ignored >, opt< list_must< expression< Literal, Identifier >, one< ',' >, ignored > > > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + (void)Control< must< star< ignored >, string_view_rule > >::template match< A, M, Action, Control >( in, info->other ); + if constexpr( A == apply_mode::action ) { + res.call( info->name, info->other, res.term_stack.size() - size ); + } + return true; + } + if( info->is_infix() ) { + (void)Control< must< star< ignored >, expression< Literal, Identifier > > >::template match< A, M, Action, Control >( in, res, cfg, info->right_binding_power ); + if constexpr( A == apply_mode::action ) { + res.infix( info->name ); + } + return true; + } + if( info->is_postfix() ) { + if constexpr( A == apply_mode::action ) { + res.postfix( info->name ); + } + return true; + } + } + return false; + } + }; + + template< typename Literal, typename Identifier > + struct first_expression + : sor< Literal, Identifier, bracket_expression< Literal, Identifier >, prefix_expression< Literal, Identifier > > + {}; + + template< typename Literal, typename Identifier > + struct expression + : seq< first_expression< Literal, Identifier >, star< infix_postfix_expression< Literal, Identifier > > > + {}; + + } // namespace internal + + template< typename Literal, typename Identifier > + struct grammar + { + using rule_t = grammar; + using subs_t = type_list< internal::expression< Literal, Identifier > >; + + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename Result > + [[nodiscard]] static bool match( ParseInput& in, Result& res ) + { + const internal::operator_maps cfg; + return match< A, M, Action, Control >( in, res, cfg ); + } + + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... 
> + class Control, + typename ParseInput, + typename Result, + typename Config > + [[nodiscard]] static bool match( ParseInput& in, Result& res, const Config& cfg ) + { + return Control< internal::expression< Literal, Identifier > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + } + }; + +} // namespace TAO_PEGTL_NAMESPACE::expression + +namespace application +{ + namespace pegtl = TAO_PEGTL_NAMESPACE; + + struct term_t; + + using tuple_t = std::tuple< std::string, std::vector< term_t > >; + using variant_t = std::variant< std::int64_t, std::string, tuple_t >; + + struct term_t + { + explicit term_t( const std::int64_t l ) noexcept + : variant( l ) + {} + + explicit term_t( std::string&& s ) noexcept + : variant( std::move( s ) ) + {} + + explicit term_t( variant_t&& v ) noexcept + : variant( std::move( v ) ) + {} + + variant_t variant; + }; + + [[nodiscard]] inline std::string operator+( const char* l, const std::string_view r ) + { + return std::string( l ) + " '" + std::string( r ) + "'"; + } + + struct result + { + void infix( const std::string_view op ) + { + assert( term_stack.size() >= 2 ); + { + variant_t tmp = tuple_t( "infix" + op, { std::move( term_stack.at( term_stack.size() - 2 ) ), std::move( term_stack.at( term_stack.size() - 1 ) ) } ); + term_stack.pop_back(); + term_stack.back().variant = std::move( tmp ); + } + assert( string_stack.size() >= 2 ); + { + std::string tmp = "( " + string_stack.at( string_stack.size() - 2 ) + " " + std::string( op ) + " " + string_stack.at( string_stack.size() - 1 ) + " )"; + string_stack.pop_back(); + string_stack.back() = std::move( tmp ); + } + } + + void prefix( const std::string_view op ) + { + assert( term_stack.size() >= 1 ); // NOLINT(readability-container-size-empty) + { + variant_t tmp = tuple_t( "prefix" + op, { std::move( term_stack.at( term_stack.size() - 1 ) ) } ); + term_stack.back().variant = std::move( tmp ); + } + assert( string_stack.size() >= 1 ); // 
NOLINT(readability-container-size-empty) + { + std::string tmp = std::string( op ) + "( " + string_stack.at( string_stack.size() - 1 ) + " )"; + string_stack.back() = std::move( tmp ); + } + } + + void postfix( const std::string_view op ) + { + assert( term_stack.size() >= 1 ); // NOLINT(readability-container-size-empty) + { + variant_t tmp = tuple_t( "postfix" + op, { std::move( term_stack.at( term_stack.size() - 1 ) ) } ); + term_stack.back().variant = std::move( tmp ); + } + assert( string_stack.size() >= 1 ); // NOLINT(readability-container-size-empty) + { + std::string tmp = "( " + string_stack.at( string_stack.size() - 1 ) + " )" + std::string( op ); + string_stack.back() = std::move( tmp ); + } + } + + void ternary( const std::string_view op, const std::string_view o2 ) + { + assert( term_stack.size() >= 2 ); + { + variant_t tmp = tuple_t( "ternary", { std::move( term_stack.at( term_stack.size() - 3 ) ), std::move( term_stack.at( term_stack.size() - 2 ) ), std::move( term_stack.at( term_stack.size() - 1 ) ) } ); + term_stack.pop_back(); + term_stack.pop_back(); + term_stack.back().variant = std::move( tmp ); + } + assert( string_stack.size() >= 2 ); + { + std::string tmp = "( " + string_stack.at( string_stack.size() - 3 ) + " " + std::string( op ) + " " + string_stack.at( string_stack.size() - 2 ) + " " + std::string( o2 ) + " " + string_stack.at( string_stack.size() - 1 ) + " )"; + string_stack.pop_back(); + string_stack.pop_back(); + string_stack.back() = std::move( tmp ); + } + } + + void call( const std::string_view op, const std::string_view o2, const std::size_t args ) + { + assert( term_stack.size() > args ); + { + variant_t tmp = tuple_t( "call '" + std::string( op ) + std::string( o2 ) + "'", std::vector< term_t >( term_stack.end() - args - 1, term_stack.end() ) ); + for( std::size_t i = 0; i < args; ++i ) { + term_stack.pop_back(); + } + term_stack.back().variant = ( std::move( tmp ) ); + } + assert( string_stack.size() > args ); + { + std::string 
tmp = *( string_stack.end() - args - 1 ) + std::string( op ) + " "; + for( std::size_t i = 0; i < args; ++i ) { + if( i > 0 ) { + tmp += ", "; + } + tmp += *( string_stack.end() - args + i ); + } + tmp += " " + std::string( o2 ); + string_stack.resize( string_stack.size() - args ); + string_stack.back() = std::move( tmp ); + } + } + + void number( const std::int64_t l ) + { + term_stack.emplace_back( l ); + string_stack.emplace_back( std::to_string( l ) ); + } + + void identifier( const std::string& id ) + { + term_stack.emplace_back( id ); + string_stack.emplace_back( id ); + } + + std::vector< term_t > term_stack; + std::vector< std::string > string_stack; + }; + + inline std::ostream& operator<<( std::ostream& o, const term_t& t ); + + inline std::ostream& operator<<( std::ostream& o, const tuple_t& t ) + { + o << "{ " << std::get< 0 >( t ); + for( const auto& res : std::get< 1 >( t ) ) { + o << " " << res; + } + o << " }"; + return o; + } + + inline std::ostream& operator<<( std::ostream& o, const variant_t& v ) + { + std::visit( [ & ]( const auto& t ) { o << t; }, v ); + return o; + } + + inline std::ostream& operator<<( std::ostream& o, const term_t& t ) + { + o << t.variant; + return o; + } + + struct literal + : pegtl::plus< pegtl::digit > + {}; + + struct grammar + : pegtl::must< pegtl::expression::grammar< literal, pegtl::identifier >, pegtl::eof > + {}; + + template< typename Rule > + struct action + : pegtl::nothing< Rule > + {}; + + template<> + struct action< literal > + { + template< typename Input, typename... States > + static void apply( const Input& in, result& res, States&&... /*unused*/ ) + { + res.number( std::stoll( in.string() ) ); + } + }; + + template<> + struct action< pegtl::identifier > + { + template< typename Input, typename... States > + static void apply( const Input& in, result& res, States&&... 
/*unused*/ ) + { + res.identifier( in.string() ); + } + }; + +} // namespace application + +int main( int argc, char** argv ) +{ + // if( TAO_PEGTL_NAMESPACE::analyze< application::grammar >() != 0 ) { + // return 1; + // } + for( int i = 1; i < argc; ++i ) { + TAO_PEGTL_NAMESPACE::argv_input in( argv, i ); + try { + application::result res; + TAO_PEGTL_NAMESPACE::parse< application::grammar, application::action >( in, res ); + std::cout << "Input: " << argv[ i ] << std::endl; + assert( res.term_stack.size() == 1 ); + assert( res.string_stack.size() == 1 ); + std::cout << "Result: " << res.string_stack.at( 0 ) << std::endl; + std::cout << "Result: " << res.term_stack.at( 0 ) << std::endl; + } + catch( const TAO_PEGTL_NAMESPACE::parse_error& e ) { + const auto p = e.positions().front(); + std::cerr << e.what() << '\n' + << in.line_at( p ) << '\n' + << std::setw( p.column ) << '^' << '\n'; + } + } + return 0; +} + +#endif diff --git a/packages/PEGTL/src/example/pegtl/json_classes.hpp b/packages/PEGTL/src/example/pegtl/json_classes.hpp index c9f26de16fc3945cea12f3dcd780e89e47f5c72a..3e69515b0e35066c3c6b6f141a1a0c91e85a7f11 100644 --- a/packages/PEGTL/src/example/pegtl/json_classes.hpp +++ b/packages/PEGTL/src/example/pegtl/json_classes.hpp @@ -35,8 +35,8 @@ namespace example json_base( const json_base& ) = delete; json_base( json_base&& ) = delete; - void operator=( const json_base& ) = delete; - void operator=( json_base&& ) = delete; + json_base& operator=( const json_base& ) = delete; + json_base& operator=( json_base&& ) = delete; virtual void stream( std::ostream& ) const = 0; diff --git a/packages/PEGTL/src/example/pegtl/json_parse.cpp b/packages/PEGTL/src/example/pegtl/json_parse.cpp index f3c572d12e984765fac7715e966c20a13dfd4248..240a8898c38744eff056e5cd885a4464e93450de 100644 --- a/packages/PEGTL/src/example/pegtl/json_parse.cpp +++ b/packages/PEGTL/src/example/pegtl/json_parse.cpp @@ -6,6 +6,7 @@ #include <tao/pegtl.hpp> #include <tao/pegtl/contrib/json.hpp> 
+#include <tao/pegtl/contrib/limit_depth.hpp> #include <tao/pegtl/contrib/trace.hpp> #include "json_errors.hpp" @@ -16,6 +17,15 @@ namespace example { using grammar = pegtl::seq< pegtl::json::text, pegtl::eof >; + template< typename > + struct action + {}; + + template<> + struct action< pegtl::json::value > + : pegtl::limit_depth< 42 > + {}; + } // namespace example int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) @@ -30,7 +40,7 @@ int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) pegtl::argv_input in( argv, 1 ); #if defined( __cpp_exceptions ) try { - pegtl::parse< example::grammar, pegtl::nothing, example::control >( in ); + pegtl::parse< example::grammar, example::action, example::control >( in ); } catch( const pegtl::parse_error& e ) { const auto p = e.positions().front(); @@ -40,7 +50,7 @@ int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) return 1; } #else - if( !pegtl::parse< example::grammar, pegtl::nothing, example::control >( in ) ) { + if( !pegtl::parse< example::grammar, example::action, example::control >( in ) ) { std::cerr << "error occurred" << std::endl; return 1; } diff --git a/packages/PEGTL/src/example/pegtl/skipper.cpp b/packages/PEGTL/src/example/pegtl/skipper.cpp deleted file mode 100644 index fd11200bbd16442ff4d90ddd815bc8664b625fb4..0000000000000000000000000000000000000000 --- a/packages/PEGTL/src/example/pegtl/skipper.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2021 Dr. 
Colin Hirsch and Daniel Frey -// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ - -#include <iostream> -#include <string> - -#include <tao/pegtl.hpp> -#include <tao/pegtl/contrib/skip.hpp> - -namespace pegtl = TAO_PEGTL_NAMESPACE; - -namespace demo -{ - // define your grammar without concerning yourself with skipping whitespace - - // clang-format off - struct key : pegtl::identifier {}; - struct value : pegtl::identifier {}; - struct assign : pegtl::one< '=' > {}; - struct grammar : pegtl::seq< key, assign, value, pegtl::eof > {}; - // clang-format on - - // define your actions as usual - - template< typename Rule > - struct action - {}; - - template<> - struct action< key > - { - template< typename ActionInput > - static void apply( const ActionInput& in ) - { - std::cout << "key: '" << in.string() << "'\n"; - } - }; - - template<> - struct action< value > - { - template< typename ActionInput > - static void apply( const ActionInput& in ) - { - std::cout << "value: '" << in.string() << "'\n"; - } - }; - - // now specify how and where to skip whitespace - - using skip_how = pegtl::star< pegtl::sor< pegtl::space, pegtl::eol > >; - - // clang-format off - template< typename > inline constexpr bool where = false; - template<> inline constexpr bool where< key > = true; - template<> inline constexpr bool where< value > = true; - // clang-format on - - // as 'skip<>' can not take 'where' as a template parameter directly, we need to wrap it. - struct skip_where - { - // when to skip - template< typename Rule > - static constexpr auto value = where< Rule >; - }; - - template< typename Rule > - using control = pegtl::skip< skip_how, skip_where >::control< Rule >; - - // The above is a first step, the helper (skip<>) may change in the future - // to allow separate skipping before and after and also support different skip - // rules for each rule/set-of-rules. Also, a more convenient way to specify - // the whole thing. 
- -} // namespace demo - -int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) -{ - if( argc > 1 ) { - pegtl::argv_input in( argv, 1 ); - if( pegtl::parse< demo::grammar, demo::action, demo::control >( in ) ) { - std::cout << "success!" << std::endl; - } - else { - std::cerr << "failure." << std::endl; - } - } -} diff --git a/packages/PEGTL/src/example/pegtl/token_input.cpp b/packages/PEGTL/src/example/pegtl/token_input.cpp index 6f03dad7ec91668e27c9fe9b3157a9d21f0cdf95..7d2795a3fb9bf453bbb3aa222ab2185d2a2b9517 100644 --- a/packages/PEGTL/src/example/pegtl/token_input.cpp +++ b/packages/PEGTL/src/example/pegtl/token_input.cpp @@ -35,8 +35,8 @@ namespace TAO_PEGTL_NAMESPACE ~token_action_input() = default; - token_action_input operator=( const token_action_input& ) = delete; - token_action_input operator=( token_action_input&& ) = delete; + token_action_input& operator=( const token_action_input& ) = delete; + token_action_input& operator=( token_action_input&& ) = delete; [[nodiscard]] const iterator_t& iterator() const noexcept { @@ -101,8 +101,8 @@ namespace TAO_PEGTL_NAMESPACE ~token_parse_input() = default; - token_parse_input operator=( const token_parse_input& ) = delete; - token_parse_input operator=( token_parse_input&& ) = delete; + token_parse_input& operator=( const token_parse_input& ) = delete; + token_parse_input& operator=( token_parse_input&& ) = delete; void discard() const noexcept {} diff --git a/packages/PEGTL/src/test/pegtl/CMakeLists.txt b/packages/PEGTL/src/test/pegtl/CMakeLists.txt index 889b0520af98201aeeead2caaeef175fa0503578..6bfacb4a72caec4e1dd08bdaade1d934e3bac9e0 100644 --- a/packages/PEGTL/src/test/pegtl/CMakeLists.txt +++ b/packages/PEGTL/src/test/pegtl/CMakeLists.txt @@ -23,6 +23,7 @@ set(test_sources change_action_and_states.cpp change_state.cpp change_states.cpp + check_bytes.cpp contains.cpp contrib_alphabet.cpp contrib_analyze.cpp @@ -65,6 +66,8 @@ set(test_sources internal_endian.cpp internal_file_mapper.cpp 
internal_file_opener.cpp + limit_bytes.cpp + limit_depth.cpp parse_error.cpp pegtl_string_t.cpp position.cpp diff --git a/packages/PEGTL/src/test/pegtl/check_bytes.cpp b/packages/PEGTL/src/test/pegtl/check_bytes.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b773abed9b0d168bc4502b779617a38a5a3051b0 --- /dev/null +++ b/packages/PEGTL/src/test/pegtl/check_bytes.cpp @@ -0,0 +1,50 @@ +// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#include <tao/pegtl/contrib/check_bytes.hpp> + +#include "test.hpp" + +namespace TAO_PEGTL_NAMESPACE +{ + struct test_rule + : star< alpha > + {}; + + struct test_grammar + : seq< test_rule, eof > + {}; + + template< typename Rule > + struct test_action + : nothing< Rule > + {}; + + template<> + struct test_action< test_rule > + : check_bytes< 5 > + {}; + + void unit_test() + { + memory_input<> i1( "aaa", __FUNCTION__ ); + const auto r1 = pegtl::parse< test_grammar >( i1 ); + TAO_PEGTL_TEST_ASSERT( r1 ); + + memory_input<> i2( "aaaaaaaaaaa", __FUNCTION__ ); + const auto r2 = pegtl::parse< test_grammar >( i2 ); + TAO_PEGTL_TEST_ASSERT( r2 ); + + memory_input<> i3( "aaa", __FUNCTION__ ); + const auto r3 = pegtl::parse< test_grammar, test_action >( i3 ); + TAO_PEGTL_TEST_ASSERT( r3 ); + +#if defined( __cpp_exceptions ) + memory_input<> i4( "aaaaaaaaaaa", __FUNCTION__ ); + TAO_PEGTL_TEST_THROWS( pegtl::parse< test_grammar, test_action >( i4 ) ); +#endif + } + +} // namespace TAO_PEGTL_NAMESPACE + +#include "main.hpp" diff --git a/packages/PEGTL/src/test/pegtl/contrib_instantiate.cpp b/packages/PEGTL/src/test/pegtl/contrib_instantiate.cpp index 5c6c268cb6ee4b8bbc27cde7eb2ce779459875c8..fc85dec752d01633ddf3bf9f45642d84b4c896ea 100644 --- a/packages/PEGTL/src/test/pegtl/contrib_instantiate.cpp +++ b/packages/PEGTL/src/test/pegtl/contrib_instantiate.cpp @@ -32,8 +32,8 @@ namespace TAO_PEGTL_NAMESPACE dtor = true; } - void operator=( 
test_class&& ) = delete; - void operator=( const test_class& ) = delete; + test_class& operator=( test_class&& ) = delete; + test_class& operator=( const test_class& ) = delete; }; using test_grammar = sor< alpha, digit >; diff --git a/packages/PEGTL/src/test/pegtl/limit_bytes.cpp b/packages/PEGTL/src/test/pegtl/limit_bytes.cpp new file mode 100644 index 0000000000000000000000000000000000000000..94e66c3f69bebc1748b9d86a322ee5518012492c --- /dev/null +++ b/packages/PEGTL/src/test/pegtl/limit_bytes.cpp @@ -0,0 +1,50 @@ +// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#include <tao/pegtl/contrib/limit_bytes.hpp> + +#include "test.hpp" + +namespace TAO_PEGTL_NAMESPACE +{ + struct test_rule + : star< alpha > + {}; + + struct test_grammar + : seq< test_rule, eof > + {}; + + template< typename Rule > + struct test_action + : nothing< Rule > + {}; + + template<> + struct test_action< test_rule > + : limit_bytes< 5 > + {}; + + void unit_test() + { + memory_input<> i1( "aaa", __FUNCTION__ ); + const auto r1 = pegtl::parse< test_grammar >( i1 ); + TAO_PEGTL_TEST_ASSERT( r1 ); + + memory_input<> i2( "aaaaaaaaaaa", __FUNCTION__ ); + const auto r2 = pegtl::parse< test_grammar >( i2 ); + TAO_PEGTL_TEST_ASSERT( r2 ); + + memory_input<> i3( "aaa", __FUNCTION__ ); + const auto r3 = pegtl::parse< test_grammar, test_action >( i3 ); + TAO_PEGTL_TEST_ASSERT( r3 ); + +#if defined( __cpp_exceptions ) + memory_input<> i4( "aaaaaaaaaaa", __FUNCTION__ ); + TAO_PEGTL_TEST_THROWS( pegtl::parse< test_grammar, test_action >( i4 ) ); +#endif + } + +} // namespace TAO_PEGTL_NAMESPACE + +#include "main.hpp" diff --git a/packages/PEGTL/src/test/pegtl/limit_depth.cpp b/packages/PEGTL/src/test/pegtl/limit_depth.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8fb1b2713c3091278d716a4857a9f34e3e1f2fe4 --- /dev/null +++ b/packages/PEGTL/src/test/pegtl/limit_depth.cpp @@ -0,0 +1,50 @@ +// 
Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#include <tao/pegtl/contrib/limit_depth.hpp> + +#include "test.hpp" + +namespace TAO_PEGTL_NAMESPACE +{ + struct test_recursive + : seq< alpha, opt< test_recursive > > + {}; + + struct test_grammar + : seq< test_recursive, eof > + {}; + + template< typename Rule > + struct test_action + : nothing< Rule > + {}; + + template<> + struct test_action< test_recursive > + : limit_depth< 5 > + {}; + + void unit_test() + { + memory_input<> i1( "aaa", __FUNCTION__ ); + const auto r1 = pegtl::parse< test_grammar >( i1 ); + TAO_PEGTL_TEST_ASSERT( r1 ); + + memory_input<> i2( "aaaaaaaaaaa", __FUNCTION__ ); + const auto r2 = pegtl::parse< test_grammar >( i2 ); + TAO_PEGTL_TEST_ASSERT( r2 ); + + memory_input<> i3( "aaa", __FUNCTION__ ); + const auto r3 = pegtl::parse< test_grammar, test_action >( i3 ); + TAO_PEGTL_TEST_ASSERT( r3 ); + +#if defined( __cpp_exceptions ) + memory_input<> i4( "aaaaaaaaaaa", __FUNCTION__ ); + TAO_PEGTL_TEST_THROWS( pegtl::parse< test_grammar, test_action >( i4 ) ); +#endif + } + +} // namespace TAO_PEGTL_NAMESPACE + +#include "main.hpp" diff --git a/packages/kokkos/.github/workflows/cancelling.yml b/packages/kokkos/.github/workflows/cancelling.yml new file mode 100644 index 0000000000000000000000000000000000000000..fa30adf956e1c272c1b8d29d131f225b1ff94919 --- /dev/null +++ b/packages/kokkos/.github/workflows/cancelling.yml @@ -0,0 +1,20 @@ +name: cancel-builds-on-update +on: + workflow_run: + workflows: ['github-Linux', 'github-OSX'] + types: ['requested'] + +jobs: + cancel-duplicate-workflow-runs: + name: "Cancel duplicate workflow runs" + runs-on: ubuntu-latest + steps: + - uses: potiuk/cancel-workflow-runs@master + name: "Cancel duplicate workflow runs" + with: + cancelMode: duplicates + cancelFutureDuplicates: true + token: ${{ secrets.GITHUB_TOKEN }} + sourceRunId: ${{ github.event.workflow_run.id }} + notifyPRCancel: 
true + skipEventTypes: '["push", "schedule"]' diff --git a/packages/kokkos/.github/workflows/continuous-integration-workflow.yml b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml new file mode 100644 index 0000000000000000000000000000000000000000..0e5f523ccf77014b18a034659b450f7036901747 --- /dev/null +++ b/packages/kokkos/.github/workflows/continuous-integration-workflow.yml @@ -0,0 +1,72 @@ +name: github-Linux +on: [push, pull_request] + +jobs: + CI: + continue-on-error: true + strategy: + matrix: + distro: ['fedora:latest', 'fedora:rawhide', 'ubuntu:latest'] + cxx: ['g++', 'clang++'] + cmake_build_type: ['Release', 'Debug'] + openmp: ['ON'] + include: + - distro: 'fedora:intel' + cxx: 'icpc' + cmake_build_type: 'Release' + openmp: 'ON' + - distro: 'fedora:intel' + cxx: 'icpc' + cmake_build_type: 'Debug' + openmp: 'ON' + - distro: 'fedora:intel-oneapi' + cxx: 'icpx' + cmake_build_type: 'Release' + openmp: 'ON' + - distro: 'fedora:intel-oneapi' + cxx: 'icpx' + cmake_build_type: 'Debug' + openmp: 'ON' + runs-on: ubuntu-latest + container: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} + steps: + - name: Checkout code + uses: actions/checkout@v2.2.0 + - uses: actions/cache@v2 + with: + path: ~/.ccache + key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }}-${{ github.sha }} + restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{github.ref}} + - name: Get trial license + if: ${{ matrix.cxx == 'icpc' }} + run: | + mkdir ~/Licenses + curl https://dynamicinstaller.intel.com/api/v2/license > ~/Licenses/intel.lic + - name: maybe_disable_death_tests + if: ${{ matrix.distro == 'fedora:rawhide' }} + run: echo "GTEST_FILTER=-*DeathTest*" >> $GITHUB_ENV + - name: build-and-test + run: | + ccache -z + cmake \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DKokkos_ENABLE_HWLOC=ON \ + -DKokkos_ENABLE_OPENMP=${{ matrix.openmp }} \ +
-DKokkos_ENABLE_TESTS=ON \ + -DKokkos_ENABLE_EXAMPLES=ON \ + -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ + -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} \ + -DBUILD_NAME=${{ matrix.distro }}-${{ matrix.cxx }} \ + -DBUILD_JOBS=2 -DBINARY_DIR=builddir -DSITE=GitHub-Linux \ + -P cmake/KokkosCI.cmake + ccache -s + - name: Test DESTDIR Install + run: DESTDIR=${PWD}/install cmake --build builddir --target install && rm -rf ${PWD}/install/usr && rmdir ${PWD}/install + - name: Install + run: sudo cmake --build builddir --target install + - name: Test install + working-directory: example/build_cmake_installed + run: | + cmake -B builddir -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} + cmake --build builddir + cmake --build builddir --target test diff --git a/packages/kokkos/.github/workflows/osx.yml b/packages/kokkos/.github/workflows/osx.yml new file mode 100644 index 0000000000000000000000000000000000000000..855b557c829a609f34b82c7e5f307eef60cf0ede --- /dev/null +++ b/packages/kokkos/.github/workflows/osx.yml @@ -0,0 +1,35 @@ +name: github-OSX + +on: [push, pull_request] + +jobs: + osxci: + name: osx-ci + runs-on: [macos-latest] + + strategy: + matrix: + include: + - backend: "SERIAL" + cmake_build_type: "RelWithDebInfo" + - backend: "PTHREAD" + cmake_build_type: "RelWithDebInfo" + - backend: "SERIAL" + cmake_build_type: "Debug" + - backend: "SERIAL" + cmake_build_type: "Release" + + steps: + - uses: actions/checkout@v2 + - name: build-and-test + run: + cmake + -DKokkos_ENABLE_${{ matrix.backend }}=On + -DCMAKE_CXX_FLAGS="-Werror" + -DCMAKE_CXX_STANDARD=14 + -DKokkos_ENABLE_COMPILER_WARNINGS=ON + -DKokkos_ENABLE_TESTS=On + -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} + -DBUILD_NAME=macOS-${{ matrix.backend }} + -DTARGET=install -DBUILD_JOBS=2 -DSITE=GitHub-OSX + -P cmake/KokkosCI.cmake diff --git a/packages/kokkos/.gitignore b/packages/kokkos/.gitignore index ec6f3487c9f83de8de9977890352fb9ca702255b..eb2257762bdbc1a0536bb04ef935d94387a5578d 100644 --- 
a/packages/kokkos/.gitignore +++ b/packages/kokkos/.gitignore @@ -12,3 +12,12 @@ testing/ /out/build /CMakeSettings.json /out/mytest + +# build directories in source tree +/build* + +# IDE-specific files/folders +## VSCode +/.vscode +## QtCreator +/CMakeLists.txt.user* diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo index deecc77edad38b350f7bf85fc5514fde2e1893ad..6dd4101e5bdf1210d26ef2ff0a34f557416c532b 100644 --- a/packages/kokkos/.gitrepo +++ b/packages/kokkos/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:kokkos/kokkos.git branch = master - commit = 1fb0c284d458c75370094921d9f202c287502325 - parent = 55da1f845ac4f9ea049f2d6a97c7edef95a887ab - cmdver = 0.4.3 + commit = 4b97a22ff7be7635116930bb97173058d6079202 + parent = f2fc77ba9037b2a2032ab980fb445175441f6d1f method = merge + cmdver = 0.4.3 diff --git a/packages/kokkos/.jenkins b/packages/kokkos/.jenkins index 889abe33f8305a9f3053079f2dffac4be7abd28e..001171d648e7cfb2236d17439720562707faaab4 100644 --- a/packages/kokkos/.jenkins +++ b/packages/kokkos/.jenkins @@ -5,6 +5,8 @@ pipeline { CCACHE_DIR = '/tmp/ccache' CCACHE_MAXSIZE = '10G' CCACHE_CPP2 = 'true' + BUILD_JOBS = 8 + SITE = 'Jenkins' } stages { stage('Clang-Format') { @@ -28,25 +30,27 @@ pipeline { dockerfile { filename 'Dockerfile.sycl' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=intel/oneapi-basekit:devel-ubuntu18.04' - label 'docker' + label 'nvidia-docker && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache' } } steps { sh 'ccache --zero-stats' - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''rm -rf build && \ cmake \ - -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_CXX_FLAGS="-Werror" \ + -DCMAKE_CXX_FLAGS="-Werror -Wno-unknown-cuda-version -Wno-gnu-zero-variadic-macro-arguments" \ + -DKokkos_ARCH_VOLTA70=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_TESTS=ON 
\ -DKokkos_ENABLE_SYCL=ON \ + -DKokkos_ENABLE_UNSUPPORTED_ARCHS=ON \ -DCMAKE_CXX_STANDARD=17 \ - .. && \ - make -j8 && ctest --verbose''' + -DBUILD_NAME=${STAGE_NAME} \ + -P cmake/KokkosCI.cmake''' } post { always { @@ -68,11 +72,12 @@ pipeline { OMP_NUM_THREADS = 8 OMP_PLACES = 'threads' OMP_PROC_BIND = 'spread' + LC_ALL = 'C' } steps { sh 'ccache --zero-stats' sh 'echo "/opt/rocm/llvm/lib" > /etc/ld.so.conf.d/llvm.conf && ldconfig' - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''rm -rf build && \ cmake \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_COMPILER=hipcc \ @@ -83,8 +88,8 @@ pipeline { -DKokkos_ENABLE_HIP=ON \ -DKokkos_ARCH_VEGA906=ON \ -DKokkos_ENABLE_OPENMP=ON \ - .. && \ - make -j8 && ctest --verbose''' + -DBUILD_NAME=${STAGE_NAME} \ + -P cmake/KokkosCI.cmake''' } post { always { @@ -102,9 +107,12 @@ pipeline { args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES' } } + environment { + LC_ALL = 'C' + } steps { sh 'ccache --zero-stats' - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''rm -rf build && \ cmake \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=hipcc \ @@ -114,8 +122,8 @@ pipeline { -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_HIP=ON \ -DKokkos_ARCH_VEGA906=ON \ - .. 
&& \ - make -j8 && ctest --verbose''' + -DBUILD_NAME=${STAGE_NAME} \ + -P cmake/KokkosCI.cmake''' } post { always { @@ -134,19 +142,19 @@ pipeline { } steps { sh 'ccache --zero-stats' - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''rm -rf build && \ cmake \ - -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_CXX_FLAGS="-Wno-unknown-cuda-version -Werror" \ + -DCMAKE_CXX_FLAGS="-Wno-unknown-cuda-version -Werror -Wno-undefined-internal -Wno-pass-failed" \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_TUNING=ON \ -DKokkos_ENABLE_OPENMPTARGET=ON \ -DKokkos_ARCH_VOLTA70=ON \ -DCMAKE_CXX_STANDARD=17 \ - .. && \ - make -j8 && ctest --verbose''' + -DBUILD_NAME=${STAGE_NAME} \ + -P cmake/KokkosCI.cmake''' } post { always { @@ -165,7 +173,7 @@ pipeline { } steps { sh 'ccache --zero-stats' - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''rm -rf build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_CLANG_TIDY="clang-tidy;-warnings-as-errors=*" \ @@ -179,8 +187,8 @@ pipeline { -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_TUNING=ON \ -DKokkos_ARCH_VOLTA70=ON \ - .. 
&& \ - make -j8 && ctest --verbose''' + -DBUILD_NAME=${STAGE_NAME} \ + -P cmake/KokkosCI.cmake''' } post { always { @@ -222,7 +230,7 @@ pipeline { dockerfile { filename 'Dockerfile.nvcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.0-devel --build-arg ADDITIONAL_PACKAGES="g++-8 gfortran" --build-arg CMAKE_VERSION=3.17.3' + additionalBuildArgs '--build-arg BASE=nvidia/cuda:11.0-devel --build-arg ADDITIONAL_PACKAGES="g++-8 gfortran clang" --build-arg CMAKE_VERSION=3.17.3' label 'nvidia-docker && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } @@ -236,7 +244,7 @@ pipeline { steps { sh 'ccache --zero-stats' sh '''rm -rf install && mkdir -p install && \ - rm -rf build && mkdir -p build && cd build && \ + rm -rf build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER=g++-8 \ @@ -248,11 +256,10 @@ pipeline { -DKokkos_ENABLE_CUDA_LAMBDA=OFF \ -DKokkos_ENABLE_CUDA_UVM=ON \ -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \ - -DKokkos_ARCH_VOLTA70=ON \ - -DCMAKE_INSTALL_PREFIX=${PWD}/../install \ - .. && \ - make -j8 install && \ - cd .. && \ + -DCMAKE_INSTALL_PREFIX=${PWD}/install \ + -DBUILD_NAME=${STAGE_NAME} \ + -DTARGET=install \ + -P cmake/KokkosCI.cmake && \ rm -rf build-tests && mkdir -p build-tests && cd build-tests && \ export CMAKE_PREFIX_PATH=${PWD}/../install && \ cmake \ @@ -271,7 +278,11 @@ pipeline { -DCMAKE_CXX_FLAGS=-Werror \ -DCMAKE_CXX_STANDARD=17 \ .. && \ - make -j8 && ctest --verbose''' + make -j8 && ctest --verbose && \ + cd ../.. 
&& \ + cmake -B build_cmake_installed_different_compiler/build -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_FLAGS=-Werror -DCMAKE_CXX_STANDARD=17 build_cmake_installed_different_compiler && \ + cmake --build build_cmake_installed_different_compiler/build --target all && \ + cmake --build build_cmake_installed_different_compiler/build --target test''' } post { always { @@ -284,14 +295,14 @@ pipeline { dockerfile { filename 'Dockerfile.nvcc' dir 'scripts/docker' - additionalBuildArgs '--build-arg BASE=nvidia/cuda:10.1-devel --build-arg CMAKE_VERSION=3.15.5' + additionalBuildArgs '--build-arg BASE=nvidia/cuda:10.1-devel' label 'nvidia-docker && volta' args '-v /tmp/ccache.kokkos:/tmp/ccache --env NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES' } } steps { sh 'ccache --zero-stats' - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''rm -rf build && \ cmake \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ @@ -305,9 +316,10 @@ pipeline { -DKokkos_ENABLE_CUDA=ON \ -DKokkos_ENABLE_CUDA_LAMBDA=ON \ -DKokkos_ENABLE_LIBDL=OFF \ - .. && \ - make -j8 && ctest --verbose && \ - cd ../example/build_cmake_in_tree && \ + -DBUILD_NAME=${STAGE_NAME} \ + -DTARGET=install \ + -P cmake/KokkosCI.cmake && \ + cd example/build_cmake_in_tree && \ rm -rf build && mkdir -p build && cd build && \ cmake -DCMAKE_CXX_STANDARD=14 .. && make -j8 && ctest --verbose''' } @@ -330,7 +342,7 @@ pipeline { OMP_PROC_BIND = 'true' } steps { - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''rm -rf build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_STANDARD=14 \ @@ -339,8 +351,9 @@ pipeline { -DKokkos_ENABLE_TESTS=ON \ -DKokkos_ENABLE_OPENMP=ON \ -DKokkos_ENABLE_LIBDL=OFF \ - .. 
&& \ - make -j8 && ctest --verbose && gcc -I$PWD/../core/src/ ../core/unit_test/tools/TestCInterface.c''' + -DBUILD_NAME=${STAGE_NAME} \ + -P cmake/KokkosCI.cmake && \ + gcc -I$PWD/core/src core/unit_test/tools/TestCInterface.c''' } } } diff --git a/packages/kokkos/.travis.yml b/packages/kokkos/.travis.yml index d156e91ee0984af24c0127c52fcba674bce7fa82..04ef01c1602cf87aae3e39225037d65f49651f62 100644 --- a/packages/kokkos/.travis.yml +++ b/packages/kokkos/.travis.yml @@ -4,7 +4,6 @@ language: cpp os: - linux - - osx compiler: - gcc @@ -30,7 +29,7 @@ branches: - /^release/ env: - - + - # - BACKEND="OPENMP" - BACKEND="PTHREAD" - CMAKE_BUILD_TYPE=Debug COVERAGE=yes GTEST_FILTER="-*DeathTest*" @@ -42,38 +41,40 @@ env: matrix: exclude: -# Apple GCC is just an alias to AppleClang - - os: osx - compiler: gcc -# Apple Clang doesn't support OpenMP - - os: osx - compiler: clang - env: CMAKE_BUILD_TYPE=Debug BACKEND="OPENMP" COVERAGE=yes GTEST_FILTER="-*DeathTest*" - - os: osx - compiler: clang - env: CMAKE_BUILD_TYPE=Release BACKEND="OPENMP" -# We do this as canary - os: linux compiler: gcc env: CMAKE_BUILD_TYPE=Release BACKEND="OPENMP" +# Install newer CMake. 
The distribution comes with CMake 3.12.4 but we require at least 3.16 +install: + - CMAKE_VERSION=3.17.1 + - CMAKE_DIR=/opt/cmake + - CMAKE_KEY=2D2CEF1034921684 && + CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && + CMAKE_SCRIPT=cmake-${CMAKE_VERSION}-Linux-x86_64.sh && + CMAKE_SHA256=cmake-${CMAKE_VERSION}-SHA-256.txt && + wget --quiet ${CMAKE_URL}/${CMAKE_SHA256} && + wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && + wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && + #gpg --keyserver pool.sks-keyservers.net --recv-keys ${CMAKE_KEY} && + #gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && + #grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && + mkdir -p ${CMAKE_DIR} && + sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && + rm cmake* + - PATH=${CMAKE_DIR}/bin:$PATH + - cd ${TRAVIS_BUILD_DIR} + before_script: - - if [[ ${TRAVIS_OS_NAME} == "osx" ]]; then - brew update; - export HOMEBREW_NO_AUTO_UPDATE=1; - brew ls --versions ccache > /dev/null || brew install ccache; - export PATH=/usr/local/opt/ccache/libexec:$PATH; - export CXXFLAGS="${CXXFLAGS} -Wno-unused-command-line-argument"; - if [[ ${BACKEND} == "OPENMP" ]]; then brew install libomp; fi - fi - ccache -z - - if [[ ${COVERAGE} ]]; then export CXX="${CXX} --coverage"; fi + - if [[ ${COVERAGE} ]]; then export CXX="${CXX} --coverage"; export BUILD_NAME_SUFFIX="-Coverage"; fi - if [[ ! ${CMAKE_BUILD_TYPE} ]]; then export CXXFLAGS="${CXXFLAGS} -O2"; fi script: - export OMP_NUM_THREADS=2 - export OMP_PLACES=threads - export OMP_PROC_BIND=spread + - export BUILD_JOBS=2 # LD_LIBRARY_PATH workaround to find clang's libomp: https://github.com/travis-ci/travis-ci/issues/8613 - if [[ ${CC} = clang ]]; then export LD_LIBRARY_PATH=/usr/local/clang/lib${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH; fi # enable ccache for clang on linux and add CCACHE_CPP2 to avoid 'Argument unused during compilation -I...' 
warning @@ -81,17 +82,17 @@ script: ln -s /usr/bin/ccache $HOME/bin/clang++; export CCACHE_CPP2=yes; fi - - mkdir build && - pushd build && - cmake .. + - cmake ${BACKEND:+-DKokkos_ENABLE_${BACKEND}=On} -DCMAKE_CXX_FLAGS="${CXXFLAGS} -Werror" -DCMAKE_CXX_STANDARD=14 -DKokkos_ENABLE_COMPILER_WARNINGS=ON -DKokkos_ENABLE_TESTS=On - ${CMAKE_BUILD_TYPE:+-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}} && - make VERBOSE=1 -j2 && - travis_wait 60 make test CTEST_OUTPUT_ON_FAILURE=1 && + ${CMAKE_BUILD_TYPE:+-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}} + -DBUILD_NAME="${CC}-${BACKEND}${BUILD_NAME_SUFFIX}" + -DSITE=Travis + -P cmake/KokkosCI.cmake && + pushd build && make install DESTDIR=${PWD}/install && rm -rf ${PWD}/install/usr/local && rmdir ${PWD}/install/usr && popd diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index c759181aa21ec3086507d678ecb9955ae4828681..3ce38c37d866dacc25528f5597461e7629175e00 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,168 @@ # Change Log +## [3.4.00](https://github.com/kokkos/kokkos/tree/3.4.00) (2021-04-25) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.3.01...3.4.00) + +**Highlights:** +- SYCL Backend Almost Feature Complete +- OpenMPTarget Backend Almost Feature Complete +- Performance Improvements for HIP backend +- Require CMake 3.16 or newer +- Tool Callback Interface Enhancements +- cmath wrapper functions available now in Kokkos::Experimental + +**Features:** +- Implement parallel_scan with ThreadVectorRange and Reducer [\#3861](https://github.com/kokkos/kokkos/pull/3861) +- Implement SYCL Random [\#3849](https://github.com/kokkos/kokkos/pull/3849) +- OpenMPTarget: Adding Implementation for nested reducers [\#3845](https://github.com/kokkos/kokkos/pull/3845) +- Implement UniqueToken for SYCL [\#3833](https://github.com/kokkos/kokkos/pull/3833) +- OpenMPTarget: UniqueToken::Global implementation [\#3823](https://github.com/kokkos/kokkos/pull/3823) +- DualView sync's on 
ExecutionSpaces [\#3822](https://github.com/kokkos/kokkos/pull/3822) +- SYCL outer TeamPolicy parallel_reduce [\#3818](https://github.com/kokkos/kokkos/pull/3818) +- SYCL TeamPolicy::team_scan [\#3815](https://github.com/kokkos/kokkos/pull/3815) +- SYCL MDRangePolicy parallel_reduce [\#3801](https://github.com/kokkos/kokkos/pull/3801) +- Enable use of execution space instances in ScatterView [\#3786](https://github.com/kokkos/kokkos/pull/3786) +- SYCL TeamPolicy nested parallel_reduce [\#3783](https://github.com/kokkos/kokkos/pull/3783) +- OpenMPTarget: MDRange with TagType for parallel_for [\#3781](https://github.com/kokkos/kokkos/pull/3781) +- Adding OpenMPTarget parallel_scan [\#3655](https://github.com/kokkos/kokkos/pull/3655) +- SYCL basic TeamPolicy [\#3654](https://github.com/kokkos/kokkos/pull/3654) +- OpenMPTarget: scratch memory implementation [\#3611](https://github.com/kokkos/kokkos/pull/3611) + +**Implemented enhancements Backends and Archs:** +- SYCL choose a specific GPU [\#3918](https://github.com/kokkos/kokkos/pull/3918) +- [HIP] Lock access to scratch memory when using Teams [\#3916](https://github.com/kokkos/kokkos/pull/3916) +- [HIP] fix multithreaded access to get_next_driver [\#3908](https://github.com/kokkos/kokkos/pull/3908) +- Forward declare HIPHostPinnedSpace and SYCLSharedUSMSpace [\#3902](https://github.com/kokkos/kokkos/pull/3902) +- Let SYCL USMObjectMem use SharedAllocationRecord [\#3898](https://github.com/kokkos/kokkos/pull/3898) +- Implement clock_tic for SYCL [\#3893](https://github.com/kokkos/kokkos/pull/3893) +- Don't use a static variable in HIPInternal::scratch_space [\#3866](https://github.com/kokkos/kokkos/pull/3866) +- Reuse memory for SYCL parallel_reduce [\#3873](https://github.com/kokkos/kokkos/pull/3873) +- Update SYCL compiler in CI [\#3826](https://github.com/kokkos/kokkos/pull/3826) +- Introduce HostSharedPtr to manage m_space_instance for Cuda/HIP/SYCL
[\#3824](https://github.com/kokkos/kokkos/pull/3824) +- [HIP] Use shuffle for range reduction [\#3811](https://github.com/kokkos/kokkos/pull/3811) +- OpenMPTarget: Changes to the hierarchical parallelism [\#3808](https://github.com/kokkos/kokkos/pull/3808) +- Remove ExtendedReferenceWrapper for SYCL parallel_reduce [\#3802](https://github.com/kokkos/kokkos/pull/3802) +- Eliminate sycl_indirect_launch [\#3777](https://github.com/kokkos/kokkos/pull/3777) +- OpenMPTarget: scratch implementation for parallel_reduce [\#3776](https://github.com/kokkos/kokkos/pull/3776) +- Allow initializing SYCL execution space from sycl::queue and SYCL::impl_static_fence [\#3767](https://github.com/kokkos/kokkos/pull/3767) +- SYCL TeamPolicy scratch memory alternative [\#3763](https://github.com/kokkos/kokkos/pull/3763) +- Alternative implementation for SYCL TeamPolicy [\#3759](https://github.com/kokkos/kokkos/pull/3759) +- Unify handling of synchronous errors in SYCL [\#3754](https://github.com/kokkos/kokkos/pull/3754) +- core/Cuda: Half_t updates for cgsolve [\#3746](https://github.com/kokkos/kokkos/pull/3746) +- Unify HIPParallelLaunch structures [\#3733](https://github.com/kokkos/kokkos/pull/3733) +- Improve performance for SYCL parallel_reduce [\#3732](https://github.com/kokkos/kokkos/pull/3732) +- Use consistent types in Kokkos_OpenMPTarget_Parallel.hpp [\#3703](https://github.com/kokkos/kokkos/pull/3703) +- Implement non-blocking kernel launches for HIP backend [\#3697](https://github.com/kokkos/kokkos/pull/3697) +- Change SYCLInternal::m_queue std::unique_ptr -> std::optional [\#3677](https://github.com/kokkos/kokkos/pull/3677) +- Use alternative SYCL parallel_reduce implementation [\#3671](https://github.com/kokkos/kokkos/pull/3671) +- Use runtime values in KokkosExp_MDRangePolicy.hpp [\#3626](https://github.com/kokkos/kokkos/pull/3626) +- Clean up AnalyzePolicy [\#3564](https://github.com/kokkos/kokkos/pull/3564) +- Changes for indirect launch of SYCL parallel reduce 
[\#3511](https://github.com/kokkos/kokkos/pull/3511) + +**Implemented enhancements BuildSystem:** +- Also require C++14 when building gtest [\#3912](https://github.com/kokkos/kokkos/pull/3912) +- Fix compiling SYCL with OpenMP [\#3874](https://github.com/kokkos/kokkos/pull/3874) +- Require C++17 for SYCL (at configuration time) [\#3869](https://github.com/kokkos/kokkos/pull/3869) +- Add COMPILE_DEFINITIONS argument to kokkos_create_imported_tpl [\#3862](https://github.com/kokkos/kokkos/pull/3862) +- Do not pass arch flags to the linker with no rdc [\#3846](https://github.com/kokkos/kokkos/pull/3846) +- Try compiling C++14 check with C++14 support and print error message [\#3843](https://github.com/kokkos/kokkos/pull/3843) +- Enable HIP with Cray Clang [\#3842](https://github.com/kokkos/kokkos/pull/3842) +- Add an option to disable header self containment tests [\#3834](https://github.com/kokkos/kokkos/pull/3834) +- CMake check for C++14 [\#3809](https://github.com/kokkos/kokkos/pull/3809) +- Prefer -std=* over --std=* [\#3779](https://github.com/kokkos/kokkos/pull/3779) +- Kokkos launch compiler updates [\#3778](https://github.com/kokkos/kokkos/pull/3778) +- Updated comments and enabled no-op for kokkos_launch_compiler [\#3774](https://github.com/kokkos/kokkos/pull/3774) +- Apple's Clang not correctly recognised [\#3772](https://github.com/kokkos/kokkos/pull/3772) +- kokkos_launch_compiler + CUDA auto-detect arch [\#3770](https://github.com/kokkos/kokkos/pull/3770) +- Add Spack test support for Kokkos [\#3753](https://github.com/kokkos/kokkos/pull/3753) +- Split SYCL tests for aot compilation [\#3741](https://github.com/kokkos/kokkos/pull/3741) +- Use consistent OpenMP flag for IntelClang [\#3735](https://github.com/kokkos/kokkos/pull/3735) +- Add support for -Wno-deprecated-gpu-targets [\#3722](https://github.com/kokkos/kokkos/pull/3722) +- Add configuration to target CUDA compute capability 8.6 [\#3713](https://github.com/kokkos/kokkos/pull/3713) +- Added VERSION 
and SOVERSION to KOKKOS_INTERNAL_ADD_LIBRARY [\#3706](https://github.com/kokkos/kokkos/pull/3706) +- Add fast-math to known NVCC flags [\#3699](https://github.com/kokkos/kokkos/pull/3699) +- Add MI-100 arch string [\#3698](https://github.com/kokkos/kokkos/pull/3698) +- Require CMake >=3.16 [\#3679](https://github.com/kokkos/kokkos/pull/3679) +- KokkosCI.cmake, KokkosCTest.cmake.in, CTestConfig.cmake.in + CI updates [\#2844](https://github.com/kokkos/kokkos/pull/2844) + +**Implemented enhancements Tools:** +- Improve readability of the callback invocation in profiling [\#3860](https://github.com/kokkos/kokkos/pull/3860) +- V1.1 Tools Interface: incremental, action-based [\#3812](https://github.com/kokkos/kokkos/pull/3812) +- Enable launch latency simulations [\#3721](https://github.com/kokkos/kokkos/pull/3721) +- Added metadata callback to tools interface [\#3711](https://github.com/kokkos/kokkos/pull/3711) +- MDRange Tile Size Tuning [\#3688](https://github.com/kokkos/kokkos/pull/3688) +- Added support for command-line args for kokkos-tools [\#3627](https://github.com/kokkos/kokkos/pull/3627) +- Query max tile sizes for an MDRangePolicy, and set tile sizes on an existing policy [\#3481](https://github.com/kokkos/kokkos/pull/3481) + +**Implemented enhancements Other:** +- Try detecting ndevices in get_gpu [\#3921](https://github.com/kokkos/kokkos/pull/3921) +- Use strcmp to compare names() [\#3909](https://github.com/kokkos/kokkos/pull/3909) +- Add execution space arguments for constructor overloads that might allocate a new underlying View [\#3904](https://github.com/kokkos/kokkos/pull/3904) +- Prefix labels in internal use of kokkos_malloc [\#3891](https://github.com/kokkos/kokkos/pull/3891) +- Prefix labels for internal uses of SharedAllocationRecord [\#3890](https://github.com/kokkos/kokkos/pull/3890) +- Add missing hypot math function [\#3880](https://github.com/kokkos/kokkos/pull/3880) +- Unify algorithm unit tests to avoid code duplication 
[\#3851](https://github.com/kokkos/kokkos/pull/3851) +- DualView.template view() better matches for Devices in UVMSpace cases [\#3857](https://github.com/kokkos/kokkos/pull/3857) +- More extensive disentangling of Policy Traits [\#3829](https://github.com/kokkos/kokkos/pull/3829) +- Replaced nanosleep and sched_yield with STL routines [\#3825](https://github.com/kokkos/kokkos/pull/3825) +- Constructing Atomic Subviews [\#3810](https://github.com/kokkos/kokkos/pull/3810) +- Metadata Declaration in Core [\#3729](https://github.com/kokkos/kokkos/pull/3729) +- Allow using tagged final functor in parallel_reduce [\#3714](https://github.com/kokkos/kokkos/pull/3714) +- Major duplicate code removal in SharedAllocationRecord specializations [\#3658](https://github.com/kokkos/kokkos/pull/3658) + +**Fixed bugs:** +- Provide forward declarations in Kokkos_ViewLayoutTiled.hpp for XL [\#3911](https://github.com/kokkos/kokkos/pull/3911) +- Fixup absolute value of floating points in Kokkos complex [\#3882](https://github.com/kokkos/kokkos/pull/3882) +- Address intel 17 ICE [\#3881](https://github.com/kokkos/kokkos/pull/3881) +- Add missing pow(Kokkos::complex) overloads [\#3868](https://github.com/kokkos/kokkos/pull/3868) +- Fix bug {pow, log}(Kokkos::complex) [\#3866](https://github.com/kokkos/kokkos/pull/3866) +- Cleanup writing to output streams in Cuda [\#3859](https://github.com/kokkos/kokkos/pull/3859) +- Fixup cache CUDA fallback execution space instance used by DualView::sync [\#3856](https://github.com/kokkos/kokkos/pull/3856) +- Fix cmake warning with pthread [\#3854](https://github.com/kokkos/kokkos/pull/3854) +- Fix typo FOUND_CUDA_{DRIVVER -> DRIVER} [\#3852](https://github.com/kokkos/kokkos/pull/3852) +- Fix bug in SYCL team_reduce [\#3848](https://github.com/kokkos/kokkos/pull/3848) +- Atrocious bug in MDRange tuning [\#3803](https://github.com/kokkos/kokkos/pull/3803) +- Fix compiling SYCL with Kokkos_ENABLE_TUNING=ON 
[\#3800](https://github.com/kokkos/kokkos/pull/3800) +- Fixed command line parsing bug [\#3797](https://github.com/kokkos/kokkos/pull/3797) +- Workaround race condition in SYCL parallel_reduce [\#3782](https://github.com/kokkos/kokkos/pull/3782) +- Fix Atomic{Min,Max} for Kepler30 [\#3780](https://github.com/kokkos/kokkos/pull/3780) +- Fix SYCL typo [\#3755](https://github.com/kokkos/kokkos/pull/3755) +- Fixed Kokkos_install_additional_files macro [\#3752](https://github.com/kokkos/kokkos/pull/3752) +- Fix a typo for Kokkos_ARCH_A64FX [\#3751](https://github.com/kokkos/kokkos/pull/3751) +- OpenMPTarget: fixes and workarounds to work with "Release" build type [\#3748](https://github.com/kokkos/kokkos/pull/3748) +- Fix parsing bug for number of devices command line argument [\#3724](https://github.com/kokkos/kokkos/pull/3724) +- Avoid more warnings with clang and C++20 [\#3719](https://github.com/kokkos/kokkos/pull/3719) +- Fix gcc-10.1 C++20 warnings [\#3718](https://github.com/kokkos/kokkos/pull/3718) +- Fix cuda cache config not being set correct [\#3712](https://github.com/kokkos/kokkos/pull/3712) +- Fix dualview deepcopy perftools [\#3701](https://github.com/kokkos/kokkos/pull/3701) +- use drand instead of frand in drand [\#3696](https://github.com/kokkos/kokkos/pull/3696) + +**Incompatibilities:** +- Remove unimplemented member functions of SYCLDevice [\#3919](https://github.com/kokkos/kokkos/pull/3919) +- Replace cl::sycl [\#3896](https://github.com/kokkos/kokkos/pull/3896) +- Get rid of SYCL workaround in Kokkos_Complex.hpp [\#3884](https://github.com/kokkos/kokkos/pull/3884) +- Replace most uses of if_c [\#3883](https://github.com/kokkos/kokkos/pull/3883) +- Remove Impl::enable_if_type [\#3863](https://github.com/kokkos/kokkos/pull/3863) +- Remove HostBarrier test [\#3847](https://github.com/kokkos/kokkos/pull/3847) +- Avoid (void) interface [\#3836](https://github.com/kokkos/kokkos/pull/3836) +- Remove VerifyExecutionCanAccessMemorySpace 
[\#3813](https://github.com/kokkos/kokkos/pull/3813) +- Avoid duplicated code in ScratchMemorySpace [\#3793](https://github.com/kokkos/kokkos/pull/3793) +- Remove superfluous FunctorFinal specialization [\#3788](https://github.com/kokkos/kokkos/pull/3788) +- Rename cl::sycl -> sycl in Kokkos_MathematicalFunctions.hpp [\#3678](https://github.com/kokkos/kokkos/pull/3678) +- Remove integer_sequence backward compatibility implementation [\#3533](https://github.com/kokkos/kokkos/pull/3533) + +**Enabled tests:** +- Fixup re-enable core performance tests [\#3903](https://github.com/kokkos/kokkos/pull/3903) +- Enable more SYCL tests [\#3900](https://github.com/kokkos/kokkos/pull/3900) +- Restrict MDRange Policy tests for Intel GPUs [\#3853](https://github.com/kokkos/kokkos/pull/3853) +- Disable death tests for rawhide [\#3844](https://github.com/kokkos/kokkos/pull/3844) +- OpenMPTarget: Block unit tests that do not pass with the nvidia compiler [\#3839](https://github.com/kokkos/kokkos/pull/3839) +- Enable Bitset container test for SYCL [\#3830](https://github.com/kokkos/kokkos/pull/3830) +- Enable some more SYCL tests [\#3744](https://github.com/kokkos/kokkos/pull/3744) +- Enable SYCL atomic tests [\#3742](https://github.com/kokkos/kokkos/pull/3742) +- Enable more SYCL perf_tests [\#3692](https://github.com/kokkos/kokkos/pull/3692) +- Enable examples for SYCL [\#3691](https://github.com/kokkos/kokkos/pull/3691) + ## [3.3.01](https://github.com/kokkos/kokkos/tree/3.3.01) (2021-01-06) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.3.00...3.3.01) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index 7bc3c7725648d71c6703fd345ce23d159d40c1f8..6fc1bf7d2f7fd3b02a785b1184923cde07b438b2 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -72,7 +72,7 @@ ENDFUNCTION() LIST(APPEND CMAKE_MODULE_PATH cmake/Modules) IF(NOT KOKKOS_HAS_TRILINOS) - cmake_minimum_required(VERSION 3.10 FATAL_ERROR) + 
cmake_minimum_required(VERSION 3.16 FATAL_ERROR) set(CMAKE_DISABLE_SOURCE_CHANGES ON) set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) IF (Spack_WORKAROUND) @@ -111,27 +111,25 @@ ENDIF() set(Kokkos_VERSION_MAJOR 3) -set(Kokkos_VERSION_MINOR 3) -set(Kokkos_VERSION_PATCH 1) +set(Kokkos_VERSION_MINOR 4) +set(Kokkos_VERSION_PATCH 00) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") -IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0") - MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables") - CMAKE_POLICY(SET CMP0074 NEW) -ENDIF() +MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables") +CMAKE_POLICY(SET CMP0074 NEW) # Load either the real TriBITS or a TriBITS wrapper # for certain utility functions that are universal (like GLOBAL_SET) INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) -IF (Kokkos_ENABLE_CUDA AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14.0") - #If we are building CUDA, we have tricked CMake because we declare a CXX project - #If the default C++ standard for a given compiler matches the requested - #standard, then CMake just omits the -std flag in later versions of CMake - #This breaks CUDA compilation (CUDA compiler can have a different default - #-std then the underlying host compiler by itself). Setting this variable - #forces CMake to always add the -std flag even if it thinks it doesn't need it +IF (Kokkos_ENABLE_CUDA) + # If we are building CUDA, we have tricked CMake because we declare a CXX project + # If the default C++ standard for a given compiler matches the requested + # standard, then CMake just omits the -std flag in later versions of CMake + # This breaks CUDA compilation (CUDA compiler can have a different default + # -std than the underlying host compiler by itself). 
Setting this variable + # forces CMake to always add the -std flag even if it thinks it doesn't need it GLOBAL_SET(CMAKE_CXX_STANDARD_DEFAULT 98) ENDIF() @@ -139,15 +137,19 @@ ENDIF() # I really wish these were regular variables # but scoping issues can make it difficult GLOBAL_SET(KOKKOS_COMPILE_OPTIONS) -GLOBAL_SET(KOKKOS_LINK_OPTIONS -DKOKKOS_DEPENDENCE) +GLOBAL_SET(KOKKOS_LINK_OPTIONS) GLOBAL_SET(KOKKOS_CUDA_OPTIONS) GLOBAL_SET(KOKKOS_CUDAFE_OPTIONS) GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS) # We need to append text here for making sure TPLs # we import are available for an installed Kokkos GLOBAL_SET(KOKKOS_TPL_EXPORTS) -# this could probably be scoped to project +# KOKKOS_DEPENDENCE is used by kokkos_launch_compiler GLOBAL_SET(KOKKOS_COMPILE_DEFINITIONS KOKKOS_DEPENDENCE) +# MSVC never goes through kokkos_launch_compiler +IF(NOT MSVC) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -DKOKKOS_DEPENDENCE) +ENDIF() # Include a set of Kokkos-specific wrapper functions that # will either call raw CMake or TriBITS diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index 3b6a5ff4368c966a8a44f04bdbe64c9fceb3745b..2599121d70ada48567c61fdc63ba94925a402267 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -1,8 +1,8 @@ # Default settings common options. 
KOKKOS_VERSION_MAJOR = 3 -KOKKOS_VERSION_MINOR = 3 -KOKKOS_VERSION_PATCH = 1 +KOKKOS_VERSION_MINOR = 4 +KOKKOS_VERSION_PATCH = 00 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,OpenMP,Pthread,Serial @@ -10,7 +10,7 @@ KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MIN KOKKOS_DEVICES ?= "Pthread" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKX -# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80 +# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 # AMD-GPUS: Vega900,Vega906,Vega908 @@ -154,17 +154,17 @@ KOKKOS_INTERNAL_OS_DARWIN := $(call kokkos_has_string,$(KOKKOS_OS),Darwin) KOKKOS_CXX_VERSION := $(strip $(shell $(CXX) --version 2>&1)) KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Intel Corporation) KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI) -KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) -KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) -KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l)>0" | bc)) +KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep -c XL)) +KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -c "CC-")) +KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep -c nvcc)>0" | bc)) KOKKOS_INTERNAL_COMPILER_CLANG := $(call 
kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) -KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM) +KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC) # Check Host Compiler if using NVCC through nvcc_wrapper ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER := $(strip $(shell echo $(CXX) | grep nvcc_wrapper | wc -l)) + KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER := $(strip $(shell echo $(CXX) | grep -c nvcc_wrapper)) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER), 1) KOKKOS_CXX_HOST_VERSION := $(strip $(shell $(CXX) $(CXXFLAGS) --host-version 2>&1)) @@ -287,11 +287,11 @@ else #KOKKOS_INTERNAL_CXX1Z_FLAG := -hstd=c++1z #KOKKOS_INTERNAL_CXX2A_FLAG := -hstd=c++2a else - KOKKOS_INTERNAL_CXX14_FLAG := --std=c++14 - KOKKOS_INTERNAL_CXX1Y_FLAG := --std=c++1y - KOKKOS_INTERNAL_CXX17_FLAG := --std=c++17 - KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z - KOKKOS_INTERNAL_CXX2A_FLAG := --std=c++2a + KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14 + KOKKOS_INTERNAL_CXX1Y_FLAG := -std=c++1y + KOKKOS_INTERNAL_CXX17_FLAG := -std=c++17 + KOKKOS_INTERNAL_CXX1Z_FLAG := -std=c++1z + KOKKOS_INTERNAL_CXX2A_FLAG := -std=c++2a endif endif endif @@ -322,6 +322,7 @@ KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volt KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72) KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75) KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80) +KOKKOS_INTERNAL_USE_ARCH_AMPERE86 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere86) KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + 
$(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ @@ -334,7 +335,8 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLE + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ - + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80)) + + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80) \ + + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE86)) #SEK: This seems like a bug to me ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) @@ -575,10 +577,10 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) endif ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TUNING") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TUNING") endif -tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LIBDL") +tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LIBDL") ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) ifneq ($(KOKKOS_CMAKE), yes) @@ -742,6 +744,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) KOKKOS_CXXFLAGS += -march=armv8.2-a+sve KOKKOS_LDFLAGS += -march=armv8.2-a+sve + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_CXXFLAGS += -msve-vector-bits=512 + KOKKOS_LDFLAGS += -msve-vector-bits=512 + endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1) + KOKKOS_CXXFLAGS += -msve-vector-bits=512 + KOKKOS_LDFLAGS += -msve-vector-bits=512 + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) @@ -1090,6 +1100,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80 endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 + endif ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) KOKKOS_CXXFLAGS += 
$(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) @@ -1149,7 +1164,7 @@ endif KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) - KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) + KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep -c define)) else KOKKOS_INTERNAL_NEW_CONFIG := 1 endif @@ -1171,41 +1186,41 @@ tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp") tmp := $(call kokkos_update_config_header, KOKKOS_POST_INCLUDE_HPP_, "KokkosCore_Config_PostInclude.tmp", "KokkosCore_Config_PostInclude.hpp") ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - tmp := $(call kokkos_append_config_header,"\#include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <setup/Kokkos_Setup_Cuda.hpp>","KokkosCore_Config_SetupBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_Cuda.hpp>","KokkosCore_Config_SetupBackend.hpp") ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) else endif endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - tmp := $(call kokkos_append_config_header,"\#include <fwd/Kokkos_Fwd_OPENMPTARGET.hpp>","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include 
<decl/Kokkos_Declare_OPENMPTARGET.hpp>","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMPTARGET.hpp>","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMPTARGET.hpp>","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - tmp := $(call kokkos_append_config_header,"\#include <fwd/Kokkos_Fwd_HIP.hpp>","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <decl/Kokkos_Declare_HIP.hpp>","KokkosCore_Config_DeclareBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <setup/Kokkos_Setup_HIP.hpp>","KokkosCore_Config_SetupBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HIP.hpp>","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HIP.hpp>","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_HIP.hpp>","KokkosCore_Config_SetupBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(call kokkos_append_config_header,"\#include <fwd/Kokkos_Fwd_OPENMP.hpp>","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <decl/Kokkos_Declare_OPENMP.hpp>","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMP.hpp>","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMP.hpp>","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - tmp := $(call kokkos_append_config_header,"\#include <fwd/Kokkos_Fwd_THREADS.hpp>","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <decl/Kokkos_Declare_THREADS.hpp>","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call 
kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_THREADS.hpp>","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_THREADS.hpp>","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) - tmp := $(call kokkos_append_config_header,"\#include <fwd/Kokkos_Fwd_HPX.hpp>","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <decl/Kokkos_Declare_HPX.hpp>","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HPX.hpp>","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HPX.hpp>","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(call kokkos_append_config_header,"\#include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - tmp := $(call kokkos_append_config_header,"\#include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp") endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) KOKKOS_HEADERS += $(wildcard 
$(KOKKOS_PATH)/core/src/impl/*.hpp) @@ -1324,7 +1339,7 @@ ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) endif # With Cygwin functions such as fdopen and fileno are not defined -# when strict ansi is enabled. strict ansi gets enabled with --std=c++14 +# when strict ansi is enabled. strict ansi gets enabled with -std=c++14 # though. So we hard undefine it here. Not sure if that has any bad side effects # This is needed for gtest actually, not for Kokkos itself! ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) diff --git a/packages/kokkos/Makefile.targets b/packages/kokkos/Makefile.targets index 5a03f7d17e946d4ed0792302d25a8de30a594aee..cf9fc242420e1dbbb519b3312cf1a4c3b4354738 100644 --- a/packages/kokkos/Makefile.targets +++ b/packages/kokkos/Makefile.targets @@ -36,6 +36,8 @@ Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp +Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp diff --git a/packages/kokkos/algorithms/src/Kokkos_Random.hpp b/packages/kokkos/algorithms/src/Kokkos_Random.hpp index 69d6cf8f35ea4705885900f9fc2bfdb608c54373..904cf5ccb967037d94ac9b4a06144a4f7333dd3d 100644 --- a/packages/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/packages/kokkos/algorithms/src/Kokkos_Random.hpp @@ -668,6 +668,25 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> { }; #endif +#ifdef KOKKOS_ENABLE_SYCL +template <> 
+struct Random_UniqueIndex<Kokkos::Experimental::SYCL> { + using locks_view_type = View<int*, Kokkos::Experimental::SYCL>; + KOKKOS_FUNCTION + static int get_state_idx(const locks_view_type& locks_) { +#ifdef KOKKOS_ARCH_INTEL_GEN + int i = Kokkos::Impl::clock_tic() % locks_.extent(0); +#else + int i = 0; +#endif + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i = (i + 1) % static_cast<int>(locks_.extent(0)); + } + return i; + } +}; +#endif + } // namespace Impl template <class DeviceType> @@ -1028,7 +1047,7 @@ class Random_XorShift1024 { KOKKOS_INLINE_FUNCTION double drand(const double& start, const double& end) { - return frand(end - start) + start; + return drand(end - start) + start; } // Marsaglia polar method for drawing a standard normal distributed random diff --git a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt index 819c9e54bae4f293e76252679d041de44c25c051..9109837985a91ad14245133682af15aca59be503 100644 --- a/packages/kokkos/algorithms/unit_tests/CMakeLists.txt +++ b/packages/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -3,6 +3,7 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) +KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/core/unit_test/category_files) SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) @@ -25,7 +26,7 @@ KOKKOS_ADD_TEST_LIBRARY( TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0) IF((NOT (Kokkos_ENABLE_CUDA AND WIN32)) AND (NOT ("${KOKKOS_CXX_COMPILER_ID}" STREQUAL "Fujitsu"))) -TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11) + TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_14) ENDIF() # Suppress clang-tidy diagnostics on code that we do not have control over @@ -33,51 +34,42 @@ 
IF(CMAKE_CXX_CLANG_TIDY) SET_TARGET_PROPERTIES(kokkosalgorithms_gtest PROPERTIES CXX_CLANG_TIDY "") ENDIF() -SET(SOURCES - UnitTestMain.cpp -) +SET(ALGORITHM UnitTestMain.cpp) IF(Kokkos_ENABLE_OPENMP) - LIST( APPEND SOURCES - TestOpenMP.cpp + LIST(APPEND ALGORITHM_SOURCES TestOpenMP_Sort1D.cpp TestOpenMP_Sort3D.cpp TestOpenMP_SortDynamicView.cpp - TestOpenMP_Random.cpp - ) -ENDIF() - -IF(Kokkos_ENABLE_HIP) - LIST( APPEND SOURCES - TestHIP.cpp ) ENDIF() -IF(Kokkos_ENABLE_CUDA) - LIST( APPEND SOURCES - TestCuda.cpp - ) -ENDIF() - -IF(Kokkos_ENABLE_HPX) - LIST( APPEND SOURCES - TestHPX.cpp - ) -ENDIF() - -IF(Kokkos_ENABLE_SERIAL) - LIST( APPEND SOURCES - TestSerial.cpp - ) -ENDIF() - -IF(Kokkos_ENABLE_PTHREAD) - LIST( APPEND SOURCES - TestThreads.cpp - ) -ENDIF() +foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL) + # Because there is always an exception to the rule + if(Tag STREQUAL "Threads") + set(DEVICE "PTHREAD") + else() + string(TOUPPER ${Tag} DEVICE) + endif() + + if(Kokkos_ENABLE_${DEVICE}) + set(dir ${CMAKE_CURRENT_BINARY_DIR}) + set(file ${dir}/Test${Tag}.cpp) + # Write to a temporary intermediate file and call configure_file to avoid + # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. 
+ file(WRITE ${dir}/dummy.cpp + "#include <Test${Tag}_Category.hpp>\n" + "#include <TestRandomCommon.hpp>\n" + "#include <TestSortCommon.hpp>\n" + ) + configure_file(${dir}/dummy.cpp ${file}) + list(APPEND ALGORITHM_SOURCES ${file}) + endif() +endforeach() KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest - SOURCES ${SOURCES} + SOURCES + UnitTestMain.cpp + ${ALGORITHM_SOURCES} ) diff --git a/packages/kokkos/algorithms/unit_tests/Makefile b/packages/kokkos/algorithms/unit_tests/Makefile index c112d7c6fcad3b47647078e27c3f11e9433956b5..dd0aa87de0b2c76fe76d03f8ea77092833dd9f63 100644 --- a/packages/kokkos/algorithms/unit_tests/Makefile +++ b/packages/kokkos/algorithms/unit_tests/Makefile @@ -20,11 +20,19 @@ override LDFLAGS += -lpthread include $(KOKKOS_PATH)/Makefile.kokkos -KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files TEST_TARGETS = TARGETS = +tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ + $(if $(filter Test$(device).cpp, $(shell ls Test$(device).cpp 2>/dev/null)),,\ + $(shell echo "\#include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \ + $(shell echo "\#include <TestRandomCommon.hpp>" >> Test$(device).cpp); \ + $(shell echo "\#include <TestSortCommon.hpp>" >> Test$(device).cpp); \ + ) \ +) + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o TARGETS += KokkosAlgorithms_UnitTest_Cuda @@ -44,7 +52,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = TestOpenMP.o TestOpenMP_Random.o TestOpenMP_Sort1D.o TestOpenMP_Sort3D.o TestOpenMP_SortDynamicView.o UnitTestMain.o gtest-all.o + OBJ_OPENMP = TestOpenMP.o TestOpenMP_Sort1D.o TestOpenMP_Sort3D.o TestOpenMP_SortDynamicView.o UnitTestMain.o gtest-all.o TARGETS += KokkosAlgorithms_UnitTest_OpenMP TEST_TARGETS += test-openmp endif diff --git 
a/packages/kokkos/algorithms/unit_tests/TestOpenMP_Sort1D.cpp b/packages/kokkos/algorithms/unit_tests/TestOpenMP_Sort1D.cpp index a9b2010ad025bd0c967071aca37407bea4a351bf..4a5839f0c80a5298c14ff91422d74664b9dd95bd 100644 --- a/packages/kokkos/algorithms/unit_tests/TestOpenMP_Sort1D.cpp +++ b/packages/kokkos/algorithms/unit_tests/TestOpenMP_Sort1D.cpp @@ -59,6 +59,8 @@ TEST(openmp, SortUnsigned1D) { Impl::test_1D_sort<Kokkos::OpenMP, unsigned>(171); } +TEST(openmp, SortIssue1160) { Impl::test_issue_1160_sort<Kokkos::OpenMP>(); } + } // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} diff --git a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp index caba92c152faac40d46feabb7407e1e6a4e9fb5d..1f14875096dd2fbd0bebf4feea796d4c6ccd79f0 100644 --- a/packages/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -491,6 +491,34 @@ void test_random(unsigned int num_draws) { } } // namespace Impl +template <typename ExecutionSpace> +void test_random_xorshift64() { +#if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \ + defined(KOKKOS_ENABLE_HIP) + const int num_draws = 132141141; +#else // SERIAL, HPX, OPENMP + const int num_draws = 10240000; +#endif + Impl::test_random<Kokkos::Random_XorShift64_Pool<ExecutionSpace>>(num_draws); + Impl::test_random<Kokkos::Random_XorShift64_Pool< + Kokkos::Device<ExecutionSpace, typename ExecutionSpace::memory_space>>>( + num_draws); +} + +template <typename ExecutionSpace> +void test_random_xorshift1024() { +#if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \ + defined(KOKKOS_ENABLE_HIP) + const int num_draws = 52428813; +#else // SERIAL, HPX, OPENMP + const int num_draws = 10130144; +#endif + Impl::test_random<Kokkos::Random_XorShift1024_Pool<ExecutionSpace>>( + num_draws); + Impl::test_random<Kokkos::Random_XorShift1024_Pool< + Kokkos::Device<ExecutionSpace, 
typename ExecutionSpace::memory_space>>>( + num_draws); +} } // namespace Test #endif // KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/packages/kokkos/algorithms/unit_tests/TestRandomCommon.hpp b/packages/kokkos/algorithms/unit_tests/TestRandomCommon.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c6d3b59ae1f12422c448a13f5f91f2ed74cc58ff --- /dev/null +++ b/packages/kokkos/algorithms/unit_tests/TestRandomCommon.hpp @@ -0,0 +1,60 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TESTRANDOM_COMMON_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TESTRANDOM_COMMON_HPP + +#include <TestRandom.hpp> + +namespace Test { + +TEST(TEST_CATEGORY, Random_XorShift64) { + test_random_xorshift64<TEST_EXECSPACE>(); +} +TEST(TEST_CATEGORY, Random_XorShift1024_0) { + test_random_xorshift1024<TEST_EXECSPACE>(); +} +} // namespace Test + +#endif diff --git a/packages/kokkos/containers/unit_tests/TestHIP_Category.hpp b/packages/kokkos/algorithms/unit_tests/TestSortCommon.hpp similarity index 88% rename from packages/kokkos/containers/unit_tests/TestHIP_Category.hpp rename to packages/kokkos/algorithms/unit_tests/TestSortCommon.hpp index c2d60d18148b30674de5ee559ecafc09d23d126f..56657b6574b865419a1f93e01a49aa2a3e648736 100644 --- a/packages/kokkos/containers/unit_tests/TestHIP_Category.hpp +++ b/packages/kokkos/algorithms/unit_tests/TestSortCommon.hpp @@ -42,10 +42,14 @@ //@HEADER */ -#ifndef KOKKOS_TEST_HIP_HPP -#define KOKKOS_TEST_HIP_HPP +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_COMMON_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_COMMON_HPP -#define TEST_CATEGORY hip -#define TEST_EXECSPACE Kokkos::Experimental::HIP +#include <TestSort.hpp> +namespace Test { +TEST(TEST_CATEGORY, SortUnsigned) { + Impl::test_sort<TEST_EXECSPACE, 
unsigned>(171); +} +} // namespace Test #endif diff --git a/packages/kokkos/appveyor.yml b/packages/kokkos/appveyor.yml index c40bf066b7a9c6e9de822cc8124147fa8f241de9..e8763c0b665c4a992f74b70eab0caa915beb33dd 100644 --- a/packages/kokkos/appveyor.yml +++ b/packages/kokkos/appveyor.yml @@ -3,8 +3,4 @@ image: clone_folder: c:\projects\source build_script: - cmd: >- - mkdir build && - cd build && - cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON && - cmake --build . --target install && - ctest -C Debug -V + cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc /d1reportClassLayoutChanges" -DCTEST_ARGS="-C Debug -V --output-on-failure" -DBUILD_NAME=MSVC-2019 -DBUILD_TYPE=Debug -DSITE=AppVeyor -DTARGET=install -P cmake/KokkosCI.cmake diff --git a/packages/kokkos/bin/kokkos_launch_compiler b/packages/kokkos/bin/kokkos_launch_compiler index 1fbebf648fa0af5f0ec627b87c603c651aff65e6..d929d24f1dca42fc277940ffb27f54d374e89cd1 100755 --- a/packages/kokkos/bin/kokkos_launch_compiler +++ b/packages/kokkos/bin/kokkos_launch_compiler @@ -13,6 +13,17 @@ # $1 are 'ar', 'cmake', etc. during the linking phase # +# emit a message about the underlying command executed +: ${DEBUG:=0} +: ${KOKKOS_DEBUG_LAUNCH_COMPILER:=${DEBUG}} + +debug-message() +{ + if [ "${KOKKOS_DEBUG_LAUNCH_COMPILER}" -ne 0 ]; then + echo -e "##### $(basename ${BASH_SOURCE[0]}) executing: \"$@\"... #####" + fi +} + # check the arguments for the KOKKOS_DEPENDENCE compiler definition KOKKOS_DEPENDENCE=0 for i in ${@} @@ -23,16 +34,30 @@ do fi done -# if C++ is not passed, someone is probably trying to invoke it directly +# if Kokkos compiler is not passed, someone is probably trying to invoke it directly if [ -z "${1}" ]; then - echo -e "\n${BASH_SOURCE[0]} was invoked without the C++ compiler as the first argument." + echo -e "\n${BASH_SOURCE[0]} was invoked without the Kokkos compiler as the first argument." 
echo "This script is not indended to be directly invoked by any mechanism other" - echo -e "than through a RULE_LAUNCH_COMPILE or RULE_LAUNCH_LINK property set in CMake\n" + echo -e "than through a RULE_LAUNCH_COMPILE or RULE_LAUNCH_LINK property set in CMake.\n" + exit 1 +fi + +# if Kokkos compiler is not passed, someone is probably trying to invoke it directly +if [ -z "${2}" ]; then + echo -e "\n${BASH_SOURCE[0]} was invoked without the C++ compiler as the second argument." + echo "This script is not indended to be directly invoked by any mechanism other" + echo -e "than through a RULE_LAUNCH_COMPILE or RULE_LAUNCH_LINK property set in CMake.\n" exit 1 fi # if there aren't two args, this isn't necessarily invalid, just a bit strange -if [ -z "${2}" ]; then exit 0; fi +if [ -z "${3}" ]; then exit 0; fi + +# store the Kokkos compiler +KOKKOS_COMPILER=${1} + +# remove the Kokkos compiler from the arguments +shift # store the expected C++ compiler CXX_COMPILER=${1} @@ -40,48 +65,57 @@ CXX_COMPILER=${1} # remove the expected C++ compiler from the arguments shift -# after the above shift, $1 is now the exe for the compile or link command, e.g. -# kokkos_launch_compiler g++ gcc -c file.c -o file.o +# NOTE: in below, ${KOKKOS_COMPILER} is usually nvcc_wrapper +# +# after the above shifts, $1 is now the exe for the compile or link command, e.g. +# kokkos_launch_compiler ${KOKKOS_COMPILER} g++ gcc -c file.c -o file.o # becomes: # kokkos_launch_compiler gcc -c file.c -o file.o -# Check to see if the executable is the C++ compiler and if it is not, then +# We check to see if the executable is the C++ compiler and if it is not, then # just execute the command. 
# # Summary: -# kokkos_launch_compiler g++ gcc -c file.c -o file.o +# kokkos_launch_compiler ${KOKKOS_COMPILER} g++ gcc -c file.c -o file.o # results in this command being executed: # gcc -c file.c -o file.o # and -# kokkos_launch_compiler g++ g++ -c file.cpp -o file.o +# kokkos_launch_compiler ${KOKKOS_COMPILER} g++ g++ -c file.cpp -o file.o # results in this command being executed: -# nvcc_wrapper -c file.cpp -o file.o +# ${KOKKOS_COMPILER} -c file.cpp -o file.o if [[ "${KOKKOS_DEPENDENCE}" -eq "0" || "${CXX_COMPILER}" != "${1}" ]]; then - # the command does not depend on Kokkos so just execute the command w/o re-directing to nvcc_wrapper + debug-message $@ + # the command does not depend on Kokkos so just execute the command w/o re-directing to ${KOKKOS_COMPILER} eval $@ else - # the executable is the C++ compiler, so we need to re-direct to nvcc_wrapper + # the executable is the C++ compiler, so we need to re-direct to ${KOKKOS_COMPILER} + if [ ! -f "${KOKKOS_COMPILER}" ]; then + echo -e "\nError: the compiler redirect for Kokkos was not found at ${KOKKOS_COMPILER}\n" + exit 1 + fi # find the nvcc_wrapper from the same build/install NVCC_WRAPPER="$(dirname ${BASH_SOURCE[0]})/nvcc_wrapper" + if [ "${KOKKOS_COMPILER}" = "${NVCC_WRAPPER}" ]; then + # this should only be valid in the install tree -- it will be set to CMAKE_CXX_COMPILER used using Kokkos installation + if [ -z $(echo "@NVCC_WRAPPER_DEFAULT_COMPILER@" | grep 'NVCC_WRAPPER_DEFAULT_COMPILER') ]; then + : ${NVCC_WRAPPER_DEFAULT_COMPILER:="@NVCC_WRAPPER_DEFAULT_COMPILER@"} + fi - if [ -z "${NVCC_WRAPPER}" ]; then - echo -e "\nError: nvcc_wrapper not found in $(dirname ${BASH_SOURCE[0]}).\n" - exit 1 - fi + # set default nvcc wrapper compiler if not specified + : ${NVCC_WRAPPER_DEFAULT_COMPILER:=${CXX_COMPILER}} + export NVCC_WRAPPER_DEFAULT_COMPILER - # set default nvcc wrapper compiler if not specified - : ${NVCC_WRAPPER_DEFAULT_COMPILER:=${CXX_COMPILER}} - export NVCC_WRAPPER_DEFAULT_COMPILER - - # 
calling itself will cause an infinitely long build - if [ "${NVCC_WRAPPER}" = "${NVCC_WRAPPER_DEFAULT_COMPILER}" ]; then - echo -e "\nError: NVCC_WRAPPER == NVCC_WRAPPER_DEFAULT_COMPILER. Terminating to avoid infinite loop!\n" - exit 1 + # nvcc_wrapper calling itself will cause an infinitely long build + if [ "${NVCC_WRAPPER}" = "${NVCC_WRAPPER_DEFAULT_COMPILER}" ]; then + echo -e "\nError: NVCC_WRAPPER == NVCC_WRAPPER_DEFAULT_COMPILER. Terminating to avoid infinite loop!\n" + exit 1 + fi fi # discard the compiler from the command shift - # execute nvcc_wrapper - ${NVCC_WRAPPER} $@ + debug-message ${KOKKOS_COMPILER} $@ + # execute ${KOKKOS_COMPILER} (again, usually nvcc_wrapper) + ${KOKKOS_COMPILER} $@ fi diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper index 4ecf4c66d5a069eba4c8ca4e379299dfb6ed53bb..5556e888e34b2f7c2dd18bdb6f47071abde0574b 100755 --- a/packages/kokkos/bin/nvcc_wrapper +++ b/packages/kokkos/bin/nvcc_wrapper @@ -191,11 +191,11 @@ do shift ;; #Handle known nvcc args - --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad*|--Wext-lambda-captures-this|-Wext-lambda-captures-this) + --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this) cuda_args="$cuda_args $1" ;; #Handle more known nvcc args - --expt-extended-lambda|--expt-relaxed-constexpr) + --expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets) cuda_args="$cuda_args $1" ;; #Handle known nvcc args that have an argument diff --git a/packages/kokkos/cmake/CTestConfig.cmake.in b/packages/kokkos/cmake/CTestConfig.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..1f82c0d64d15e0a4fb346cfb7227be9cd41e5f17 --- /dev/null +++ 
b/packages/kokkos/cmake/CTestConfig.cmake.in @@ -0,0 +1,91 @@ +#----------------------------------------------------------------------------------------# +# +# CTestConfig.cmake template for Kokkos +# +#----------------------------------------------------------------------------------------# + +# +# dash-board related +# +set(CTEST_PROJECT_NAME "Kokkos") +set(CTEST_NIGHTLY_START_TIME "01:00:00 UTC") +set(CTEST_DROP_METHOD "https") +set(CTEST_DROP_SITE "cdash.nersc.gov") +set(CTEST_DROP_LOCATION "/submit.php?project=${CTEST_PROJECT_NAME}") +set(CTEST_CDASH_VERSION "1.6") +set(CTEST_CDASH_QUERY_VERSION TRUE) +set(CTEST_SUBMIT_RETRY_COUNT "1") +set(CTEST_SUBMIT_RETRY_DELAY "30") + +# +# configure/build related +# +set(CTEST_BUILD_NAME "@BUILD_NAME@") +set(CTEST_MODEL "@MODEL@") +set(CTEST_SITE "@SITE@") +set(CTEST_CONFIGURATION_TYPE "@BUILD_TYPE@") +set(CTEST_SOURCE_DIRECTORY "@SOURCE_REALDIR@") +set(CTEST_BINARY_DIRECTORY "@BINARY_REALDIR@") + +# +# configure/build related +# +set(CTEST_UPDATE_TYPE "git") +set(CTEST_UPDATE_VERSION_ONLY ON) +# set(CTEST_GENERATOR "") +# set(CTEST_GENERATOR_PLATFORM "") + +# +# testing related +# +set(CTEST_TIMEOUT "7200") +set(CTEST_TEST_TIMEOUT "7200") +set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "100") +set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "100") +set(CTEST_CUSTOM_MAXIMUM_PASSED_TEST_OUTPUT_SIZE "1048576") + +# +# coverage related +# +set(CTEST_CUSTOM_COVERAGE_EXCLUDE ".*tpls/.*;/usr/.*;.*unit_test/.*;.*unit_tests/.*;.*perf_test/.*") + +# +# commands +# +if(NOT "@CHECKOUT_COMMAND@" STREQUAL "") + set(CTEST_CHECKOUT_COMMAND "@CHECKOUT_COMMAND@") +endif() +set(CTEST_UPDATE_COMMAND "@GIT_EXECUTABLE@") +set(CTEST_CONFIGURE_COMMAND "@CMAKE_COMMAND@ -DCMAKE_BUILD_TYPE=@BUILD_TYPE@ -DKokkos_ENABLE_TESTS=ON @CONFIG_ARGS@ @SOURCE_REALDIR@") +set(CTEST_BUILD_COMMAND "@CMAKE_COMMAND@ --build @BINARY_REALDIR@ --target @TARGET@") +if(NOT WIN32) + set(CTEST_BUILD_COMMAND "${CTEST_BUILD_COMMAND} -- -j@BUILD_JOBS@") +endif() 
+set(CTEST_COVERAGE_COMMAND "gcov") +set(CTEST_MEMORYCHECK_COMMAND "valgrind") +set(CTEST_GIT_COMMAND "@GIT_EXECUTABLE@") + +# +# various configs +# +set(APPEND_VALUE @APPEND@) +if(APPEND_VALUE) + set(APPEND_CTEST APPEND) +endif() + +macro(SET_TEST_PROP VAR) + if(NOT "${ARGS}" STREQUAL "") + set(${VAR}_CTEST ${VAR} ${ARGN}) + endif() +endmacro() + +set_test_prop(START @START@) +set_test_prop(END @END@) +set_test_prop(STRIDE @STRIDE@) +set_test_prop(INCLUDE @INCLUDE@) +set_test_prop(EXCLUDE @EXCLUDE@) +set_test_prop(INCLUDE_LABEL @INCLUDE_LABEL@) +set_test_prop(EXCLUDE_LABEL @EXCLUDE_LABEL@) +set_test_prop(PARALLEL_LEVEL @PARALLEL_LEVEL@) +set_test_prop(STOP_TIME @STOP_TIME@) +set_test_prop(COVERAGE_LABELS @LABELS@) diff --git a/packages/kokkos/cmake/KokkosCI.cmake b/packages/kokkos/cmake/KokkosCI.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e8c9af37ad544a93a62f498e9a903696321a1c75 --- /dev/null +++ b/packages/kokkos/cmake/KokkosCI.cmake @@ -0,0 +1,350 @@ +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) + +message(STATUS "") + +get_cmake_property(_cached_vars CACHE_VARIABLES) +set(KOKKOS_CMAKE_ARGS) +set(EXCLUDED_VARIABLES "CMAKE_COMMAND" "CMAKE_CPACK_COMMAND" "CMAKE_CTEST_COMMAND" "CMAKE_ROOT" + "CTEST_ARGS" "BUILD_NAME" "CMAKE_CXX_FLAGS" "CMAKE_BUILD_TYPE") +list(SORT _cached_vars) +foreach(_var ${_cached_vars}) + if(NOT "${_var}" IN_LIST EXCLUDED_VARIABLES) + list(APPEND KOKKOS_CMAKE_ARGS ${_var}) + if("${_var}" STREQUAL "CMAKE_BUILD_TYPE") + set(BUILD_TYPE "${CMAKE_BUILD_TYPE}") + endif() + endif() +endforeach() + + +#----------------------------------------------------------------------------------------# +# +# Macros and variables +# +#----------------------------------------------------------------------------------------# + +macro(CHECK_REQUIRED VAR) + if(NOT DEFINED ${VAR}) + message(FATAL_ERROR "Error! 
Variable '${VAR}' must be defined") + endif() +endmacro() + +# require the build name variable +CHECK_REQUIRED(BUILD_NAME) + +# uses all args +macro(SET_DEFAULT VAR) + if(NOT DEFINED ${VAR}) + set(${VAR} ${ARGN}) + endif() + # remove these ctest configuration variables from the defines + # passed to the Kokkos configuration + if("${VAR}" IN_LIST KOKKOS_CMAKE_ARGS) + list(REMOVE_ITEM KOKKOS_CMAKE_ARGS "${VAR}") + endif() +endmacro() + +# uses first arg -- useful for selecting via priority from multiple +# potentially defined variables, e.g.: +# +# set_default_arg1(BUILD_NAME ${TRAVIS_BUILD_NAME} ${BUILD_NAME}) +# +macro(SET_DEFAULT_ARG1 VAR) + if(NOT DEFINED ${VAR}) + foreach(_ARG ${ARGN}) + if(NOT "${_ARG}" STREQUAL "") + set(${VAR} ${_ARG}) + break() + endif() + endforeach() + endif() + # remove these ctest configuration variables from the defines + # passed to the Kokkos configuration + if("${VAR}" IN_LIST KOKKOS_CMAKE_ARGS) + list(REMOVE_ITEM KOKKOS_CMAKE_ARGS "${VAR}") + endif() +endmacro() + +# determine the default working directory +if(NOT "$ENV{WORKSPACE}" STREQUAL "") + set(WORKING_DIR "$ENV{WORKSPACE}") +else() + get_filename_component(WORKING_DIR ${CMAKE_CURRENT_LIST_DIR} DIRECTORY) +endif() + +# determine the hostname +execute_process(COMMAND hostname + OUTPUT_VARIABLE HOSTNAME + OUTPUT_STRIP_TRAILING_WHITESPACE) + +SET_DEFAULT(HOSTNAME "$ENV{HOSTNAME}") + +# get the number of processors +include(ProcessorCount) +ProcessorCount(NUM_PROCESSORS) + +# find git +find_package(Git QUIET) +if(NOT GIT_EXECUTABLE) + unset(GIT_EXECUTABLE CACHE) + unset(GIT_EXECUTABLE) +endif() + +function(EXECUTE_GIT_COMMAND VAR) + set(${VAR} "" PARENT_SCOPE) + execute_process(COMMAND ${GIT_EXECUTABLE} ${ARGN} + OUTPUT_VARIABLE VAL + RESULT_VARIABLE RET + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + ERROR_QUIET) + string(REPLACE ";" " " _CMD "${GIT_EXECUTABLE} ${ARGN}") + set(LAST_GIT_COMMAND "${_CMD}" PARENT_SCOPE) + if(RET EQUAL 0) + 
set(${VAR} "${VAL}" PARENT_SCOPE) + endif() +endfunction() + +# just gets the git branch name if available +function(GET_GIT_BRANCH_NAME VAR) + execute_git_command(GIT_BRANCH branch --show-current) + set(_INVALID "%D" "HEAD") + if(NOT GIT_BRANCH OR "${GIT_BRANCH}" IN_LIST _INVALID) + execute_git_command(GIT_BRANCH show -s --format=%D) + if(NOT GIT_BRANCH OR "${GIT_BRANCH}" IN_LIST _INVALID) + execute_git_command(GIT_BRANCH --describe all) + endif() + endif() + # + if(GIT_BRANCH) + string(REPLACE " " ";" _DESC "${GIT_BRANCH}") + # just set it to last one via loop instead of wonky cmake index manip + foreach(_ITR ${_DESC}) + set(GIT_BRANCH "${_ITR}") + endforeach() + set(${VAR} "${GIT_BRANCH}" PARENT_SCOPE) + message(STATUS "GIT BRANCH via '${LAST_GIT_COMMAND}': ${GIT_BRANCH}") + endif() +endfunction() + +# just gets the git branch name if available +function(GET_GIT_AUTHOR_NAME VAR) + execute_git_command(GIT_AUTHOR show -s --format=%an) + if(GIT_AUTHOR) + string(LENGTH "${GIT_AUTHOR}" STRLEN) + # if the build name gets too long, this can cause submission errors + if(STRLEN GREATER 24) + # remove middle initial + string(REGEX REPLACE " [A-Z]\. " " " GIT_AUTHOR "${GIT_AUTHOR}") + # get first and sur name + string(REGEX REPLACE "([A-Za-z]+) ([A-Za-z]+)" "\\1" F_NAME "${GIT_AUTHOR}") + string(REGEX REPLACE "([A-Za-z]+) ([A-Za-z]+)" "\\2" S_NAME "${GIT_AUTHOR}") + if(S_NAME) + set(GIT_AUTHOR "${S_NAME}") + elseif(F_NAME) + set(GIT_AUTHOR "${F_NAME}") + endif() + endif() + # remove any spaces, quotes, periods, etc. 
+ string(REGEX REPLACE "[ ',;_\.\"]+" "" GIT_AUTHOR "${GIT_AUTHOR}") + set(${VAR} "${GIT_AUTHOR}" PARENT_SCOPE) + message(STATUS "GIT AUTHOR via '${LAST_GIT_COMMAND}': ${GIT_AUTHOR}") + endif() +endfunction() + +# get the name of the branch +GET_GIT_BRANCH_NAME(GIT_BRANCH) +# get the name of the author +GET_GIT_AUTHOR_NAME(GIT_AUTHOR) +# author, prefer git method for consistency +SET_DEFAULT_ARG1(AUTHOR ${GIT_AUTHOR} $ENV{GIT_AUTHOR} $ENV{AUTHOR}) +# SLUG == owner_name/repo_name +SET_DEFAULT_ARG1(SLUG $ENV{TRAVIS_PULL_REQUEST_SLUG} $ENV{TRAVIS_REPO_SLUG} $ENV{APPVEYOR_REPO_NAME} $ENV{PULL_REQUEST_SLUG} $ENV{REPO_SLUG}) +# branch name +SET_DEFAULT_ARG1(BRANCH $ENV{TRAVIS_PULL_REQUEST_BRANCH} $ENV{TRAVIS_BRANCH} $ENV{APPVEYOR_PULL_REQUEST_HEAD_REPO_BRANCH} $ENV{APPVEYOR_REPO_BRANCH} $ENV{GIT_BRANCH} $ENV{BRANCH_NAME} $ENV{BRANCH} ${GIT_BRANCH}) +# pull request number +SET_DEFAULT_ARG1(PULL_REQUEST_NUM $ENV{TRAVIS_PULL_REQUEST} $ENV{CHANGE_ID} $ENV{APPVEYOR_PULL_REQUEST_NUMBER} $ENV{PULL_REQUEST_NUM}) +# get the event type, e.g. push, pull_request, api, cron, etc. +SET_DEFAULT_ARG1(EVENT_TYPE $ENV{TRAVIS_EVENT_TYPE} ${EVENT_TYPE}) + +if("${BRANCH}" STREQUAL "") + message(STATUS "Checked: environment variables for Travis, Appveyor, Jenkins (git plugin), BRANCH_NAME, BRANCH and 'git branch --show-current'") + message(FATAL_ERROR "Error! Git branch could not be determined. Please provide -DBRANCH=<name>") +endif() + +#----------------------------------------------------------------------------------------# +# +# Set default values if not provided on command-line +# +#----------------------------------------------------------------------------------------# + +SET_DEFAULT(SOURCE_DIR "${WORKING_DIR}") # source directory +SET_DEFAULT(BINARY_DIR "${WORKING_DIR}/build") # build directory +SET_DEFAULT(BUILD_TYPE "${CMAKE_BUILD_TYPE}") # Release, Debug, etc. 
+SET_DEFAULT(MODEL "Continuous") # Continuous, Nightly, or Experimental +SET_DEFAULT(JOBS 1) # number of parallel ctests +SET_DEFAULT(CTEST_COMMAND "${CMAKE_CTEST_COMMAND}") # just in case +SET_DEFAULT(CTEST_ARGS "-V --output-on-failure") # extra arguments when ctest is called +SET_DEFAULT(GIT_EXECUTABLE "git") # ctest_update +SET_DEFAULT(TARGET "all") # build target +SET_DEFAULT_ARG1(SITE "$ENV{SITE}" + "${HOSTNAME}") # update site +SET_DEFAULT_ARG1(BUILD_JOBS "$ENV{BUILD_JOBS}" + "${NUM_PROCESSORS}") # number of parallel compile jobs +# +# The variable below correspond to ctest arguments, i.e. START,END,STRIDE are +# '-I START,END,STRIDE' +# +SET_DEFAULT(START "") +SET_DEFAULT(END "") +SET_DEFAULT(STRIDE "") +SET_DEFAULT(INCLUDE "") +SET_DEFAULT(EXCLUDE "") +SET_DEFAULT(INCLUDE_LABEL "") +SET_DEFAULT(EXCLUDE_LABEL "") +SET_DEFAULT(PARALLEL_LEVEL "") +SET_DEFAULT(STOP_TIME "") +SET_DEFAULT(LABELS "") +SET_DEFAULT(NOTES "") + +# default static build tag for Nightly +set(BUILD_TAG "${BRANCH}") + +if(NOT BUILD_TYPE) + # default for kokkos if not specified + set(BUILD_TYPE "RelWithDebInfo") +endif() + +# generate dynamic name if continuous or experimental model +if(NOT "${MODEL}" STREQUAL "Nightly") + if(EVENT_TYPE AND PULL_REQUEST_NUM) + # e.g. pull_request/123 + if(AUTHOR) + set(BUILD_TAG "${AUTHOR}/${EVENT_TYPE}/${PULL_REQUEST_NUM}") + else() + set(BUILD_TAG "${EVENT_TYPE}/${PULL_REQUEST_NUM}") + endif() + elseif(SLUG) + # e.g. 
owner_name/repo_name + set(BUILD_TAG "${SLUG}") + elseif(AUTHOR) + set(BUILD_TAG "${AUTHOR}/${BRANCH}") + endif() + if(EVENT_TYPE AND NOT PULL_REQUEST_NUM) + set(BUILD_TAG "${BUILD_TAG}-${EVENT_TYPE}") + endif() +endif() + +# unnecessary +string(REPLACE "/remotes/" "/" BUILD_TAG "${BUILD_TAG}") +string(REPLACE "/origin/" "/" BUILD_TAG "${BUILD_TAG}") + +message(STATUS "BUILD_TAG: ${BUILD_TAG}") + +set(BUILD_NAME "[${BUILD_TAG}] [${BUILD_NAME}-${BUILD_TYPE}]") + +# colons in build name create extra (empty) entries in CDash +string(REPLACE ":" "-" BUILD_NAME "${BUILD_NAME}") +# unnecessary info +string(REPLACE "/merge]" "]" BUILD_NAME "${BUILD_NAME}") +# consistency +string(REPLACE "/pr/" "/pull/" BUILD_NAME "${BUILD_NAME}") +string(REPLACE "pull_request/" "pull/" BUILD_NAME "${BUILD_NAME}") +# miscellaneous from missing fields +string(REPLACE "--" "-" BUILD_NAME "${BUILD_NAME}") +string(REPLACE "-]" "]" BUILD_NAME "${BUILD_NAME}") + +# check binary directory +if(EXISTS ${BINARY_DIR}) + if(NOT IS_DIRECTORY "${BINARY_DIR}") + message(FATAL_ERROR "Error! '${BINARY_DIR}' already exists and is not a directory!") + endif() + file(GLOB BINARY_DIR_FILES "${BINARY_DIR}/*") + if(NOT "${BINARY_DIR_FILES}" STREQUAL "") + message(FATAL_ERROR "Error! 
'${BINARY_DIR}' already exists and is not empty!") + endif() +endif() + +get_filename_component(SOURCE_REALDIR ${SOURCE_DIR} REALPATH) +get_filename_component(BINARY_REALDIR ${BINARY_DIR} REALPATH) + +#----------------------------------------------------------------------------------------# +# +# Generate the CTestConfig.cmake +# +#----------------------------------------------------------------------------------------# + +set(CONFIG_ARGS) +foreach(_ARG ${KOKKOS_CMAKE_ARGS}) + if(NOT "${${_ARG}}" STREQUAL "") + get_property(_ARG_TYPE CACHE ${_ARG} PROPERTY TYPE) + if("${_ARG_TYPE}" STREQUAL "UNINITIALIZED") + if("${${_ARG}}" STREQUAL "ON" OR "${${_ARG}}" STREQUAL "OFF") + set(_ARG_TYPE "BOOL") + elseif(EXISTS "${${_ARG}}" AND NOT IS_DIRECTORY "${${_ARG}}") + set(_ARG_TYPE "FILEPATH") + elseif(EXISTS "${${_ARG}}" AND IS_DIRECTORY "${${_ARG}}") + set(_ARG_TYPE "PATH") + elseif(NOT "${${_ARG}}" STREQUAL "") + set(_ARG_TYPE "STRING") + endif() + endif() + set(CONFIG_ARGS "${CONFIG_ARGS}set(${_ARG} \"${${_ARG}}\" CACHE ${_ARG_TYPE} \"\")\n") + endif() +endforeach() + +file(WRITE ${BINARY_REALDIR}/initial-cache.cmake +" +set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS}\" CACHE STRING \"\") +${CONFIG_ARGS} +") + +file(READ ${BINARY_REALDIR}/initial-cache.cmake _CACHE_INFO) +message(STATUS "Initial cache:\n${_CACHE_INFO}") + +# initialize the cache +set(CONFIG_ARGS "-C ${BINARY_REALDIR}/initial-cache.cmake") + + +# generate the CTestConfig.cmake +configure_file( + ${CMAKE_CURRENT_LIST_DIR}/CTestConfig.cmake.in + ${BINARY_REALDIR}/CTestConfig.cmake + @ONLY) + +# copy/generate the dashboard script +configure_file( + ${CMAKE_CURRENT_LIST_DIR}/KokkosCTest.cmake.in + ${BINARY_REALDIR}/KokkosCTest.cmake + @ONLY) + +# custom CTest settings go in ${BINARY_DIR}/CTestCustom.cmake +execute_process( + COMMAND ${CMAKE_COMMAND} -E touch CTestCustom.cmake + WORKING_DIRECTORY ${BINARY_REALDIR} + ) + +#----------------------------------------------------------------------------------------# +# +# 
Execute CTest +# +#----------------------------------------------------------------------------------------# + +message(STATUS "") +message(STATUS "BUILD_NAME: ${BUILD_NAME}") +message(STATUS "Executing '${CTEST_COMMAND} -S KokkosCTest.cmake ${CTEST_ARGS}'...") +message(STATUS "") + +# e.g. -DCTEST_ARGS="--output-on-failure -VV" should really be -DCTEST_ARGS="--output-on-failure;-VV" +string(REPLACE " " ";" CTEST_ARGS "${CTEST_ARGS}") + +execute_process( + COMMAND ${CTEST_COMMAND} -S KokkosCTest.cmake ${CTEST_ARGS} + RESULT_VARIABLE RET + WORKING_DIRECTORY ${BINARY_REALDIR} + ) + +# ensure that any non-zero result variable gets propagated +if(NOT RET EQUAL 0) + message(FATAL_ERROR "CTest return non-zero exit code: ${RET}") +endif() diff --git a/packages/kokkos/cmake/KokkosCTest.cmake.in b/packages/kokkos/cmake/KokkosCTest.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..b6917f3cc1897aa6b1f0876560bb08c0c87b4c3a --- /dev/null +++ b/packages/kokkos/cmake/KokkosCTest.cmake.in @@ -0,0 +1,261 @@ +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) + +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/CTestConfig.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/CTestConfig.cmake") +endif() + +include(ProcessorCount) +ProcessorCount(CTEST_PROCESSOR_COUNT) + +cmake_policy(SET CMP0009 NEW) +cmake_policy(SET CMP0011 NEW) + +# ---------------------------------------------------------------------------- # +# -- Commands +# ---------------------------------------------------------------------------- # +find_program(CTEST_CMAKE_COMMAND NAMES cmake) +find_program(CTEST_UNAME_COMMAND NAMES uname) + +find_program(CTEST_BZR_COMMAND NAMES bzr) +find_program(CTEST_CVS_COMMAND NAMES cvs) +find_program(CTEST_GIT_COMMAND NAMES git) +find_program(CTEST_HG_COMMAND NAMES hg) +find_program(CTEST_P4_COMMAND NAMES p4) +find_program(CTEST_SVN_COMMAND NAMES svn) + +find_program(VALGRIND_COMMAND NAMES valgrind) +find_program(GCOV_COMMAND NAMES gcov) +find_program(LCOV_COMMAND NAMES 
llvm-cov) +find_program(MEMORYCHECK_COMMAND NAMES valgrind ) + +set(MEMORYCHECK_TYPE Valgrind) +# set(MEMORYCHECK_TYPE Purify) +# set(MEMORYCHECK_TYPE BoundsChecker) +# set(MEMORYCHECK_TYPE ThreadSanitizer) +# set(MEMORYCHECK_TYPE AddressSanitizer) +# set(MEMORYCHECK_TYPE LeakSanitizer) +# set(MEMORYCHECK_TYPE MemorySanitizer) +# set(MEMORYCHECK_TYPE UndefinedBehaviorSanitizer) +set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full") + +# ---------------------------------------------------------------------------- # +# -- Settings +# ---------------------------------------------------------------------------- # +## -- Process timeout in seconds +set(CTEST_TIMEOUT "7200") +## -- Set output to English +set(ENV{LC_MESSAGES} "en_EN" ) + + +# ---------------------------------------------------------------------------- # +# -- Copy ctest configuration file +# ---------------------------------------------------------------------------- # +macro(COPY_CTEST_CONFIG_FILES) + + foreach(_FILE CTestConfig.cmake CTestCustom.cmake) + + # if current directory is not binary or source directory + if(NOT "${CMAKE_CURRENT_LIST_DIR}" STREQUAL "${CTEST_BINARY_DIRECTORY}" AND + NOT "${CTEST_SOURCE_DIRECTORY}" STREQUAL "${CTEST_BINARY_DIRECTORY}") + + # if file exists in current directory + if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/${_FILE}) + configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE} + ${CTEST_BINARY_DIRECTORY}/${_FILE} COPYONLY) + endif() + + # if source and binary differ + elseif(NOT "${CTEST_SOURCE_DIRECTORY}" STREQUAL "${CTEST_BINARY_DIRECTORY}") + + # if file exists in source directory but not in binary directory + if(EXISTS ${CTEST_SOURCE_DIRECTORY}/${_FILE} AND + NOT EXISTS ${CTEST_BINARY_DIRECTORY}/${_FILE}) + configure_file(${CTEST_SOURCE_DIRECTORY}/${_FILE} + ${CTEST_BINARY_DIRECTORY}/${_FILE} COPYONLY) + endif() + + endif() + endforeach() + +endmacro() + +ctest_read_custom_files("${CMAKE_CURRENT_LIST_DIR}") + +message(STATUS "CTEST_MODEL: ${CTEST_MODEL}") + 
+#-------------------------------------------------------------------------# +# Start +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Running START_CTEST stage...") +message(STATUS "") + +ctest_start(${CTEST_MODEL} TRACK ${CTEST_MODEL} ${APPEND_CTEST} + ${CTEST_SOURCE_DIRECTORY} ${CTEST_BINARY_DIRECTORY}) + + +#-------------------------------------------------------------------------# +# Config +# +copy_ctest_config_files() +ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}") + + +#-------------------------------------------------------------------------# +# Update +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_UPDATE stage...") +message(STATUS "") + +ctest_update(SOURCE "${CTEST_SOURCE_DIRECTORY}" + RETURN_VALUE up_ret) + + +#-------------------------------------------------------------------------# +# Configure +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_CONFIGURE stage...") +message(STATUS "") + +ctest_configure(BUILD "${CTEST_BINARY_DIRECTORY}" + SOURCE ${CTEST_SOURCE_DIRECTORY} + ${APPEND_CTEST} + OPTIONS "${CTEST_CONFIGURE_OPTIONS}" + RETURN_VALUE config_ret) + + +#-------------------------------------------------------------------------# +# Echo configure log bc Damien wants to delay merging this PR for eternity +# +file(GLOB _configure_log "${CTEST_BINARY_DIRECTORY}/Testing/Temporary/LastConfigure*.log") +# should only have one but loop just for safety +foreach(_LOG ${_configure_log}) + file(READ ${_LOG} _LOG_MESSAGE) + message(STATUS "Configure Log: ${_LOG}") + message(STATUS "\n${_LOG_MESSAGE}\n") +endforeach() + + +#-------------------------------------------------------------------------# +# Build +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_BUILD stage...") +message(STATUS "") + +ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" + ${APPEND_CTEST} + RETURN_VALUE build_ret) + + +#-------------------------------------------------------------------------# 
+# Echo build log bc Damien wants to delay merging this PR for eternity +# +file(GLOB _build_log "${CTEST_BINARY_DIRECTORY}/Testing/Temporary/LastBuild*.log") +# should only have one but loop just for safety +foreach(_LOG ${_build_log}) + file(READ ${_LOG} _LOG_MESSAGE) + message(STATUS "Build Log: ${_LOG}") + message(STATUS "\n${_LOG_MESSAGE}\n") +endforeach() + + +#-------------------------------------------------------------------------# +# Test +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_TEST stage...") +message(STATUS "") + +ctest_test(RETURN_VALUE test_ret + ${APPEND_CTEST} + ${START_CTEST} + ${END_CTEST} + ${STRIDE_CTEST} + ${INCLUDE_CTEST} + ${EXCLUDE_CTEST} + ${INCLUDE_LABEL_CTEST} + ${EXCLUDE_LABEL_CTEST} + ${PARALLEL_LEVEL_CTEST} + ${STOP_TIME_CTEST} + SCHEDULE_RANDOM OFF) + + +#-------------------------------------------------------------------------# +# Coverage +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_COVERAGE stage...") +message(STATUS "") + +execute_process(COMMAND ${CTEST_COVERAGE_COMMAND} ${CTEST_COVERAGE_EXTRA_FLAGS} + WORKING_DIRECTORY ${CTEST_BINARY_DIRECTORY} + ERROR_QUIET) + +ctest_coverage(${APPEND_CTEST} + ${CTEST_COVERAGE_LABELS} + RETURN_VALUE cov_ret) + + +#-------------------------------------------------------------------------# +# MemCheck +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_MEMCHECK stage...") +message(STATUS "") + +ctest_memcheck(RETURN_VALUE mem_ret + ${APPEND_CTEST} + ${START_CTEST} + ${END_CTEST} + ${STRIDE_CTEST} + ${INCLUDE_CTEST} + ${EXCLUDE_CTEST} + ${INCLUDE_LABEL_CTEST} + ${EXCLUDE_LABEL_CTEST} + ${PARALLEL_LEVEL_CTEST}) + + +#-------------------------------------------------------------------------# +# Submit +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_SUBMIT stage...") +message(STATUS "") + +file(GLOB_RECURSE NOTE_FILES "${CTEST_BINARY_DIRECTORY}/*CTestNotes.cmake") +foreach(_FILE 
${NOTE_FILES}) + message(STATUS "Including CTest notes files: \"${_FILE}\"...") + include("${_FILE}") +endforeach() + +# capture submit error so it doesn't fail because of a submission error +ctest_submit(RETURN_VALUE submit_ret + RETRY_COUNT 2 + RETRY_DELAY 10 + CAPTURE_CMAKE_ERROR submit_err) + +#-------------------------------------------------------------------------# +# Submit +# +message(STATUS "") +message(STATUS "[${CTEST_BUILD_NAME}] Finished ${CTEST_MODEL} Stages (${STAGES})") +message(STATUS "") + + +#-------------------------------------------------------------------------# +# Non-zero exit codes for important errors +# +if(NOT config_ret EQUAL 0) + message(FATAL_ERROR "Error during configuration! Exit code: ${config_ret}") +endif() + +if(NOT build_ret EQUAL 0) + message(FATAL_ERROR "Error during build! Exit code: ${build_ret}") +endif() + +if(NOT test_ret EQUAL 0) + message(FATAL_ERROR "Error during testing! Exit code: ${test_ret}") +endif() diff --git a/packages/kokkos/cmake/KokkosConfig.cmake.in b/packages/kokkos/cmake/KokkosConfig.cmake.in index 9fbd22ee5c47899f5b625b852dd2858c894e2053..44a8fcd9c319326399ab19146f8cf213dbb51f64 100644 --- a/packages/kokkos/cmake/KokkosConfig.cmake.in +++ b/packages/kokkos/cmake/KokkosConfig.cmake.in @@ -19,17 +19,44 @@ INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") INCLUDE("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake") UNSET(Kokkos_CMAKE_DIR) -# if CUDA was enabled and separable compilation was specified, e.g. 
-# find_package(Kokkos COMPONENTS separable_compilation) -# then we set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK -IF(@Kokkos_ENABLE_CUDA@ AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) +# check for conflicts +IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND + "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) + MESSAGE(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.") + MESSAGE(STATUS "'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'") + MESSAGE(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'") +ENDIF() + +IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS) + # + # if find_package(Kokkos COMPONENTS launch_compiler) then rely on the + # RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the + # appropriate compiler for Kokkos + # + + MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos") + kokkos_compilation( + GLOBAL + CHECK_CUDA_COMPILES) + +ELSEIF(@Kokkos_ENABLE_CUDA@ AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) + # + # if CUDA was enabled, separable compilation was not specified, and current compiler + # cannot compile CUDA, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and + # kokkos_launch_compiler will re-direct to the compiler used to compile CUDA code during installation. 
+ # kokkos_launch_compiler will re-direct if ${CMAKE_CXX_COMPILER} and -DKOKKOS_DEPENDENCE is present, + # otherwise, the original command will be executed + # + # run test to see if CMAKE_CXX_COMPILER=nvcc_wrapper kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER}) - # if not nvcc_wrapper, use RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK - IF(NOT IS_NVCC AND NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang AND - (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER)) - MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to nvcc_wrapper") + + # if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF + IF(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER)) + MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos") kokkos_compilation(GLOBAL) ENDIF() - UNSET(IS_NVCC) # be mindful of the environment, pollution is bad + + # be mindful of the environment, pollution is bad + UNSET(IS_NVCC) ENDIF() diff --git a/packages/kokkos/cmake/KokkosConfigCommon.cmake.in b/packages/kokkos/cmake/KokkosConfigCommon.cmake.in index 42c755c2157f67baa3c88af05172ac450651f7e2..ab93e65afe97ab9be9295312e6cd879a1aff6b27 100644 --- a/packages/kokkos/cmake/KokkosConfigCommon.cmake.in +++ b/packages/kokkos/cmake/KokkosConfigCommon.cmake.in @@ -3,6 +3,7 @@ SET(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@) SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@) SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@) SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") +SET(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") # These are needed by KokkosKernels FOREACH(DEV ${Kokkos_DEVICES}) @@ -13,13 +14,13 @@ IF(NOT Kokkos_FIND_QUIETLY) MESSAGE(STATUS "Enabled Kokkos devices: ${Kokkos_DEVICES}") ENDIF() -IF (Kokkos_ENABLE_CUDA AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14.0") - #If we are building CUDA, we have tricked 
CMake because we declare a CXX project - #If the default C++ standard for a given compiler matches the requested - #standard, then CMake just omits the -std flag in later versions of CMake - #This breaks CUDA compilation (CUDA compiler can have a different default - #-std then the underlying host compiler by itself). Setting this variable - #forces CMake to always add the -std flag even if it thinks it doesn't need it +IF (Kokkos_ENABLE_CUDA) + # If we are building CUDA, we have tricked CMake because we declare a CXX project + # If the default C++ standard for a given compiler matches the requested + # standard, then CMake just omits the -std flag in later versions of CMake + # This breaks CUDA compilation (CUDA compiler can have a different default + # -std then the underlying host compiler by itself). Setting this variable + # forces CMake to always add the -std flag even if it thinks it doesn't need it SET(CMAKE_CXX_STANDARD_DEFAULT 98 CACHE INTERNAL "" FORCE) ENDIF() @@ -90,7 +91,88 @@ function(kokkos_check) endif() endfunction() -# this function is provided to easily select which files use nvcc_wrapper: +# A test to check whether a downstream project set the C++ compiler to NVCC or not +# this is called only when Kokkos was installed with Kokkos_ENABLE_CUDA=ON +FUNCTION(kokkos_compiler_is_nvcc VAR COMPILER) + # Check if the compiler is nvcc (which really means nvcc_wrapper). 
+ EXECUTE_PROCESS(COMMAND ${COMPILER} ${ARGN} --version + OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RET) + # something went wrong + IF(RET GREATER 0) + SET(${VAR} false PARENT_SCOPE) + ELSE() + STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION} ) + STRING(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "nvcc" INTERNAL_COMPILER_VERSION_CONTAINS_NVCC) + STRING(REGEX REPLACE "^ +" "" INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}") + IF(${INTERNAL_COMPILER_VERSION_CONTAINS_NVCC} GREATER -1) + SET(${VAR} true PARENT_SCOPE) + ELSE() + SET(${VAR} false PARENT_SCOPE) + ENDIF() + ENDIF() +ENDFUNCTION() + +# this function checks whether the current CXX compiler supports building CUDA +FUNCTION(kokkos_cxx_compiler_cuda_test _VAR _COMPILER) + + FILE(WRITE ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu +" +#include <cuda.h> +#include <cstdlib> + +__global__ +void kernel(int sz, double* data) +{ + int _beg = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = _beg; i < sz; ++i) + data[i] += static_cast<double>(i); +} + +int main() +{ + double* data = NULL; + int blocks = 64; + int grids = 64; + int ret = cudaMalloc(&data, blocks * grids * sizeof(double)); + if(ret != cudaSuccess) + return EXIT_FAILURE; + kernel<<<grids, blocks>>>(blocks * grids, data); + cudaDeviceSynchronize(); + return EXIT_SUCCESS; +} +") + + # save the command for debugging + SET(_COMMANDS "${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu") + + # use execute_process instead of try compile because we want to set custom compiler + EXECUTE_PROCESS(COMMAND ${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu + RESULT_VARIABLE _RET + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests + TIMEOUT 15 + OUTPUT_QUIET + ERROR_QUIET) + + IF(NOT _RET EQUAL 0) + # save the command for debugging + SET(_COMMANDS "${_COMMAND}\n${_COMPILER} --cuda-gpu-arch=sm_35 
${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu") + # try the compile test again with clang arguments + EXECUTE_PROCESS(COMMAND ${_COMPILER} --cuda-gpu-arch=sm_35 -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu + RESULT_VARIABLE _RET + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests + TIMEOUT 15 + OUTPUT_QUIET + ERROR_QUIET) + ENDIF() + + SET(${_VAR}_COMMANDS "${_COMMANDS}" PARENT_SCOPE) + SET(${_VAR} ${_RET} PARENT_SCOPE) +ENDFUNCTION() + +# this function is provided to easily select which files use the same compiler as Kokkos +# when it was installed (or nvcc_wrapper): # # GLOBAL --> all files # TARGET --> all files in a target @@ -98,8 +180,21 @@ endfunction() # DIRECTORY --> all files in directory # PROJECT --> all files/targets in a project/subproject # +# Use the COMPILER argument to specify a compiler, if needed. By default, it will +# set the values to ${Kokkos_CXX_COMPILER} unless Kokkos_ENABLE_CUDA=ON and +# Kokkos_CXX_COMPILER_ID is NVIDIA, then it will set it to nvcc_wrapper +# +# Use CHECK_CUDA_COMPILES to run a check when CUDA is enabled +# FUNCTION(kokkos_compilation) - CMAKE_PARSE_ARGUMENTS(COMP "GLOBAL;PROJECT" "" "DIRECTORY;TARGET;SOURCE" ${ARGN}) + CMAKE_PARSE_ARGUMENTS(COMP + "GLOBAL;PROJECT;CHECK_CUDA_COMPILES" + "COMPILER" + "DIRECTORY;TARGET;SOURCE;COMMAND_PREFIX" + ${ARGN}) + + # if built w/o CUDA support, we want to basically make this a no-op + SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@) # search relative first and then absolute SET(_HINTS "${CMAKE_CURRENT_LIST_DIR}/../.." "@CMAKE_INSTALL_PREFIX@") @@ -115,10 +210,52 @@ FUNCTION(kokkos_compilation) MESSAGE(FATAL_ERROR "Kokkos could not find 'kokkos_launch_compiler'. 
Please set '-DKokkos_COMPILE_LAUNCHER=/path/to/launcher'") ENDIF() + # if COMPILER was not specified, assume Kokkos_CXX_COMPILER + IF(NOT COMP_COMPILER) + SET(COMP_COMPILER ${Kokkos_CXX_COMPILER}) + IF(_Kokkos_ENABLE_CUDA AND Kokkos_CXX_COMPILER_ID STREQUAL NVIDIA) + # find nvcc_wrapper + FIND_PROGRAM(Kokkos_NVCC_WRAPPER + NAMES nvcc_wrapper + HINTS ${_HINTS} + PATHS ${_HINTS} + PATH_SUFFIXES bin) + # fatal if we can't nvcc_wrapper + IF(NOT Kokkos_NVCC_WRAPPER) + MESSAGE(FATAL_ERROR "Kokkos could not find nvcc_wrapper. Please set '-DKokkos_NVCC_WRAPPER=/path/to/nvcc_wrapper'") + ENDIF() + SET(COMP_COMPILER ${Kokkos_NVCC_WRAPPER}) + ENDIF() + ENDIF() + + # check that the original compiler still exists! + IF(NOT EXISTS ${COMP_COMPILER}) + MESSAGE(FATAL_ERROR "Kokkos could not find original compiler: '${COMP_COMPILER}'") + ENDIF() + + # try to ensure that compiling cuda code works! + IF(_Kokkos_ENABLE_CUDA AND COMP_CHECK_CUDA_COMPILES) + + # this may fail if kokkos_compiler launcher was used during install + kokkos_cxx_compiler_cuda_test(_COMPILES_CUDA + ${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}) + + # if above failed, throw an error + IF(NOT _COMPILES_CUDA) + MESSAGE(FATAL_ERROR "kokkos_cxx_compiler_cuda_test failed! 
Test commands:\n${_COMPILES_CUDA_COMMANDS}") + ENDIF() + ENDIF() + + IF(COMP_COMMAND_PREFIX) + SET(_PREFIX "${COMP_COMMAND_PREFIX}") + STRING(REPLACE ";" " " _PREFIX "${COMP_COMMAND_PREFIX}") + SET(Kokkos_COMPILER_LAUNCHER "${_PREFIX} ${Kokkos_COMPILE_LAUNCHER}") + ENDIF() + IF(COMP_GLOBAL) # if global, don't bother setting others - SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER}") - SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER}") + SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}") + SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}") ELSE() FOREACH(_TYPE PROJECT DIRECTORY TARGET SOURCE) # make project/subproject scoping easy, e.g. KokkosCompilation(PROJECT) after project(...) @@ -128,34 +265,10 @@ FUNCTION(kokkos_compilation) ENDIF() # set the properties if defined IF(COMP_${_TYPE}) - # MESSAGE(STATUS "Using nvcc_wrapper :: ${_TYPE} :: ${COMP_${_TYPE}}") - SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER}") - SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER}") + # MESSAGE(STATUS "Using ${COMP_COMPILER} :: ${_TYPE} :: ${COMP_${_TYPE}}") + SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}") + SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}") ENDIF() ENDFOREACH() ENDIF() ENDFUNCTION() - -# A test to check whether a downstream project set the C++ compiler to NVCC or not -# this is called only when Kokkos was installed with Kokkos_ENABLE_CUDA=ON -FUNCTION(kokkos_compiler_is_nvcc VAR COMPILER) - # Check if the compiler is nvcc (which really means 
nvcc_wrapper). - EXECUTE_PROCESS(COMMAND ${COMPILER} ${ARGN} --version - OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE - RESULT_VARIABLE RET) - # something went wrong - IF(RET GREATER 0) - SET(${VAR} false PARENT_SCOPE) - ELSE() - STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION} ) - STRING(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "nvcc" INTERNAL_COMPILER_VERSION_CONTAINS_NVCC) - STRING(REGEX REPLACE "^ +" "" INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}") - IF(${INTERNAL_COMPILER_VERSION_CONTAINS_NVCC} GREATER -1) - SET(${VAR} true PARENT_SCOPE) - ELSE() - SET(${VAR} false PARENT_SCOPE) - ENDIF() - ENDIF() -ENDFUNCTION() - diff --git a/packages/kokkos/cmake/KokkosCore_config.h.in b/packages/kokkos/cmake/KokkosCore_config.h.in index 0259fe69d50c3f47fa090b9b221df8253b425c5c..fbfae3711ec14573b4c3067aea4a8625d6b2ad8c 100644 --- a/packages/kokkos/cmake/KokkosCore_config.h.in +++ b/packages/kokkos/cmake/KokkosCore_config.h.in @@ -78,6 +78,7 @@ #cmakedefine KOKKOS_ARCH_POWER7 #cmakedefine KOKKOS_ARCH_POWER8 #cmakedefine KOKKOS_ARCH_POWER9 +#cmakedefine KOKKOS_ARCH_INTEL_GEN #cmakedefine KOKKOS_ARCH_KEPLER #cmakedefine KOKKOS_ARCH_KEPLER30 #cmakedefine KOKKOS_ARCH_KEPLER32 @@ -95,5 +96,8 @@ #cmakedefine KOKKOS_ARCH_VOLTA72 #cmakedefine KOKKOS_ARCH_TURING75 #cmakedefine KOKKOS_ARCH_AMPERE80 +#cmakedefine KOKKOS_ARCH_AMPERE86 #cmakedefine KOKKOS_ARCH_AMD_ZEN #cmakedefine KOKKOS_ARCH_AMD_ZEN2 + +#cmakedefine KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF diff --git a/packages/kokkos/cmake/Modules/CudaToolkit.cmake b/packages/kokkos/cmake/Modules/CudaToolkit.cmake index d620a71d369888fd5adecabde14119fbff63d6c0..eda5541f7c0633a868285190e9a4c39c275adf6b 100644 --- a/packages/kokkos/cmake/Modules/CudaToolkit.cmake +++ b/packages/kokkos/cmake/Modules/CudaToolkit.cmake @@ -481,76 +481,6 @@ if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILE unset(cuda_dir) endif() 
-IF(CMAKE_VERSION VERSION_LESS "3.12.0") - function(import_target_link_libraries target) - cmake_parse_arguments(HACK - "SYSTEM;INTERFACE;PUBLIC" - "" - "" - ${ARGN} - ) - get_target_property(LIBS ${target} INTERFACE_LINK_LIBRARIES) - if (LIBS) - list(APPEND LIBS ${HACK_UNPARSED_ARGUMENTS}) - else() - set(LIBS ${HACK_UNPARSED_ARGUMENTS}) - endif() - set_target_properties(${target} PROPERTIES - INTERFACE_LINK_LIBRARIES "${LIBS}") - endfunction() -ELSE() - function(import_target_link_libraries) - target_link_libraries(${ARGN}) - endfunction() -ENDIF() - -IF(CMAKE_VERSION VERSION_LESS "3.13.0") - function(import_target_link_directories target) - cmake_parse_arguments(HACK - "SYSTEM;INTERFACE;PUBLIC" - "" - "" - ${ARGN} - ) - get_target_property(LINK_LIBS ${target} INTERFACE_LINK_LIBRARIES) - if (LINK_LIBS) #could be not-found - set(LINK_LIBS_LIST ${LINK_LIBS}) - endif() - foreach(LIB ${HACK_UNPARSED_ARGUMENTS}) - list(APPEND LINK_LIBS_LIST -L${LIB}) - endforeach() - set_target_properties(${target} PROPERTIES - INTERFACE_LINK_LIBRARIES "${LINK_LIBS_LIST}") - endfunction() -ELSE() - function(import_target_link_directories) - target_link_directories(${ARGN}) - endfunction() -ENDIF() - -IF(CMAKE_VERSION VERSION_LESS "3.12.0") - function(import_target_include_directories target) - cmake_parse_arguments(HACK - "SYSTEM;INTERFACE;PUBLIC" - "" - "" - ${ARGN} - ) - get_target_property(INLUDE_DIRS ${target} INTERFACE_INCLUDE_DIRECTORIES) - if (INCLUDE_DIRS) - list(APPEND INCLUDE_DIRS ${HACK_UNPARSED_ARGUMENTS}) - else() - set(INCLUDE_DIRS ${HACK_UNPARSED_ARGUMENTS}) - endif() - set_target_properties(${target} PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${INCLUDE_DIRS}") - endfunction() -ELSE() - function(import_target_include_directories) - target_include_directories(${ARGN}) - endfunction() -ENDIF() - # Try language- or user-provided path first. 
if(CUDAToolkit_BIN_DIR) find_program(CUDAToolkit_NVCC_EXECUTABLE @@ -854,11 +784,11 @@ if(CUDAToolkit_FOUND) if (NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) add_library(CUDA::${lib_name} IMPORTED INTERFACE) - import_target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") - import_target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}") + target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}") foreach(dep ${arg_DEPS}) if(TARGET CUDA::${dep}) - import_target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep}) + target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep}) endif() endforeach() endif() @@ -866,8 +796,8 @@ if(CUDAToolkit_FOUND) if(NOT TARGET CUDA::toolkit) add_library(CUDA::toolkit IMPORTED INTERFACE) - import_target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") - import_target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") + target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") endif() _CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda) @@ -882,11 +812,11 @@ if(CUDAToolkit_FOUND) AND TARGET CUDA::cudart_static) add_library(CUDA::cudart_static_deps IMPORTED INTERFACE) - import_target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps) + target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps) if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER)) find_package(Threads REQUIRED) - import_target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS}) + target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS}) endif() if(UNIX AND NOT APPLE) @@ -896,7 +826,7 @@ 
if(CUDAToolkit_FOUND) if(NOT CUDAToolkit_rt_LIBRARY) message(WARNING "Could not find librt library, needed by CUDA::cudart_static") else() - import_target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY}) + target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY}) endif() endif() endif() diff --git a/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake b/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake index a1072a60c6182413768941b6f9d4537d7df74f61..8d58d96415808499dc39d44ad3600f5f5a64368e 100644 --- a/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake +++ b/packages/kokkos/cmake/Modules/FindTPLCUDA.cmake @@ -25,7 +25,7 @@ IF (TARGET CUDA::cuda_driver) SET(FOUND_CUDA_DRIVER TRUE) KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver) ELSE() - SET(FOUND_CUDA_DRIVVER FALSE) + SET(FOUND_CUDA_DRIVER FALSE) ENDIF() include(FindPackageHandleStandardArgs) diff --git a/packages/kokkos/cmake/Modules/FindTPLPTHREAD.cmake b/packages/kokkos/cmake/Modules/FindTPLPTHREAD.cmake index 1d154e29afff16479663d9c8d495f81142e5cf82..a743fca0e45290cf7ad80e3b022e7f66a34947fa 100644 --- a/packages/kokkos/cmake/Modules/FindTPLPTHREAD.cmake +++ b/packages/kokkos/cmake/Modules/FindTPLPTHREAD.cmake @@ -10,7 +10,7 @@ TRY_COMPILE(KOKKOS_HAS_PTHREAD_ARG # ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(PTHREAD DEFAULT_MSG KOKKOS_HAS_PTHREAD_ARG) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLPTHREAD DEFAULT_MSG KOKKOS_HAS_PTHREAD_ARG) #Only create the TPL if we succeed IF (KOKKOS_HAS_PTHREAD_ARG) KOKKOS_CREATE_IMPORTED_TPL(PTHREAD diff --git a/packages/kokkos/cmake/Modules/FindTPLROCM.cmake b/packages/kokkos/cmake/Modules/FindTPLROCM.cmake new file mode 100644 index 0000000000000000000000000000000000000000..512ad6ceb283dcd27f8db1dfb45f045f998d7875 --- /dev/null +++ b/packages/kokkos/cmake/Modules/FindTPLROCM.cmake @@ -0,0 +1,11 @@ +include(FindPackageHandleStandardArgs) + 
+FIND_LIBRARY(AMD_HIP_LIBRARY amdhip64 PATHS ENV ROCM_PATH PATH_SUFFIXES lib) +FIND_LIBRARY(HSA_RUNTIME_LIBRARY hsa-runtime64 PATHS ENV ROCM_PATH PATH_SUFFIXES lib) + +find_package_handle_standard_args(TPLROCM DEFAULT_MSG AMD_HIP_LIBRARY HSA_RUNTIME_LIBRARY) + +kokkos_create_imported_tpl(ROCM INTERFACE + LINK_LIBRARIES ${HSA_RUNTIME_LIBRARY} ${AMD_HIP_LIBRARY} + COMPILE_DEFINITIONS __HIP_ROCclr__ +) diff --git a/packages/kokkos/cmake/compile_tests/cplusplus14.cpp b/packages/kokkos/cmake/compile_tests/cplusplus14.cpp new file mode 100644 index 0000000000000000000000000000000000000000..52ec9885ec3ed5f4e7c0871f59de3d651df33efe --- /dev/null +++ b/packages/kokkos/cmake/compile_tests/cplusplus14.cpp @@ -0,0 +1,8 @@ +#include <type_traits> + +int main() { + // _t versions of type traits were added in C++14 + std::remove_cv_t<int> i = 0; + + return i; +} diff --git a/packages/kokkos/cmake/compile_tests/cuda_compute_capability.cc b/packages/kokkos/cmake/compile_tests/cuda_compute_capability.cc index 48c01c070cb8d1db5542a4da4e4d3fbd51e008be..a26ac5af4bf2dee2c26f1ee20c6c500fe465bf9f 100644 --- a/packages/kokkos/cmake/compile_tests/cuda_compute_capability.cc +++ b/packages/kokkos/cmake/compile_tests/cuda_compute_capability.cc @@ -72,6 +72,7 @@ int main() { case 72: std::cout << "Set -DKokkos_ARCH_VOLTA72=ON ." << std::endl; break; case 75: std::cout << "Set -DKokkos_ARCH_TURING75=ON ." << std::endl; break; case 80: std::cout << "Set -DKokkos_ARCH_AMPERE80=ON ." << std::endl; break; + case 86: std::cout << "Set -DKokkos_ARCH_AMPERE86=ON ." 
<< std::endl; break; default: std::cout << "Compute capability " << compute_capability << " is not supported" << std::endl; diff --git a/packages/kokkos/cmake/compile_tests/pthread.cpp b/packages/kokkos/cmake/compile_tests/pthread.cpp index 92310da0293704a121e265766dbe2979fc66513e..3f83bf6a5f7fe399fc4a44547792e738177facfb 100644 --- a/packages/kokkos/cmake/compile_tests/pthread.cpp +++ b/packages/kokkos/cmake/compile_tests/pthread.cpp @@ -2,7 +2,7 @@ void* kokkos_test(void* args) { return args; } -int main(void) { +int main() { pthread_t thread; /* Use NULL to avoid C++11. Some compilers do not have C++11 by default. Forcing C++11 diff --git a/packages/kokkos/cmake/fake_tribits.cmake b/packages/kokkos/cmake/fake_tribits.cmake index 2e82a462356b5520b1f3edcfec1635fc0f6f99cc..fbd6745a602caa8976958d10cf7d9b4c1fa3c471 100644 --- a/packages/kokkos/cmake/fake_tribits.cmake +++ b/packages/kokkos/cmake/fake_tribits.cmake @@ -81,10 +81,16 @@ ENDMACRO() FUNCTION(KOKKOS_ADD_TEST) if (KOKKOS_HAS_TRILINOS) CMAKE_PARSE_ARGUMENTS(TEST - "" + "SKIP_TRIBITS" "EXE;NAME;TOOL" "ARGS" ${ARGN}) + + IF(TEST_SKIP_TRIBITS) + MESSAGE(STATUS "Skipping test ${TEST_NAME} in TriBits") + RETURN() + ENDIF() + IF(TEST_EXE) SET(EXE_ROOT ${TEST_EXE}) ELSE() @@ -119,11 +125,10 @@ FUNCTION(KOKKOS_ADD_TEST) endif() else() CMAKE_PARSE_ARGUMENTS(TEST - "WILL_FAIL" + "WILL_FAIL;SKIP_TRIBITS" "FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME;TOOL" "CATEGORIES;ARGS" ${ARGN}) - SET(TESTS_ADDED) # To match Tribits, we should always be receiving # the root names of exes/libs IF(TEST_EXE) @@ -135,48 +140,27 @@ FUNCTION(KOKKOS_ADD_TEST) # These should be the full target name SET(TEST_NAME ${PACKAGE_NAME}_${TEST_NAME}) SET(EXE ${PACKAGE_NAME}_${EXE_ROOT}) - IF (TEST_ARGS) - SET(TEST_NUMBER 0) - FOREACH (ARG_STR ${TEST_ARGS}) - # This is passed as a single string blob to match TriBITS behavior - # We need this to be turned into a list - STRING(REPLACE " " ";" ARG_STR_LIST ${ARG_STR}) - IF(WIN32) - 
ADD_TEST(NAME ${TEST_NAME}${TEST_NUMBER} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} - COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX} ${ARG_STR_LIST}) - ELSE() - ADD_TEST(NAME ${TEST_NAME}${TEST_NUMBER} COMMAND ${EXE} ${ARG_STR_LIST}) - ENDIF() - LIST(APPEND TESTS_ADDED "${TEST_NAME}${TEST_NUMBER}") - MATH(EXPR TEST_NUMBER "${TEST_NUMBER} + 1") - ENDFOREACH() + IF(WIN32) + ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} + COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX} ${TEST_ARGS}) ELSE() - IF(WIN32) - ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} - COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX}) - ELSE() - ADD_TEST(NAME ${TEST_NAME} COMMAND ${EXE}) - ENDIF() - LIST(APPEND TESTS_ADDED "${TEST_NAME}") + ADD_TEST(NAME ${TEST_NAME} COMMAND ${EXE} ${TEST_ARGS}) + ENDIF() + IF(TEST_WILL_FAIL) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${TEST_WILL_FAIL}) + ENDIF() + IF(TEST_FAIL_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${TEST_FAIL_REGULAR_EXPRESSION}) + ENDIF() + IF(TEST_PASS_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${TEST_PASS_REGULAR_EXPRESSION}) + ENDIF() + IF(TEST_TOOL) + ADD_DEPENDENCIES(${EXE} ${TEST_TOOL}) #make sure the exe has to build the tool + SET_PROPERTY(TEST ${TEST_NAME} APPEND_STRING PROPERTY ENVIRONMENT "KOKKOS_PROFILE_LIBRARY=$<TARGET_FILE:${TEST_TOOL}>") ENDIF() - - FOREACH(TEST_NAME ${TESTS_ADDED}) - IF(TEST_WILL_FAIL) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${TEST_WILL_FAIL}) - ENDIF() - IF(TEST_FAIL_REGULAR_EXPRESSION) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${TEST_FAIL_REGULAR_EXPRESSION}) - ENDIF() - IF(TEST_PASS_REGULAR_EXPRESSION) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${TEST_PASS_REGULAR_EXPRESSION}) - ENDIF() - if(TEST_TOOL) - add_dependencies(${EXE} ${TEST_TOOL}) #make sure the exe has to build the tool - set_property(TEST 
${TEST_NAME} APPEND_STRING PROPERTY ENVIRONMENT "KOKKOS_PROFILE_LIBRARY=$<TARGET_FILE:${TEST_TOOL}>") - endif() - ENDFOREACH() VERIFY_EMPTY(KOKKOS_ADD_TEST ${TEST_UNPARSED_ARGUMENTS}) - endif() + ENDIF() ENDFUNCTION() FUNCTION(KOKKOS_ADD_ADVANCED_TEST) @@ -326,14 +310,6 @@ ENDIF() ENDFUNCTION() -FUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS) - IF (KOKKOS_HAS_TRILINOS) - TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) - ELSE() - TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) - ENDIF() -ENDFUNCTION() - FUNCTION(KOKKOS_INCLUDE_DIRECTORIES) IF(KOKKOS_HAS_TRILINOS) TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) @@ -350,10 +326,6 @@ ENDIF() ENDFUNCTION() -MACRO(KOKKOS_ADD_COMPILE_OPTIONS) -ADD_COMPILE_OPTIONS(${ARGN}) -ENDMACRO() - MACRO(PRINTALL match) get_cmake_property(_variableNames VARIABLES) list (SORT _variableNames) @@ -376,4 +348,3 @@ FUNCTION(GLOBAL_APPEND VARNAME) LIST(APPEND TEMP ${ARGN}) GLOBAL_SET(${VARNAME} ${TEMP}) ENDFUNCTION() - diff --git a/packages/kokkos/cmake/kokkos_arch.cmake b/packages/kokkos/cmake/kokkos_arch.cmake index 53aaf7dccf169a4bfd0dff0e93bf619a4f1f8bee..ec18e70a36a34dbecc305f978e0d7b84c482da37 100644 --- a/packages/kokkos/cmake/kokkos_arch.cmake +++ b/packages/kokkos/cmake/kokkos_arch.cmake @@ -35,7 +35,7 @@ KOKKOS_ARCH_OPTION(ARMV80 HOST "ARMv8.0 Compatible CPU") KOKKOS_ARCH_OPTION(ARMV81 HOST "ARMv8.1 Compatible CPU") KOKKOS_ARCH_OPTION(ARMV8_THUNDERX HOST "ARMv8 Cavium ThunderX CPU") KOKKOS_ARCH_OPTION(ARMV8_THUNDERX2 HOST "ARMv8 Cavium ThunderX2 CPU") -KOKKOS_ARCH_OPTION(A64FX HOST "ARMv8.2 with SVE Suport") +KOKKOS_ARCH_OPTION(A64FX HOST "ARMv8.2 with SVE Support") KOKKOS_ARCH_OPTION(WSM HOST "Intel Westmere CPU") KOKKOS_ARCH_OPTION(SNB HOST "Intel Sandy/Ivy Bridge CPUs") KOKKOS_ARCH_OPTION(HSW HOST "Intel Haswell CPUs") @@ -60,11 +60,12 @@ KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0") KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2") KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5") 
KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0") +KOKKOS_ARCH_OPTION(AMPERE86 GPU "NVIDIA Ampere generation CC 8.6") KOKKOS_ARCH_OPTION(ZEN HOST "AMD Zen architecture") KOKKOS_ARCH_OPTION(ZEN2 HOST "AMD Zen2 architecture") KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900") KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906") -KOKKOS_ARCH_OPTION(VEGA908 GPU "AMD GPU") +KOKKOS_ARCH_OPTION(VEGA908 GPU "AMD GPU MI100 GFX908") KOKKOS_ARCH_OPTION(INTEL_GEN GPU "Intel GPUs Gen9+") @@ -141,8 +142,16 @@ ENDIF() #------------------------------- KOKKOS_HIP_OPTIONS --------------------------- #clear anything that might be in the cache GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS) -IF(KOKKOS_CXX_COMPILER_ID STREQUAL HIP) - SET(AMDGPU_ARCH_FLAG "--amdgpu-target") +IF(KOKKOS_ENABLE_HIP) + IF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) + SET(AMDGPU_ARCH_FLAG "--amdgpu-target") + ELSE() + SET(AMDGPU_ARCH_FLAG "--offload-arch") + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS -x hip) + IF(DEFINED ENV{ROCM_PATH}) + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS --rocm-path=$ENV{ROCM_PATH}) + ENDIF() + ENDIF() ENDIF() @@ -183,6 +192,8 @@ ENDIF() IF (KOKKOS_ARCH_A64FX) COMPILER_SPECIFIC_FLAGS( DEFAULT -march=armv8.2-a+sve + Clang -march=armv8.2-a+sve -msve-vector-bits=512 + GCC -march=armv8.2-a+sve -msve-vector-bits=512 ) ENDIF() @@ -309,7 +320,7 @@ IF (KOKKOS_ARCH_POWER8 OR KOKKOS_ARCH_POWER9) SET(KOKKOS_USE_ISA_POWERPCLE ON) ENDIF() -IF (Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) +IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) COMPILER_SPECIFIC_FLAGS( Clang -fcuda-rdc NVIDIA --relocatable-device-code=true @@ -333,8 +344,8 @@ ENDIF() #Right now we cannot get the compiler ID when cross-compiling, so just check #that HIP is enabled -IF (Kokkos_ENABLE_HIP) - IF (Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) +IF (KOKKOS_ENABLE_HIP) + IF (KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) COMPILER_SPECIFIC_FLAGS( DEFAULT -fgpu-rdc ) @@ -345,8 +356,7 @@ IF (Kokkos_ENABLE_HIP) ENDIF() ENDIF() - -IF 
(Kokkos_ENABLE_SYCL) +IF (KOKKOS_ENABLE_SYCL) COMPILER_SPECIFIC_FLAGS( DEFAULT -fsycl ) @@ -363,7 +373,7 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") ENDIF() SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) - IF (NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMPTARGET) + IF (NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMPTARGET AND NOT KOKKOS_ENABLE_SYCL) MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.") UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) ELSE() @@ -396,6 +406,7 @@ CHECK_CUDA_ARCH(VOLTA70 sm_70) CHECK_CUDA_ARCH(VOLTA72 sm_72) CHECK_CUDA_ARCH(TURING75 sm_75) CHECK_CUDA_ARCH(AMPERE80 sm_80) +CHECK_CUDA_ARCH(AMPERE86 sm_86) SET(AMDGPU_ARCH_ALREADY_SPECIFIED "") FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG) @@ -405,12 +416,12 @@ FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG) ENDIF() SET(AMDGPU_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) IF (NOT KOKKOS_ENABLE_HIP AND NOT KOKKOS_ENABLE_OPENMPTARGET) - MESSAGE(WARNING "Given HIP arch ${ARCH}, but Kokkos_ENABLE_AMDGPU and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.") + MESSAGE(WARNING "Given AMD GPU architecture ${ARCH}, but Kokkos_ENABLE_HIP and Kokkos_ENABLE_OPENMPTARGET are OFF. 
Option will be ignored.") UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) ELSE() SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE) GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") - IF(KOKKOS_ENABLE_HIP) + IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") ENDIF() ENDIF() @@ -451,6 +462,24 @@ IF (KOKKOS_ENABLE_OPENMPTARGET) ENDIF() ENDIF() +IF (KOKKOS_ENABLE_SYCL) + IF(CUDA_ARCH_ALREADY_SPECIFIED) + IF(KOKKOS_ENABLE_UNSUPPORTED_ARCHS) + COMPILER_SPECIFIC_FLAGS( + DEFAULT -fsycl-targets=nvptx64-nvidia-cuda-sycldevice + ) + # FIXME_SYCL The CUDA backend doesn't support printf yet. + GLOBAL_SET(KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF ON) + ELSE() + MESSAGE(SEND_ERROR "Setting a CUDA architecture for SYCL is only allowed with Kokkos_ENABLE_UNSUPPORTED_ARCHS=ON!") + ENDIF() + ELSEIF(KOKKOS_ARCH_INTEL_GEN) + COMPILER_SPECIFIC_FLAGS( + DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device skl" + ) + ENDIF() +ENDIF() + IF(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED) # Try to autodetect the CUDA Compute Capability by asking the device SET(_BINARY_TEST_DIR ${CMAKE_CURRENT_BINARY_DIR}/cmake/compile_tests/CUDAComputeCapabilityWorkdir) @@ -464,6 +493,43 @@ IF(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED) ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_tests/cuda_compute_capability.cc COMPILE_DEFINITIONS -DSM_ONLY RUN_OUTPUT_VARIABLE _CUDA_COMPUTE_CAPABILITY) + + # if user is using kokkos_compiler_launcher, above will fail. 
+ IF(NOT _COMPILE_RESULT OR NOT _RESULT EQUAL 0) + # check to see if CUDA is not already enabled (may happen when Kokkos is subproject) + GET_PROPERTY(_ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) + # language has to be fully enabled, just checking for CMAKE_CUDA_COMPILER isn't enough + IF(NOT "CUDA" IN_LIST _ENABLED_LANGUAGES) + # make sure the user knows that we aren't using CUDA compiler for anything else + MESSAGE(STATUS "CUDA auto-detection of architecture failed with ${CMAKE_CXX_COMPILER}. Enabling CUDA language ONLY to auto-detect architecture...") + INCLUDE(CheckLanguage) + CHECK_LANGUAGE(CUDA) + IF(CMAKE_CUDA_COMPILER) + ENABLE_LANGUAGE(CUDA) + ELSE() + MESSAGE(STATUS "CUDA language could not be enabled") + ENDIF() + ENDIF() + + # if CUDA was enabled, this will be defined + IF(CMAKE_CUDA_COMPILER) + # copy our test to .cu so cmake compiles as CUDA + CONFIGURE_FILE( + ${PROJECT_SOURCE_DIR}/cmake/compile_tests/cuda_compute_capability.cc + ${PROJECT_BINARY_DIR}/compile_tests/cuda_compute_capability.cu + COPYONLY + ) + # run test again + TRY_RUN( + _RESULT + _COMPILE_RESULT + ${_BINARY_TEST_DIR} + ${PROJECT_BINARY_DIR}/compile_tests/cuda_compute_capability.cu + COMPILE_DEFINITIONS -DSM_ONLY + RUN_OUTPUT_VARIABLE _CUDA_COMPUTE_CAPABILITY) + ENDIF() + ENDIF() + LIST(FIND KOKKOS_CUDA_ARCH_FLAGS sm_${_CUDA_COMPUTE_CAPABILITY} FLAG_INDEX) IF(_COMPILE_RESULT AND _RESULT EQUAL 0 AND NOT FLAG_INDEX EQUAL -1) MESSAGE(STATUS "Detected CUDA Compute Capability ${_CUDA_COMPUTE_CAPABILITY}") @@ -500,7 +566,7 @@ IF (KOKKOS_ENABLE_CUDA) SET(KOKKOS_ARCH_VOLTA ON) ENDIF() - IF (KOKKOS_ARCH_AMPERE80) + IF (KOKKOS_ARCH_AMPERE80 OR KOKKOS_ARCH_AMPERE86) SET(KOKKOS_ARCH_AMPERE ON) ENDIF() ENDIF() diff --git a/packages/kokkos/cmake/kokkos_compiler_id.cmake b/packages/kokkos/cmake/kokkos_compiler_id.cmake index e6600161f9fe1b205fe4b481bc1af4d91a00c3e1..4434d6928f46429ad7525c944a0c1c6c351c4cdd 100644 --- a/packages/kokkos/cmake/kokkos_compiler_id.cmake +++ 
b/packages/kokkos/cmake/kokkos_compiler_id.cmake @@ -27,6 +27,12 @@ IF(Kokkos_ENABLE_CUDA) PATHS ${PROJECT_SOURCE_DIR} PATH_SUFFIXES bin) + FIND_PROGRAM(Kokkos_NVCC_WRAPPER + NAMES nvcc_wrapper + HINTS ${PROJECT_SOURCE_DIR} + PATHS ${PROJECT_SOURCE_DIR} + PATH_SUFFIXES bin) + # check if compiler was set to nvcc_wrapper kokkos_internal_have_compiler_nvcc(${CMAKE_CXX_COMPILER}) # if launcher was found and nvcc_wrapper was not specified as @@ -37,7 +43,7 @@ IF(Kokkos_ENABLE_CUDA) # if the second argument matches the C++ compiler, it forwards the rest of the # args to nvcc_wrapper kokkos_internal_have_compiler_nvcc( - ${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER} -DKOKKOS_DEPENDENCE) + ${Kokkos_COMPILE_LAUNCHER} ${Kokkos_NVCC_WRAPPER} ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER} -DKOKKOS_DEPENDENCE) SET(INTERNAL_USE_COMPILER_LAUNCHER true) ENDIF() ENDIF() @@ -55,32 +61,7 @@ IF(INTERNAL_HAVE_COMPILER_NVCC) SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE) MESSAGE(STATUS "Compiler Version: ${KOKKOS_CXX_COMPILER_VERSION}") IF(INTERNAL_USE_COMPILER_LAUNCHER) - IF(Kokkos_LAUNCH_COMPILER_INFO) - GET_FILENAME_COMPONENT(BASE_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME) - # does not have STATUS intentionally - MESSAGE("") - MESSAGE("Kokkos_LAUNCH_COMPILER_INFO (${Kokkos_COMPILE_LAUNCHER}):") - MESSAGE(" - Kokkos + CUDA backend requires the C++ files to be compiled as CUDA code.") - MESSAGE(" - kokkos_launch_compiler permits CMAKE_CXX_COMPILER to be set to a traditional C++ compiler when Kokkos_ENABLE_CUDA=ON") - MESSAGE(" by prefixing all the compile and link commands with the path to the script + CMAKE_CXX_COMPILER (${CMAKE_CXX_COMPILER}).") - MESSAGE(" - If any of the compile or link commands have CMAKE_CXX_COMPILER as the first argument, it replaces CMAKE_CXX_COMPILER with nvcc_wrapper.") - MESSAGE(" - If the compile or link command is not CMAKE_CXX_COMPILER, it just executes the command.") - MESSAGE(" - If using 
ccache, set CMAKE_CXX_COMPILER to nvcc_wrapper explicitly.") - MESSAGE(" - kokkos_compiler_launcher is available to downstream projects as well.") - MESSAGE(" - If CMAKE_CXX_COMPILER=nvcc_wrapper, all legacy behavior will be preserved during 'find_package(Kokkos)'") - MESSAGE(" - If CMAKE_CXX_COMPILER is not nvcc_wrapper, 'find_package(Kokkos)' will apply 'kokkos_compilation(GLOBAL)' unless separable compilation is enabled") - MESSAGE(" - This can be disabled via '-DKokkos_LAUNCH_COMPILER=OFF'") - MESSAGE(" - Use 'find_package(Kokkos COMPONENTS separable_compilation)' to enable separable compilation") - MESSAGE(" - Separable compilation allows you to control the scope of where the compiler transformation behavior (${BASE_COMPILER_NAME} -> nvcc_wrapper) is applied") - MESSAGE(" - The compiler transformation can be applied on a per-project, per-directory, per-target, and/or per-source-file basis") - MESSAGE(" - 'kokkos_compilation(PROJECT)' will apply the compiler transformation to all targets in a project/subproject") - MESSAGE(" - 'kokkos_compilation(TARGET <TARGET> [<TARGETS>...])' will apply the compiler transformation to the specified target(s)") - MESSAGE(" - 'kokkos_compilation(SOURCE <SOURCE> [<SOURCES>...])' will apply the compiler transformation to the specified source file(s)") - MESSAGE(" - 'kokkos_compilation(DIRECTORY <DIR> [<DIRS>...])' will apply the compiler transformation to the specified directories") - MESSAGE("") - ELSE() - MESSAGE(STATUS "kokkos_launch_compiler (${Kokkos_COMPILE_LAUNCHER}) is enabled... 
Set Kokkos_LAUNCH_COMPILER_INFO=ON for more info.") - ENDIF() + MESSAGE(STATUS "kokkos_launch_compiler (${Kokkos_COMPILE_LAUNCHER}) is enabled...") kokkos_compilation(GLOBAL) ENDIF() ENDIF() @@ -92,7 +73,11 @@ IF(Kokkos_ENABLE_HIP) OUTPUT_STRIP_TRAILING_WHITESPACE) STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION} ) - SET(KOKKOS_CXX_COMPILER_ID HIP CACHE STRING INTERNAL FORCE) + + STRING(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "HIP version" INTERNAL_COMPILER_VERSION_CONTAINS_HIP) + IF(INTERNAL_COMPILER_VERSION_CONTAINS_HIP GREATER -1) + SET(KOKKOS_CXX_COMPILER_ID HIPCC CACHE STRING INTERNAL FORCE) + ENDIF() STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" TEMP_CXX_COMPILER_VERSION ${INTERNAL_COMPILER_VERSION_ONE_LINE}) @@ -103,8 +88,7 @@ ENDIF() IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) # The Cray compiler reports as Clang to most versions of CMake EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version - COMMAND grep Cray - COMMAND wc -l + COMMAND grep -c Cray OUTPUT_VARIABLE INTERNAL_HAVE_CRAY_COMPILER OUTPUT_STRIP_TRAILING_WHITESPACE) IF (INTERNAL_HAVE_CRAY_COMPILER) #not actually Clang @@ -112,8 +96,7 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) ENDIF() # The clang based Intel compiler reports as Clang to most versions of CMake EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version - COMMAND grep icpx - COMMAND wc -l + COMMAND grep -c "DPC++\\|icpx" OUTPUT_VARIABLE INTERNAL_HAVE_INTEL_COMPILER OUTPUT_STRIP_TRAILING_WHITESPACE) IF (INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang @@ -174,7 +157,7 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE) -ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIP) +ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 3.8.0) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() diff --git 
a/packages/kokkos/cmake/kokkos_corner_cases.cmake b/packages/kokkos/cmake/kokkos_corner_cases.cmake index 3962c4b16efbcf240b52e2463ea575d39b844a1d..a84ac2b63027e7112cb3a7b76e5e9a7b8fc892e3 100644 --- a/packages/kokkos/cmake/kokkos_corner_cases.cmake +++ b/packages/kokkos/cmake/kokkos_corner_cases.cmake @@ -49,11 +49,14 @@ ENDIF() IF (KOKKOS_CXX_STANDARD STREQUAL 17) IF (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7) - MESSAGE(FATAL_ERROR "You have requested c++17 support for GCC ${KOKKOS_CXX_COMPILER_VERSION}. Although CMake has allowed this and GCC accepts -std=c++1z/c++17, GCC <= 6 does not properly support *this capture. Please reduce the C++ standard to 14 or upgrade the compiler if you do need C++17 support.") + MESSAGE(FATAL_ERROR "You have requested C++17 support for GCC ${KOKKOS_CXX_COMPILER_VERSION}. Although CMake has allowed this and GCC accepts -std=c++1z/c++17, GCC < 7 does not properly support *this capture. Please reduce the C++ standard to 14 or upgrade the compiler if you do need C++17 support.") ENDIF() IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11) - MESSAGE(FATAL_ERROR "You have requested c++17 support for NVCC ${KOKKOS_CXX_COMPILER_VERSION}. NVCC only supports C++17 from version 11 on. Please reduce the C++ standard to 14 or upgrade the compiler if you need C++17 support.") + MESSAGE(FATAL_ERROR "You have requested C++17 support for NVCC ${KOKKOS_CXX_COMPILER_VERSION}. NVCC only supports C++17 from version 11 on. Please reduce the C++ standard to 14 or upgrade the compiler if you need C++17 support.") + ENDIF() + IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND KOKKOS_ENABLE_CUDA_CONSTEXPR) + MESSAGE(WARNING "You have requested -DKokkos_ENABLE_CUDA_CONSTEXPR=ON with C++17 support for NVCC ${KOKKOS_CXX_COMPILER_VERSION} which is known to trigger compiler bugs. 
See https://github.com/kokkos/kokkos/issues/3496") ENDIF() ENDIF() diff --git a/packages/kokkos/cmake/kokkos_enable_devices.cmake b/packages/kokkos/cmake/kokkos_enable_devices.cmake index 41ee10a8a05c6909374be1c704b03997bb8f8618..445dad47ce561979037bf5b1622413ddda05f3b3 100644 --- a/packages/kokkos/cmake/kokkos_enable_devices.cmake +++ b/packages/kokkos/cmake/kokkos_enable_devices.cmake @@ -48,9 +48,6 @@ IF(KOKKOS_ENABLE_OPENMP) IF(KOKKOS_CLANG_IS_CRAY) SET(ClangOpenMPFlag -fopenmp) ENDIF() - IF(KOKKOS_CLANG_IS_INTEL) - SET(ClangOpenMPFlag -fiopenmp) - ENDIF() IF(KOKKOS_COMPILER_CLANG_MSVC) #for clang-cl expression /openmp yields an error, so directly add the specific Clang flag SET(ClangOpenMPFlag /clang:-fopenmp=libomp) @@ -64,6 +61,7 @@ IF(KOKKOS_ENABLE_OPENMP) COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID Clang -Xcompiler ${ClangOpenMPFlag} + IntelClang -Xcompiler -fiopenmp PGI -Xcompiler -mp Cray NO-VALUE-SPECIFIED XL -Xcompiler -qsmp=omp @@ -72,6 +70,7 @@ IF(KOKKOS_ENABLE_OPENMP) ELSE() COMPILER_SPECIFIC_FLAGS( Clang ${ClangOpenMPFlag} + IntelClang -fiopenmp AppleClang -Xpreprocessor -fopenmp PGI -mp Cray NO-VALUE-SPECIFIED @@ -152,3 +151,11 @@ IF (KOKKOS_ENABLE_HIP) ENDIF() KOKKOS_DEVICE_OPTION(SYCL OFF DEVICE "Whether to build SYCL backend") + +## SYCL has extra setup requirements, turn on Kokkos_Setup_SYCL.hpp in macros +IF (KOKKOS_ENABLE_SYCL) + IF(KOKKOS_CXX_STANDARD LESS 17) + MESSAGE(FATAL_ERROR "SYCL backend requires C++17 or newer!") + ENDIF() + LIST(APPEND DEVICE_SETUP_LIST SYCL) +ENDIF() diff --git a/packages/kokkos/cmake/kokkos_enable_options.cmake b/packages/kokkos/cmake/kokkos_enable_options.cmake index 5df498f3735484dea3e2cf39e296d59135fe2774..95bce66c7bee32f8800cbd6e0324f9d4c599c97c 100644 --- a/packages/kokkos/cmake/kokkos_enable_options.cmake +++ b/packages/kokkos/cmake/kokkos_enable_options.cmake @@ -48,6 +48,7 @@ KOKKOS_ENABLE_OPTION(COMPILER_WARNINGS OFF "Whether to print all compiler war 
KOKKOS_ENABLE_OPTION(PROFILING_LOAD_PRINT OFF "Whether to print information about which profiling tools got loaded") KOKKOS_ENABLE_OPTION(TUNING OFF "Whether to create bindings for tuning tools") KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops") +KOKKOS_ENABLE_OPTION(LAUNCH_COMPILER ON "Whether to potentially use the launch compiler") IF (KOKKOS_ENABLE_CUDA) SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}") @@ -68,6 +69,15 @@ ELSE() ENDIF() KOKKOS_ENABLE_OPTION(COMPLEX_ALIGN ${COMPLEX_ALIGN_DEFAULT} "Whether to align Kokkos::complex to 2*alignof(RealType)") +IF (KOKKOS_ENABLE_TESTS) + SET(HEADER_SELF_CONTAINMENT_TESTS_DEFAULT ON) +ELSE() + SET(HEADER_SELF_CONTAINMENT_TESTS_DEFAULT OFF) +ENDIF() +KOKKOS_ENABLE_OPTION(HEADER_SELF_CONTAINMENT_TESTS ${HEADER_SELF_CONTAINMENT_TESTS_DEFAULT} "Enable header self-containment unit tests") +IF (NOT KOKKOS_ENABLE_TESTS AND KOKKOS_ENABLE_HEADER_SELF_CONTAINMENT_TESTS) + MESSAGE(WARNING "Kokkos_ENABLE_HEADER_SELF_CONTAINMENT_TESTS is ON but Kokkos_ENABLE_TESTS is OFF. 
Option will be ignored.") +ENDIF() IF (KOKKOS_ENABLE_CUDA AND (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)) SET(CUDA_CONSTEXPR_DEFAULT ON) @@ -76,14 +86,14 @@ ELSE() ENDIF() KOKKOS_ENABLE_OPTION(CUDA_CONSTEXPR ${CUDA_CONSTEXPR_DEFAULT} "Whether to activate experimental relaxed constexpr functions") +Kokkos_ENABLE_OPTION(UNSUPPORTED_ARCHS OFF "Whether to allow architectures in backends Kokkos doesn't optimize for") + FUNCTION(check_device_specific_options) CMAKE_PARSE_ARGUMENTS(SOME "" "DEVICE" "OPTIONS" ${ARGN}) IF(NOT KOKKOS_ENABLE_${SOME_DEVICE}) FOREACH(OPTION ${SOME_OPTIONS}) - IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.14) - IF(NOT DEFINED CACHE{Kokkos_ENABLE_${OPTION}} OR NOT DEFINED CACHE{Kokkos_ENABLE_${SOME_DEVICE}}) - MESSAGE(FATAL_ERROR "Internal logic error: option '${OPTION}' or device '${SOME_DEVICE}' not recognized.") - ENDIF() + IF(NOT DEFINED CACHE{Kokkos_ENABLE_${OPTION}} OR NOT DEFINED CACHE{Kokkos_ENABLE_${SOME_DEVICE}}) + MESSAGE(FATAL_ERROR "Internal logic error: option '${OPTION}' or device '${SOME_DEVICE}' not recognized.") ENDIF() IF(KOKKOS_ENABLE_${OPTION}) MESSAGE(WARNING "Kokkos_ENABLE_${OPTION} is ON but ${SOME_DEVICE} backend is not enabled. 
Option will be ignored.") diff --git a/packages/kokkos/cmake/kokkos_functions.cmake b/packages/kokkos/cmake/kokkos_functions.cmake index 2b17d648b44b39a6fbdf1b48d8cbd26001aa9030..858322394d7aefcb9fe23f55a60863f3a8f63484 100644 --- a/packages/kokkos/cmake/kokkos_functions.cmake +++ b/packages/kokkos/cmake/kokkos_functions.cmake @@ -169,9 +169,7 @@ MACRO(kokkos_export_imported_tpl NAME) ENDIF() SET(TPL_LINK_OPTIONS) - IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13.0") - GET_TARGET_PROPERTY(TPL_LINK_OPTIONS ${NAME} INTERFACE_LINK_OPTIONS) - ENDIF() + GET_TARGET_PROPERTY(TPL_LINK_OPTIONS ${NAME} INTERFACE_LINK_OPTIONS) IF(TPL_LINK_OPTIONS) KOKKOS_APPEND_CONFIG_LINE("INTERFACE_LINK_OPTIONS ${TPL_LINK_OPTIONS}") ENDIF() @@ -230,9 +228,7 @@ MACRO(kokkos_import_tpl NAME) # I have still been getting errors about ROOT variables being ignored # I'm not sure if this is a scope issue - but make sure # the policy is set before we do any find_package calls - IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0") - CMAKE_POLICY(SET CMP0074 NEW) - ENDIF() + CMAKE_POLICY(SET CMP0074 NEW) IF (KOKKOS_ENABLE_${NAME}) #Tack on a TPL here to make sure we avoid using anyone else's find @@ -314,7 +310,7 @@ MACRO(kokkos_create_imported_tpl NAME) CMAKE_PARSE_ARGUMENTS(TPL "INTERFACE" "LIBRARY" - "LINK_LIBRARIES;INCLUDES;COMPILE_OPTIONS;LINK_OPTIONS" + "LINK_LIBRARIES;INCLUDES;COMPILE_DEFINITIONS;COMPILE_OPTIONS;LINK_OPTIONS" ${ARGN}) @@ -334,6 +330,9 @@ MACRO(kokkos_create_imported_tpl NAME) IF(TPL_INCLUDES) TARGET_INCLUDE_DIRECTORIES(${NAME} INTERFACE ${TPL_INCLUDES}) ENDIF() + IF(TPL_COMPILE_DEFINITIONS) + TARGET_COMPILE_DEFINITIONS(${NAME} INTERFACE ${TPL_COMPILE_DEFINITIONS}) + ENDIF() IF(TPL_COMPILE_OPTIONS) TARGET_COMPILE_OPTIONS(${NAME} INTERFACE ${TPL_COMPILE_OPTIONS}) ENDIF() @@ -355,6 +354,10 @@ MACRO(kokkos_create_imported_tpl NAME) SET_TARGET_PROPERTIES(${NAME} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TPL_INCLUDES}") ENDIF() + IF(TPL_COMPILE_DEFINITIONS) + 
SET_TARGET_PROPERTIES(${NAME} PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "${TPL_COMPILE_DEFINITIONS}") + ENDIF() IF(TPL_COMPILE_OPTIONS) SET_TARGET_PROPERTIES(${NAME} PROPERTIES INTERFACE_COMPILE_OPTIONS "${TPL_COMPILE_OPTIONS}") @@ -770,7 +773,7 @@ FUNCTION(kokkos_link_tpl TARGET) ENDFUNCTION() FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER) - SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelClang GNU HIP Fujitsu) + SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelClang GNU HIPCC Fujitsu) CMAKE_PARSE_ARGUMENTS( PARSE "LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES" @@ -926,6 +929,9 @@ ENDFUNCTION() # DIRECTORY --> all files in directory # PROJECT --> all files/targets in a project/subproject # +# NOTE: this is VERY DIFFERENT than the version in KokkosConfigCommon.cmake.in. +# This version explicitly uses nvcc_wrapper. +# FUNCTION(kokkos_compilation) # check whether the compiler already supports building CUDA KOKKOS_CXX_COMPILER_CUDA_TEST(Kokkos_CXX_COMPILER_COMPILES_CUDA) @@ -947,10 +953,21 @@ FUNCTION(kokkos_compilation) MESSAGE(FATAL_ERROR "Kokkos could not find 'kokkos_launch_compiler'. Please set '-DKokkos_COMPILE_LAUNCHER=/path/to/launcher'") ENDIF() + # find nvcc_wrapper + FIND_PROGRAM(Kokkos_NVCC_WRAPPER + NAMES nvcc_wrapper + HINTS ${PROJECT_SOURCE_DIR} + PATHS ${PROJECT_SOURCE_DIR} + PATH_SUFFIXES bin) + + IF(NOT Kokkos_NVCC_WRAPPER) + MESSAGE(FATAL_ERROR "Kokkos could not find 'nvcc_wrapper'. 
Please set '-DKokkos_NVCC_WRAPPER=/path/to/nvcc_wrapper'") + ENDIF() + IF(COMP_GLOBAL) # if global, don't bother setting others - SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER}") - SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER}") + SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${Kokkos_NVCC_WRAPPER} ${CMAKE_CXX_COMPILER}") + SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${Kokkos_NVCC_WRAPPER} ${CMAKE_CXX_COMPILER}") ELSE() FOREACH(_TYPE PROJECT DIRECTORY TARGET SOURCE) # make project/subproject scoping easy, e.g. KokkosCompilation(PROJECT) after project(...) @@ -961,8 +978,8 @@ FUNCTION(kokkos_compilation) # set the properties if defined IF(COMP_${_TYPE}) # MESSAGE(STATUS "Using nvcc_wrapper :: ${_TYPE} :: ${COMP_${_TYPE}}") - SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER}") - SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${CMAKE_CXX_COMPILER}") + SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${Kokkos_NVCC_WRAPPER} ${CMAKE_CXX_COMPILER}") + SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${Kokkos_NVCC_WRAPPER} ${CMAKE_CXX_COMPILER}") ENDIF() ENDFOREACH() ENDIF() diff --git a/packages/kokkos/cmake/kokkos_test_cxx_std.cmake b/packages/kokkos/cmake/kokkos_test_cxx_std.cmake index 1d7da922eb6ee931436631c648f2a1109e8bde0d..707fb000af528694780d6668f160a3fee3472a69 100644 --- a/packages/kokkos/cmake/kokkos_test_cxx_std.cmake +++ b/packages/kokkos/cmake/kokkos_test_cxx_std.cmake @@ -86,6 +86,19 @@ ELSE() MESSAGE(FATAL_ERROR "Unknown C++ standard ${KOKKOS_CXX_STANDARD} - must be 14, 17, or 20") ENDIF() +# Enforce that we can compile a simple C++14 program + +TRY_COMPILE(CAN_COMPILE_CPP14 + 
${KOKKOS_TOP_BUILD_DIR}/corner_cases + ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/cplusplus14.cpp + OUTPUT_VARIABLE ERROR_MESSAGE + CXX_STANDARD 14 +) +if (NOT CAN_COMPILE_CPP14) + UNSET(CAN_COMPILE_CPP14 CACHE) #make sure CMake always re-runs this + MESSAGE(FATAL_ERROR "C++${KOKKOS_CXX_STANDARD}-compliant compiler detected, but unable to compile C++14 or later program. Verify that ${CMAKE_CXX_COMPILER_ID}:${CMAKE_CXX_COMPILER_VERSION} is set up correctly (e.g., check that correct library headers are being used).\nFailing output:\n ${ERROR_MESSAGE}") +ENDIF() +UNSET(CAN_COMPILE_CPP14 CACHE) #make sure CMake always re-runs this # Enforce that extensions are turned off for nvcc_wrapper. diff --git a/packages/kokkos/cmake/kokkos_tpls.cmake b/packages/kokkos/cmake/kokkos_tpls.cmake index b58d3696ea9a412d9f008c0ba4e03a142a9fc5fc..d8d044c9d75384a1d8d312a94708623c735d121f 100644 --- a/packages/kokkos/cmake/kokkos_tpls.cmake +++ b/packages/kokkos/cmake/kokkos_tpls.cmake @@ -1,5 +1,6 @@ KOKKOS_CFG_DEPENDS(TPLS OPTIONS) KOKKOS_CFG_DEPENDS(TPLS DEVICES) +KOKKOS_CFG_DEPENDS(TPLS COMPILER_ID) FUNCTION(KOKKOS_TPL_OPTION PKG DEFAULT) CMAKE_PARSE_ARGUMENTS(PARSED @@ -38,6 +39,12 @@ IF(KOKKOS_ENABLE_MEMKIND) ENDIF() KOKKOS_TPL_OPTION(CUDA ${Kokkos_ENABLE_CUDA} TRIBITS CUDA) KOKKOS_TPL_OPTION(LIBRT Off) +IF(KOKKOS_ENABLE_HIP AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) + SET(ROCM_DEFAULT ON) +ELSE() + SET(ROCM_DEFAULT OFF) +ENDIF() +KOKKOS_TPL_OPTION(ROCM ${ROCM_DEFAULT}) IF (WIN32) SET(LIBDL_DEFAULT Off) @@ -70,6 +77,7 @@ KOKKOS_IMPORT_TPL(LIBRT) KOKKOS_IMPORT_TPL(LIBDL) KOKKOS_IMPORT_TPL(MEMKIND) KOKKOS_IMPORT_TPL(PTHREAD INTERFACE) +KOKKOS_IMPORT_TPL(ROCM INTERFACE) #Convert list to newlines (which CMake doesn't always like in cache variables) STRING(REPLACE ";" "\n" KOKKOS_TPL_EXPORT_TEMP "${KOKKOS_TPL_EXPORTS}") diff --git a/packages/kokkos/cmake/kokkos_tribits.cmake b/packages/kokkos/cmake/kokkos_tribits.cmake index 
059fb192f05153843c131a6d89fc4adf8fb202cf..afa036066afeef954c5fed457782546565b7cfa5 100644 --- a/packages/kokkos/cmake/kokkos_tribits.cmake +++ b/packages/kokkos/cmake/kokkos_tribits.cmake @@ -141,39 +141,54 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE ROOT_NAME) ENDFUNCTION() FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME) -CMAKE_PARSE_ARGUMENTS(PARSE - "" - "" - "SOURCES;CATEGORIES;ARGS" - ${ARGN}) -VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE_AND_TEST ${PARSE_UNPARSED_ARGUMENTS}) - -IF (KOKKOS_HAS_TRILINOS) - IF(DEFINED PARSE_ARGS) - STRING(REPLACE ";" " " PARSE_ARGS "${PARSE_ARGS}") - ENDIF() - TRIBITS_ADD_EXECUTABLE_AND_TEST( - ${ROOT_NAME} - SOURCES ${PARSE_SOURCES} - TESTONLYLIBS kokkos_gtest - NUM_MPI_PROCS 1 - COMM serial mpi - ARGS ${PARSE_ARGS} - CATEGORIES ${PARSE_CATEGORIES} - SOURCES ${PARSE_SOURCES} - FAIL_REGULAR_EXPRESSION " FAILED " - ARGS ${PARSE_ARGS} - ) -ELSE() - KOKKOS_ADD_TEST_EXECUTABLE(${ROOT_NAME} - SOURCES ${PARSE_SOURCES} - ) - KOKKOS_ADD_TEST(NAME ${ROOT_NAME} - EXE ${ROOT_NAME} - FAIL_REGULAR_EXPRESSION " FAILED " - ARGS ${PARSE_ARGS} - ) -ENDIF() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "SOURCES;CATEGORIES;ARGS" + ${ARGN}) + VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE_AND_TEST ${PARSE_UNPARSED_ARGUMENTS}) + + IF (KOKKOS_HAS_TRILINOS) + IF(DEFINED PARSE_ARGS) + STRING(REPLACE ";" " " PARSE_ARGS "${PARSE_ARGS}") + ENDIF() + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ROOT_NAME} + SOURCES ${PARSE_SOURCES} + TESTONLYLIBS kokkos_gtest + NUM_MPI_PROCS 1 + COMM serial mpi + ARGS ${PARSE_ARGS} + CATEGORIES ${PARSE_CATEGORIES} + SOURCES ${PARSE_SOURCES} + FAIL_REGULAR_EXPRESSION " FAILED " + ARGS ${PARSE_ARGS} + ) + ELSE() + KOKKOS_ADD_TEST_EXECUTABLE(${ROOT_NAME} + SOURCES ${PARSE_SOURCES} + ) + IF (PARSE_ARGS) + SET(TEST_NUMBER 0) + FOREACH (ARG_STR ${PARSE_ARGS}) + # This is passed as a single string blob to match TriBITS behavior + # We need this to be turned into a list + STRING(REPLACE " " ";" ARG_STR_LIST ${ARG_STR}) + SET(TEST_NAME 
"${ROOT_NAME}${TEST_NUMBER}") + MATH(EXPR TEST_NUMBER "${TEST_NUMBER} + 1") + KOKKOS_ADD_TEST(NAME ${TEST_NAME} + EXE ${ROOT_NAME} + FAIL_REGULAR_EXPRESSION " FAILED " + ARGS ${ARG_STR_LIST} + ) + ENDFOREACH() + ELSE() + KOKKOS_ADD_TEST(NAME ${ROOT_NAME} + EXE ${ROOT_NAME} + FAIL_REGULAR_EXPRESSION " FAILED " + ) + ENDIF() + ENDIF() ENDFUNCTION() FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME) @@ -301,11 +316,26 @@ ENDMACRO() ## Includes generated header files, scripts such as nvcc_wrapper and hpcbind, ## as well as other files provided through plugins. MACRO(KOKKOS_INSTALL_ADDITIONAL_FILES) - # kokkos_launch_compiler is used by Kokkos to prefix compiler commands so that they forward to nvcc_wrapper + + # kokkos_launch_compiler is used by Kokkos to prefix compiler commands so that they forward to original kokkos compiler + # if nvcc_wrapper was not used as CMAKE_CXX_COMPILER, configure the original compiler into kokkos_launch_compiler + IF(NOT "${CMAKE_CXX_COMPILER}" MATCHES "nvcc_wrapper") + SET(NVCC_WRAPPER_DEFAULT_COMPILER "${CMAKE_CXX_COMPILER}") + ELSE() + IF(NOT "$ENV{NVCC_WRAPPER_DEFAULT_COMPILER}" STREQUAL "") + SET(NVCC_WRAPPER_DEFAULT_COMPILER "$ENV{NVCC_WRAPPER_DEFAULT_COMPILER}") + ENDIF() + ENDIF() + + CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/bin/kokkos_launch_compiler + ${PROJECT_BINARY_DIR}/temp/kokkos_launch_compiler + @ONLY) + INSTALL(PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper" "${CMAKE_CURRENT_SOURCE_DIR}/bin/hpcbind" "${CMAKE_CURRENT_SOURCE_DIR}/bin/kokkos_launch_compiler" + "${PROJECT_BINARY_DIR}/temp/kokkos_launch_compiler" DESTINATION ${CMAKE_INSTALL_BINDIR}) INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" @@ -313,7 +343,7 @@ MACRO(KOKKOS_INSTALL_ADDITIONAL_FILES) "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_SetupBackend.hpp" "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_DeclareBackend.hpp" "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_PostInclude.hpp" - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + DESTINATION 
${KOKKOS_HEADER_DIR}) ENDMACRO() FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) @@ -330,24 +360,12 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) ${LIBRARY_NAME} PUBLIC $<$<LINK_LANGUAGE:CXX>:${KOKKOS_LINK_OPTIONS}> ) - ELSEIF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13") + ELSE() #I can use link options #just assume CXX linkage TARGET_LINK_OPTIONS( ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} ) - ELSE() - #assume CXX linkage, we have no good way to check otherwise - IF (PARSE_PLAIN_STYLE) - TARGET_LINK_LIBRARIES( - ${LIBRARY_NAME} ${KOKKOS_LINK_OPTIONS} - ) - ELSE() - #well, have to do it the wrong way for now - TARGET_LINK_LIBRARIES( - ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} - ) - ENDIF() ENDIF() TARGET_COMPILE_OPTIONS( @@ -448,6 +466,13 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) ${PARSE_SOURCES} ) + IF(PARSE_SHARED OR BUILD_SHARED_LIBS) + SET_TARGET_PROPERTIES(${LIBRARY_NAME} PROPERTIES + VERSION ${Kokkos_VERSION} + SOVERSION ${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR} + ) + ENDIF() + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) #In case we are building in-tree, add an alias name diff --git a/packages/kokkos/containers/src/CMakeLists.txt b/packages/kokkos/containers/src/CMakeLists.txt index 7000624b6bcfca69bf2bae30bdae7d971a067a63..98655896d4f351418fc60e5330cd194fa2358d0e 100644 --- a/packages/kokkos/containers/src/CMakeLists.txt +++ b/packages/kokkos/containers/src/CMakeLists.txt @@ -26,8 +26,6 @@ KOKKOS_ADD_LIBRARY( HEADERS ${KOKKOS_CONTAINER_HEADERS} ) -SET_TARGET_PROPERTIES(kokkoscontainers PROPERTIES VERSION ${Kokkos_VERSION}) - KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscontainers ${KOKKOS_TOP_BUILD_DIR} ${CMAKE_CURRENT_BINARY_DIR} @@ -36,4 +34,3 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscontainers KOKKOS_LINK_INTERNAL_LIBRARY(kokkoscontainers kokkoscore) #----------------------------------------------------------------------------- - diff --git a/packages/kokkos/containers/src/Kokkos_DualView.hpp 
b/packages/kokkos/containers/src/Kokkos_DualView.hpp index 689f0eb2ed4e14597ce22d284060fd9b5576eb18..45710d1f737ca14348dd79d698bbc4a581225bbb 100644 --- a/packages/kokkos/containers/src/Kokkos_DualView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DualView.hpp @@ -91,6 +91,25 @@ namespace Kokkos { * behavior. Please see the documentation of Kokkos::View for * examples. The default suffices for most users. */ + +namespace Impl { + +#ifdef KOKKOS_ENABLE_CUDA + +inline const Kokkos::Cuda& get_cuda_space(const Kokkos::Cuda& in) { return in; } + +inline const Kokkos::Cuda& get_cuda_space() { + return *Kokkos::Impl::cuda_get_deep_copy_space(); +} + +template <typename NonCudaExecSpace> +inline const Kokkos::Cuda& get_cuda_space(const NonCudaExecSpace&) { + return get_cuda_space(); +} + +#endif // KOKKOS_ENABLE_CUDA + +} // namespace Impl template <class DataType, class Arg1Type = void, class Arg2Type = void, class Arg3Type = void> class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { @@ -295,6 +314,53 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { "DualView constructed with incompatible views"); } } + // does the DualView have only one device + struct impl_dualview_is_single_device { + enum : bool { + value = std::is_same<typename t_dev::device_type, + typename t_host::device_type>::value + }; + }; + + // does the given device match the device of t_dev? + template <typename Device> + struct impl_device_matches_tdev_device { + enum : bool { + value = std::is_same<typename t_dev::device_type, Device>::value + }; + }; + // does the given device match the device of t_host? + template <typename Device> + struct impl_device_matches_thost_device { + enum : bool { + value = std::is_same<typename t_host::device_type, Device>::value + }; + }; + + // does the given device match the execution space of t_host? 
+ template <typename Device> + struct impl_device_matches_thost_exec { + enum : bool { + value = std::is_same<typename t_host::execution_space, Device>::value + }; + }; + + // does the given device match the execution space of t_dev? + template <typename Device> + struct impl_device_matches_tdev_exec { + enum : bool { + value = std::is_same<typename t_dev::execution_space, Device>::value + }; + }; + + // does the given device's memory space match the memory space of t_dev? + template <typename Device> + struct impl_device_matches_tdev_memory_space { + enum : bool { + value = std::is_same<typename t_dev::memory_space, + typename Device::memory_space>::value + }; + }; //@} //! \name Methods for synchronizing, marking as modified, and getting Views. @@ -302,7 +368,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { /// \brief Return a View on a specific device \c Device. /// - /// Please don't be afraid of the if_c expression in the return + /// Please don't be afraid of the nested if_c expressions in the return /// value's type. That just tells the method what the return type /// should be: t_dev if the \c Device template parameter matches /// this DualView's device type, else t_host. 
@@ -323,10 +389,17 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { /// typename dual_view_type::t_host hostView = DV.view<host_device_type> (); /// \endcode template <class Device> - KOKKOS_INLINE_FUNCTION const typename Impl::if_c< - std::is_same<typename t_dev::memory_space, - typename Device::memory_space>::value, - t_dev, t_host>::type& + KOKKOS_INLINE_FUNCTION const typename std::conditional_t< + impl_device_matches_tdev_device<Device>::value, t_dev, + typename std::conditional_t< + impl_device_matches_thost_device<Device>::value, t_host, + typename std::conditional_t< + impl_device_matches_thost_exec<Device>::value, t_host, + typename std::conditional_t< + impl_device_matches_tdev_exec<Device>::value, t_dev, + typename std::conditional_t< + impl_device_matches_tdev_memory_space<Device>::value, + t_dev, t_host> > > > > view() const { constexpr bool device_is_memspace = std::is_same<Device, typename Device::memory_space>::value; @@ -463,6 +536,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { true); } } + /// \brief Update data on device or host only if data in the other /// space has been marked as modified. /// @@ -480,12 +554,9 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { /// the data in either View. You must manually mark modified data /// as modified, by calling the modify() method with the /// appropriate template parameter. - template <class Device> - void sync(const typename std::enable_if< - (std::is_same<typename traits::data_type, - typename traits::non_const_data_type>::value) || - (std::is_same<Device, int>::value), - int>::type& = 0) { + // deliberately passing args by cref as they're used multiple times + template <class Device, class... Args> + void sync_impl(std::true_type, Args const&... 
args) { if (modified_flags.data() == nullptr) return; int dev = get_device_side<Device>(); @@ -497,12 +568,12 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { Kokkos::CudaUVMSpace>::value) { if (d_view.data() == h_view.data()) Kokkos::Impl::cuda_prefetch_pointer( - Kokkos::Cuda(), d_view.data(), + Impl::get_cuda_space(args...), d_view.data(), sizeof(typename t_dev::value_type) * d_view.span(), true); } #endif - deep_copy(d_view, h_view); + deep_copy(args..., d_view, h_view); modified_flags(0) = modified_flags(1) = 0; impl_report_device_sync(); } @@ -514,12 +585,12 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { Kokkos::CudaUVMSpace>::value) { if (d_view.data() == h_view.data()) Kokkos::Impl::cuda_prefetch_pointer( - Kokkos::Cuda(), d_view.data(), + Impl::get_cuda_space(args...), d_view.data(), sizeof(typename t_dev::value_type) * d_view.span(), false); } #endif - deep_copy(h_view, d_view); + deep_copy(args..., h_view, d_view); modified_flags(0) = modified_flags(1) = 0; impl_report_host_sync(); } @@ -533,10 +604,26 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { template <class Device> void sync(const typename std::enable_if< - (!std::is_same<typename traits::data_type, - typename traits::non_const_data_type>::value) || + (std::is_same<typename traits::data_type, + typename traits::non_const_data_type>::value) || (std::is_same<Device, int>::value), int>::type& = 0) { + sync_impl<Device>(std::true_type{}); + } + + template <class Device, class ExecutionSpace> + void sync(const ExecutionSpace& exec, + const typename std::enable_if< + (std::is_same<typename traits::data_type, + typename traits::non_const_data_type>::value) || + (std::is_same<Device, int>::value), + int>::type& = 0) { + sync_impl<Device>(std::true_type{}, exec); + } + + // deliberately passing args by cref as they're used multiple times + template <class Device, class... 
Args> + void sync_impl(std::false_type, Args const&...) { if (modified_flags.data() == nullptr) return; int dev = get_device_side<Device>(); @@ -557,7 +644,27 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { } } - void sync_host() { + template <class Device> + void sync(const typename std::enable_if< + (!std::is_same<typename traits::data_type, + typename traits::non_const_data_type>::value) || + (std::is_same<Device, int>::value), + int>::type& = 0) { + sync_impl<Device>(std::false_type{}); + } + template <class Device, class ExecutionSpace> + void sync(const ExecutionSpace& exec, + const typename std::enable_if< + (!std::is_same<typename traits::data_type, + typename traits::non_const_data_type>::value) || + (std::is_same<Device, int>::value), + int>::type& = 0) { + sync_impl<Device>(std::false_type{}, exec); + } + + // deliberately passing args by cref as they're used multiple times + template <typename... Args> + void sync_host_impl(Args const&... args) { if (!std::is_same<typename traits::data_type, typename traits::non_const_data_type>::value) Impl::throw_runtime_exception( @@ -569,18 +676,26 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { Kokkos::CudaUVMSpace>::value) { if (d_view.data() == h_view.data()) Kokkos::Impl::cuda_prefetch_pointer( - Kokkos::Cuda(), d_view.data(), + Impl::get_cuda_space(args...), d_view.data(), sizeof(typename t_dev::value_type) * d_view.span(), false); } #endif - deep_copy(h_view, d_view); + deep_copy(args..., h_view, d_view); modified_flags(1) = modified_flags(0) = 0; impl_report_host_sync(); } } - void sync_device() { + template <class ExecSpace> + void sync_host(const ExecSpace& exec) { + sync_host_impl(exec); + } + void sync_host() { sync_host_impl(); } + + // deliberately passing args by cref as they're used multiple times + template <typename... Args> + void sync_device_impl(Args const&... 
args) { if (!std::is_same<typename traits::data_type, typename traits::non_const_data_type>::value) Impl::throw_runtime_exception( @@ -592,17 +707,23 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { Kokkos::CudaUVMSpace>::value) { if (d_view.data() == h_view.data()) Kokkos::Impl::cuda_prefetch_pointer( - Kokkos::Cuda(), d_view.data(), + Impl::get_cuda_space(args...), d_view.data(), sizeof(typename t_dev::value_type) * d_view.span(), true); } #endif - deep_copy(d_view, h_view); + deep_copy(args..., d_view, h_view); modified_flags(1) = modified_flags(0) = 0; impl_report_device_sync(); } } + template <class ExecSpace> + void sync_device(const ExecSpace& exec) { + sync_device_impl(exec); + } + void sync_device() { sync_device_impl(); } + template <class Device> bool need_sync() const { if (modified_flags.data() == nullptr) return false; @@ -658,6 +779,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { template <class Device> void modify() { if (modified_flags.data() == nullptr) return; + if (impl_dualview_is_single_device::value) return; int dev = get_device_side<Device>(); if (dev == 1) { // if Device is the same as DualView's device type @@ -690,6 +812,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { } inline void modify_host() { + if (impl_dualview_is_single_device::value) return; if (modified_flags.data() != nullptr) { modified_flags(0) = (modified_flags(1) > modified_flags(0) ? modified_flags(1) @@ -710,6 +833,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> { } inline void modify_device() { + if (impl_dualview_is_single_device::value) return; if (modified_flags.data() != nullptr) { modified_flags(1) = (modified_flags(1) > modified_flags(0) ? 
modified_flags(1) diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp index c66d7a5f36caabc18c4559e85855529dbfae15b6..c6323fef93694de1ee39d5784141bf6991f78bd7 100644 --- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -245,13 +245,10 @@ KOKKOS_INLINE_FUNCTION bool dyn_rank_view_verify_operator_bounds( return (size_t(i) < map.extent(R)) && dyn_rank_view_verify_operator_bounds<R + 1>(rank, map, args...); } else if (i != 0) { - // FIXME_SYCL SYCL doesn't allow printf in kernels -#ifndef KOKKOS_ENABLE_SYCL - printf( + KOKKOS_IMPL_DO_NOT_USE_PRINTF( "DynRankView Debug Bounds Checking Error: at rank %u\n Extra " "arguments beyond the rank must be zero \n", R); -#endif return (false) && dyn_rank_view_verify_operator_bounds<R + 1>(rank, map, args...); } else { @@ -575,37 +572,22 @@ class DynRankView : public ViewTraits<DataType, Properties...> { (is_layout_left || is_layout_right || is_layout_stride) }; - template <class Space, bool = Kokkos::Impl::MemorySpaceAccess< - Space, typename traits::memory_space>::accessible> - struct verify_space { - KOKKOS_FORCEINLINE_FUNCTION static void check() {} - }; - - template <class Space> - struct verify_space<Space, false> { - KOKKOS_FORCEINLINE_FUNCTION static void check() { - Kokkos::abort( - "Kokkos::DynRankView ERROR: attempt to access inaccessible memory " - "space"); - }; - }; - // Bounds checking macros #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) // rank of the calling operator - included as first argument in ARG -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ - DynRankView::template verify_space< \ - Kokkos::Impl::ActiveExecutionMemorySpace>::check(); \ - Kokkos::Impl::dyn_rank_view_verify_operator_bounds< \ - typename traits::memory_space> \ +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ + Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \ + typename 
traits::memory_space>::check(); \ + Kokkos::Impl::dyn_rank_view_verify_operator_bounds< \ + typename traits::memory_space> \ ARG; #else -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ - DynRankView::template verify_space< \ - Kokkos::Impl::ActiveExecutionMemorySpace>::check(); +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ + Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \ + typename traits::memory_space>::check(); #endif diff --git a/packages/kokkos/containers/src/Kokkos_DynamicView.hpp b/packages/kokkos/containers/src/Kokkos_DynamicView.hpp index 06bd5566619926b5bb4c6f55e8a3166f90dcdb4b..cc949d4c556ab4abd982ea5334fee870c42ef305 100644 --- a/packages/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -76,6 +76,12 @@ struct ChunkArraySpace<Kokkos::Experimental::HIPSpace> { using memory_space = typename Kokkos::Experimental::HIPHostPinnedSpace; }; #endif +#ifdef KOKKOS_ENABLE_SYCL +template <> +struct ChunkArraySpace<Kokkos::Experimental::SYCLDeviceUSMSpace> { + using memory_space = typename Kokkos::Experimental::SYCLSharedUSMSpace; +}; +#endif } // end namespace Impl /** \brief Dynamic views are restricted to rank-one and no layout. 
diff --git a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp index 4fd084338ed731213d12792aca31826fcd89e75e..0f21a08ba3ba86ed176dc4c4535ef76c960e90bc 100644 --- a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp +++ b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp @@ -377,34 +377,20 @@ class OffsetView : public ViewTraits<DataType, Properties...> { std::is_same<typename traits::specialize, void>::value && (is_layout_left || is_layout_right || is_layout_stride); - template <class Space, bool = Kokkos::Impl::MemorySpaceAccess< - Space, typename traits::memory_space>::accessible> - struct verify_space { - KOKKOS_FORCEINLINE_FUNCTION static void check() {} - }; - - template <class Space> - struct verify_space<Space, false> { - KOKKOS_FORCEINLINE_FUNCTION static void check() { - Kokkos::abort( - "Kokkos::View ERROR: attempt to access inaccessible memory space"); - }; - }; - #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) -#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY(ARG) \ - OffsetView::template verify_space< \ - Kokkos::Impl::ActiveExecutionMemorySpace>::check(); \ - Kokkos::Experimental::Impl::offsetview_verify_operator_bounds< \ - typename traits::memory_space> \ +#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY(ARG) \ + Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \ + typename traits::memory_space>::check(); \ + Kokkos::Experimental::Impl::offsetview_verify_operator_bounds< \ + typename traits::memory_space> \ ARG; #else -#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY(ARG) \ - OffsetView::template verify_space< \ - Kokkos::Impl::ActiveExecutionMemorySpace>::check(); +#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY(ARG) \ + Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \ + typename traits::memory_space>::check(); #endif public: diff --git a/packages/kokkos/containers/src/Kokkos_ScatterView.hpp b/packages/kokkos/containers/src/Kokkos_ScatterView.hpp index 
5e18f5a80eaba9ab4227bc648a7548d4bcb9802a..dcd4cf73e5d710bc427772a8a8de6384e80c9dae 100644 --- a/packages/kokkos/containers/src/Kokkos_ScatterView.hpp +++ b/packages/kokkos/containers/src/Kokkos_ScatterView.hpp @@ -649,13 +649,13 @@ struct ReduceDuplicatesBase { size_t stride; size_t start; size_t n; - ReduceDuplicatesBase(ValueType const* src_in, ValueType* dest_in, - size_t stride_in, size_t start_in, size_t n_in, - std::string const& name) + ReduceDuplicatesBase(ExecSpace const& exec_space, ValueType const* src_in, + ValueType* dest_in, size_t stride_in, size_t start_in, + size_t n_in, std::string const& name) : src(src_in), dst(dest_in), stride(stride_in), start(start_in), n(n_in) { parallel_for( std::string("Kokkos::ScatterView::ReduceDuplicates [") + name + "]", - RangePolicy<ExecSpace, size_t>(0, stride), + RangePolicy<ExecSpace, size_t>(exec_space, 0, stride), static_cast<Derived const&>(*this)); } }; @@ -667,9 +667,10 @@ template <typename ExecSpace, typename ValueType, typename Op> struct ReduceDuplicates : public ReduceDuplicatesBase<ExecSpace, ValueType, Op> { using Base = ReduceDuplicatesBase<ExecSpace, ValueType, Op>; - ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in, - size_t start_in, size_t n_in, std::string const& name) - : Base(src_in, dst_in, stride_in, start_in, n_in, name) {} + ReduceDuplicates(ExecSpace const& exec_space, ValueType const* src_in, + ValueType* dst_in, size_t stride_in, size_t start_in, + size_t n_in, std::string const& name) + : Base(exec_space, src_in, dst_in, stride_in, start_in, n_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { for (size_t j = Base::start; j < Base::n; ++j) { ScatterValue<ValueType, Op, ExecSpace, @@ -687,12 +688,12 @@ template <typename ExecSpace, typename ValueType, typename Op> struct ResetDuplicatesBase { using Derived = ResetDuplicates<ExecSpace, ValueType, Op>; ValueType* data; - ResetDuplicatesBase(ValueType* data_in, size_t size_in, - 
std::string const& name) + ResetDuplicatesBase(ExecSpace const& exec_space, ValueType* data_in, + size_t size_in, std::string const& name) : data(data_in) { parallel_for( std::string("Kokkos::ScatterView::ResetDuplicates [") + name + "]", - RangePolicy<ExecSpace, size_t>(0, size_in), + RangePolicy<ExecSpace, size_t>(exec_space, 0, size_in), static_cast<Derived const&>(*this)); } }; @@ -703,8 +704,9 @@ struct ResetDuplicatesBase { template <typename ExecSpace, typename ValueType, typename Op> struct ResetDuplicates : public ResetDuplicatesBase<ExecSpace, ValueType, Op> { using Base = ResetDuplicatesBase<ExecSpace, ValueType, Op>; - ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name) - : Base(data_in, size_in, name) {} + ResetDuplicates(ExecSpace const& exec_space, ValueType* data_in, + size_t size_in, std::string const& name) + : Base(exec_space, data_in, size_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { ScatterValue<ValueType, Op, ExecSpace, Kokkos::Experimental::ScatterNonAtomic> @@ -713,6 +715,16 @@ struct ResetDuplicates : public ResetDuplicatesBase<ExecSpace, ValueType, Op> { } }; +template <typename... P> +void check_scatter_view_allocation_properties_argument( + ViewCtorProp<P...> const&) { + static_assert(ViewCtorProp<P...>::has_execution_space && + ViewCtorProp<P...>::has_label && + ViewCtorProp<P...>::initialize, + "Allocation property must have an execution name as well as a " + "label, and must perform the view initialization"); +} + } // namespace Experimental } // namespace Impl } // namespace Kokkos @@ -762,10 +774,26 @@ class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated, ScatterView(View<RT, RP...> const& original_view) : internal_view(original_view) {} + template <typename RT, typename... P, typename... RP> + ScatterView(execution_space const& /* exec_space */, + View<RT, RP...> const& original_view) + : internal_view(original_view) {} + template <typename... 
Dims> ScatterView(std::string const& name, Dims... dims) : internal_view(name, dims...) {} + // This overload allows specifying an execution space instance to be + // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as + // first argument. + template <typename... P, typename... Dims> + ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, Dims... dims) + : internal_view(arg_prop, dims...) { + using ::Kokkos::Impl::Experimental:: + check_scatter_view_allocation_properties_argument; + check_scatter_view_allocation_properties_argument(arg_prop); + } + template <typename OtherDataType, typename OtherDeviceType> KOKKOS_FUNCTION ScatterView( const ScatterView<OtherDataType, Layout, OtherDeviceType, Op, @@ -796,27 +824,41 @@ class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated, template <typename DT, typename... RP> void contribute_into(View<DT, RP...> const& dest) const { + contribute_into(execution_space(), dest); + } + + template <typename DT, typename... 
RP> + void contribute_into(execution_space const& exec_space, + View<DT, RP...> const& dest) const { using dest_type = View<DT, RP...>; static_assert(std::is_same<typename dest_type::array_layout, Layout>::value, "ScatterView contribute destination has different layout"); static_assert( - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< - memory_space, typename dest_type::memory_space>::value, + Kokkos::Impl::SpaceAccessibility< + execution_space, typename dest_type::memory_space>::accessible, "ScatterView contribute destination memory space not accessible"); if (dest.data() == internal_view.data()) return; Kokkos::Impl::Experimental::ReduceDuplicates<execution_space, original_value_type, Op>( - internal_view.data(), dest.data(), 0, 0, 1, internal_view.label()); + exec_space, internal_view.data(), dest.data(), 0, 0, 1, + internal_view.label()); } - void reset() { + void reset(execution_space const& exec_space = execution_space()) { Kokkos::Impl::Experimental::ResetDuplicates<execution_space, original_value_type, Op>( - internal_view.data(), internal_view.size(), internal_view.label()); + exec_space, internal_view.data(), internal_view.size(), + internal_view.label()); } template <typename DT, typename... RP> void reset_except(View<DT, RP...> const& view) { - if (view.data() != internal_view.data()) reset(); + reset_except(execution_space(), view); + } + + template <typename DT, typename... RP> + void reset_except(const execution_space& exec_space, + View<DT, RP...> const& view) { + if (view.data() != internal_view.data()) reset(exec_space); } void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, @@ -928,10 +970,16 @@ class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op, template <typename RT, typename... RP> ScatterView(View<RT, RP...> const& original_view) + : ScatterView(execution_space(), original_view) {} + + template <typename RT, typename... P, typename... 
RP> + ScatterView(execution_space const& exec_space, + View<RT, RP...> const& original_view) : unique_token(), internal_view( view_alloc(WithoutInitializing, - std::string("duplicated_") + original_view.label()), + std::string("duplicated_") + original_view.label(), + exec_space), unique_token.size(), original_view.rank_dynamic > 0 ? original_view.extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -949,14 +997,32 @@ class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op, : KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - reset(); + reset(exec_space); } template <typename... Dims> ScatterView(std::string const& name, Dims... dims) - : internal_view(view_alloc(WithoutInitializing, name), + : ScatterView(view_alloc(execution_space(), name), dims...) {} + + // This overload allows specifying an execution space instance to be + // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as + // first argument. + template <typename... P, typename... Dims> + ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, Dims... dims) + : internal_view(view_alloc(WithoutInitializing, + static_cast<::Kokkos::Impl::ViewCtorProp< + void, std::string> const&>(arg_prop) + .value), unique_token.size(), dims...) { - reset(); + using ::Kokkos::Impl::Experimental:: + check_scatter_view_allocation_properties_argument; + check_scatter_view_allocation_properties_argument(arg_prop); + + auto const exec_space = + static_cast<::Kokkos::Impl::ViewCtorProp<void, execution_space> const&>( + arg_prop) + .value; + reset(exec_space); } template <typename OverrideContribution = Contribution> @@ -984,37 +1050,51 @@ class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op, template <typename DT, typename... RP> void contribute_into(View<DT, RP...> const& dest) const { + contribute_into(execution_space(), dest); + } + + template <typename DT, typename... 
RP> + void contribute_into(execution_space const& exec_space, + View<DT, RP...> const& dest) const { using dest_type = View<DT, RP...>; static_assert(std::is_same<typename dest_type::array_layout, Kokkos::LayoutRight>::value, "ScatterView deep_copy destination has different layout"); static_assert( - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< - memory_space, typename dest_type::memory_space>::value, + Kokkos::Impl::SpaceAccessibility< + execution_space, typename dest_type::memory_space>::accessible, "ScatterView deep_copy destination memory space not accessible"); bool is_equal = (dest.data() == internal_view.data()); size_t start = is_equal ? 1 : 0; Kokkos::Impl::Experimental::ReduceDuplicates<execution_space, original_value_type, Op>( - internal_view.data(), dest.data(), internal_view.stride(0), start, - internal_view.extent(0), internal_view.label()); + exec_space, internal_view.data(), dest.data(), internal_view.stride(0), + start, internal_view.extent(0), internal_view.label()); } - void reset() { + void reset(execution_space const& exec_space = execution_space()) { Kokkos::Impl::Experimental::ResetDuplicates<execution_space, original_value_type, Op>( - internal_view.data(), internal_view.size(), internal_view.label()); + exec_space, internal_view.data(), internal_view.size(), + internal_view.label()); } + template <typename DT, typename... RP> void reset_except(View<DT, RP...> const& view) { + reset_except(execution_space(), view); + } + + template <typename DT, typename... 
RP> + void reset_except(execution_space const& exec_space, + View<DT, RP...> const& view) { if (view.data() != internal_view.data()) { - reset(); + reset(exec_space); return; } Kokkos::Impl::Experimental::ResetDuplicates<execution_space, original_value_type, Op>( - internal_view.data() + view.size(), internal_view.size() - view.size(), - internal_view.label()); + exec_space, internal_view.data() + view.size(), + internal_view.size() - view.size(), internal_view.label()); } void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, @@ -1075,7 +1155,13 @@ class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op, ScatterView() = default; template <typename RT, typename... RP> - ScatterView(View<RT, RP...> const& original_view) : unique_token() { + ScatterView(View<RT, RP...> const& original_view) + : ScatterView(execution_space(), original_view) {} + + template <typename RT, typename... P, typename... RP> + ScatterView(execution_space const& exec_space, + View<RT, RP...> const& original_view) + : unique_token() { size_t arg_N[8] = {original_view.rank > 0 ? original_view.extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank > 1 ? original_view.extent(1) @@ -1094,14 +1180,27 @@ class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op, arg_N[internal_view_type::rank - 1] = unique_token.size(); internal_view = internal_view_type( view_alloc(WithoutInitializing, - std::string("duplicated_") + original_view.label()), + std::string("duplicated_") + original_view.label(), + exec_space), arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6], arg_N[7]); - reset(); + reset(exec_space); } template <typename... Dims> - ScatterView(std::string const& name, Dims... dims) { + ScatterView(std::string const& name, Dims... dims) + : ScatterView(view_alloc(execution_space(), name), dims...) 
{} + + // This overload allows specifying an execution space instance to be + // used by passing, e.g., Kokkos::view_alloc(exec_space, "label") as + // first argument. + template <typename... P, typename... Dims> + ScatterView(::Kokkos::Impl::ViewCtorProp<P...> const& arg_prop, + Dims... dims) { + using ::Kokkos::Impl::Experimental:: + check_scatter_view_allocation_properties_argument; + check_scatter_view_allocation_properties_argument(arg_prop); + original_view_type original_view; size_t arg_N[8] = {original_view.rank > 0 ? original_view.static_extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -1120,10 +1219,20 @@ class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op, KOKKOS_IMPL_CTOR_DEFAULT_ARG}; Kokkos::Impl::Experimental::args_to_array(arg_N, 0, dims...); arg_N[internal_view_type::rank - 1] = unique_token.size(); + + auto const name = + static_cast<::Kokkos::Impl::ViewCtorProp<void, std::string> const&>( + arg_prop) + .value; internal_view = internal_view_type(view_alloc(WithoutInitializing, name), arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6], arg_N[7]); - reset(); + + auto const exec_space = + static_cast<::Kokkos::Impl::ViewCtorProp<void, execution_space> const&>( + arg_prop) + .value; + reset(exec_space); } template <typename OtherDataType, typename OtherDeviceType> @@ -1166,6 +1275,12 @@ class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op, template <typename... RP> void contribute_into(View<RP...> const& dest) const { + contribute_into(execution_space(), dest); + } + + template <typename... 
RP> + void contribute_into(execution_space const& exec_space, + View<RP...> const& dest) const { using dest_type = View<RP...>; static_assert( std::is_same<typename dest_type::value_type, @@ -1175,34 +1290,42 @@ class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op, Kokkos::LayoutLeft>::value, "ScatterView deep_copy destination has different layout"); static_assert( - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< - memory_space, typename dest_type::memory_space>::value, + Kokkos::Impl::SpaceAccessibility< + execution_space, typename dest_type::memory_space>::accessible, "ScatterView deep_copy destination memory space not accessible"); auto extent = internal_view.extent(internal_view_type::rank - 1); bool is_equal = (dest.data() == internal_view.data()); size_t start = is_equal ? 1 : 0; Kokkos::Impl::Experimental::ReduceDuplicates<execution_space, original_value_type, Op>( - internal_view.data(), dest.data(), + exec_space, internal_view.data(), dest.data(), internal_view.stride(internal_view_type::rank - 1), start, extent, internal_view.label()); } - void reset() { + void reset(execution_space const& exec_space = execution_space()) { Kokkos::Impl::Experimental::ResetDuplicates<execution_space, original_value_type, Op>( - internal_view.data(), internal_view.size(), internal_view.label()); + exec_space, internal_view.data(), internal_view.size(), + internal_view.label()); } + template <typename DT, typename... RP> void reset_except(View<DT, RP...> const& view) { + reset_except(execution_space(), view); + } + + template <typename DT, typename... 
RP> + void reset_except(execution_space const& exec_space, + View<DT, RP...> const& view) { if (view.data() != internal_view.data()) { - reset(); + reset(exec_space); return; } Kokkos::Impl::Experimental::ResetDuplicates<execution_space, original_value_type, Op>( - internal_view.data() + view.size(), internal_view.size() - view.size(), - internal_view.label()); + exec_space, internal_view.data() + view.size(), + internal_view.size() - view.size(), internal_view.label()); } void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, @@ -1316,21 +1439,21 @@ template <typename Op = Kokkos::Experimental::ScatterSum, ScatterView< RT, typename ViewTraits<RT, RP...>::array_layout, typename ViewTraits<RT, RP...>::device_type, Op, - typename Kokkos::Impl::if_c< + std::conditional_t< std::is_same<Duplication, void>::value, typename Kokkos::Impl::Experimental::DefaultDuplication< typename ViewTraits<RT, RP...>::execution_space>::type, - Duplication>::type, - typename Kokkos::Impl::if_c< + Duplication>, + std::conditional_t< std::is_same<Contribution, void>::value, typename Kokkos::Impl::Experimental::DefaultContribution< typename ViewTraits<RT, RP...>::execution_space, - typename Kokkos::Impl::if_c< + typename std::conditional_t< std::is_same<Duplication, void>::value, typename Kokkos::Impl::Experimental::DefaultDuplication< typename ViewTraits<RT, RP...>::execution_space>::type, - Duplication>::type>::type, - Contribution>::type> + Duplication>>::type, + Contribution>> create_scatter_view(View<RT, RP...> const& original_view) { return original_view; // implicit ScatterView constructor call } @@ -1365,12 +1488,21 @@ create_scatter_view(Op, Duplication, Contribution, namespace Kokkos { namespace Experimental { +template <typename DT1, typename DT2, typename LY, typename ES, typename OP, + typename CT, typename DP, typename... 
VP> +void contribute( + typename ES::execution_space const& exec_space, View<DT1, VP...>& dest, + Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) { + src.contribute_into(exec_space, dest); +} + template <typename DT1, typename DT2, typename LY, typename ES, typename OP, typename CT, typename DP, typename... VP> void contribute( View<DT1, VP...>& dest, Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src) { - src.contribute_into(dest); + using execution_space = typename ES::execution_space; + contribute(execution_space{}, dest, src); } } // namespace Experimental diff --git a/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp index d2affda93affa2f8e1d03b72a1cf0e49c415d158..edb0e7261da93bb629cad4e9cc7c7d3118868288 100644 --- a/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/packages/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -264,26 +264,24 @@ class UnorderedMap { private: enum : size_type { invalid_index = ~static_cast<size_type>(0) }; - using impl_value_type = - typename Impl::if_c<is_set, int, declared_value_type>::type; + using impl_value_type = std::conditional_t<is_set, int, declared_value_type>; - using key_type_view = typename Impl::if_c< + using key_type_view = std::conditional_t< is_insertable_map, View<key_type *, device_type>, - View<const key_type *, device_type, MemoryTraits<RandomAccess> > >::type; + View<const key_type *, device_type, MemoryTraits<RandomAccess> > >; - using value_type_view = - typename Impl::if_c<is_insertable_map || is_modifiable_map, - View<impl_value_type *, device_type>, - View<const impl_value_type *, device_type, - MemoryTraits<RandomAccess> > >::type; + using value_type_view = std::conditional_t< + is_insertable_map || is_modifiable_map, + View<impl_value_type *, device_type>, + View<const impl_value_type *, device_type, MemoryTraits<RandomAccess> > >; - using size_type_view = typename Impl::if_c< + using 
size_type_view = std::conditional_t< is_insertable_map, View<size_type *, device_type>, - View<const size_type *, device_type, MemoryTraits<RandomAccess> > >::type; + View<const size_type *, device_type, MemoryTraits<RandomAccess> > >; using bitset_type = - typename Impl::if_c<is_insertable_map, Bitset<execution_space>, - ConstBitset<execution_space> >::type; + std::conditional_t<is_insertable_map, Bitset<execution_space>, + ConstBitset<execution_space> >; enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 }; enum { num_scalars = 3 }; @@ -540,10 +538,7 @@ class UnorderedMap { // Previously claimed an unused entry that was not inserted. // Release this unused entry immediately. if (!m_available_indexes.reset(new_index)) { - // FIXME_SYCL SYCL doesn't allow printf in kernels -#ifndef KOKKOS_ENABLE_SYCL - printf("Unable to free existing\n"); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF("Unable to free existing\n"); } } @@ -659,8 +654,8 @@ class UnorderedMap { /// /// 'const value_type' via Cuda texture fetch must return by value. KOKKOS_FORCEINLINE_FUNCTION - typename Impl::if_c<(is_set || has_const_value), impl_value_type, - impl_value_type &>::type + std::conditional_t<(is_set || has_const_value), impl_value_type, + impl_value_type &> value_at(size_type i) const { return m_values[is_set ? 0 : (i < capacity() ? i : capacity())]; } diff --git a/packages/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp b/packages/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp index 6e450598d1eb3f1c9b533044bfaa5c46f035d519..6047e60f3dd080b8cfe456627ccc80266e7df66b 100644 --- a/packages/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp +++ b/packages/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp @@ -57,10 +57,22 @@ namespace Kokkos { namespace Impl { +KOKKOS_FORCEINLINE_FUNCTION +unsigned rotate_left(unsigned i, int r) { + constexpr int size = static_cast<int>(sizeof(unsigned) * CHAR_BIT); + return r ? 
((i << r) | (i >> (size - r))) : i; +} + KOKKOS_FORCEINLINE_FUNCTION unsigned rotate_right(unsigned i, int r) { - enum { size = static_cast<int>(sizeof(unsigned) * CHAR_BIT) }; + constexpr int size = static_cast<int>(sizeof(unsigned) * CHAR_BIT); + // FIXME_SYCL llvm.fshr.i32 missing + // (https://github.com/intel/llvm/issues/3308) +#ifdef __SYCL_DEVICE_ONLY__ + return rotate_left(i, size - r); +#else return r ? ((i >> r) | (i << (size - r))) : i; +#endif } template <typename Bitset> diff --git a/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp b/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp index b06ab0846c9a0f3d2dd2a082191030be68de5ae5..d7c4a5d1ffdf9969e3c158473e7fb5754113a665 100644 --- a/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp +++ b/packages/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp @@ -250,8 +250,8 @@ struct UnorderedMapPrint { uint32_t list = m_map.m_hash_lists(i); for (size_type curr = list, ii = 0; curr != invalid_index; curr = m_map.m_next_index[curr], ++ii) { - printf("%d[%d]: %d->%d\n", list, ii, m_map.key_at(curr), - m_map.value_at(curr)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF("%d[%d]: %d->%d\n", list, ii, + m_map.key_at(curr), m_map.value_at(curr)); } } }; diff --git a/packages/kokkos/containers/unit_tests/CMakeLists.txt b/packages/kokkos/containers/unit_tests/CMakeLists.txt index c84c5f6d5ec30ce6c9267dbd6c4719926fe81287..947d222c273dc4d87823ad3560a1af6c62a1e52b 100644 --- a/packages/kokkos/containers/unit_tests/CMakeLists.txt +++ b/packages/kokkos/containers/unit_tests/CMakeLists.txt @@ -2,6 +2,7 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) +KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/core/unit_test/category_files) foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP;SYCL) # Because there is always an exception to the 
rule @@ -41,11 +42,6 @@ foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP;SYCL) configure_file(${dir}/dummy.cpp ${file}) list(APPEND UnitTestSources ${file}) endforeach() - list(REMOVE_ITEM UnitTestSources - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Bitset.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_ScatterView.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_UnorderedMap.cpp - ) KOKKOS_ADD_EXECUTABLE_AND_TEST(UnitTest_${Tag} SOURCES ${UnitTestSources}) endif() endforeach() diff --git a/packages/kokkos/containers/unit_tests/Makefile b/packages/kokkos/containers/unit_tests/Makefile index f42b9b75190790ef693dc1b065781a32d61207e7..82669fe1ab7532b69556cafbb7131b595f9e5f8e 100644 --- a/packages/kokkos/containers/unit_tests/Makefile +++ b/packages/kokkos/containers/unit_tests/Makefile @@ -26,7 +26,7 @@ override LDFLAGS += -lpthread include $(KOKKOS_PATH)/Makefile.kokkos -KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files TEST_TARGETS = TARGETS = diff --git a/packages/kokkos/containers/unit_tests/TestCuda_Category.hpp b/packages/kokkos/containers/unit_tests/TestCuda_Category.hpp deleted file mode 100644 index 50935d7a34d1d2fe69311f33c71aaefb19f45080..0000000000000000000000000000000000000000 --- a/packages/kokkos/containers/unit_tests/TestCuda_Category.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_CUDA_HPP -#define KOKKOS_TEST_CUDA_HPP - -#define TEST_CATEGORY cuda -#define TEST_EXECSPACE Kokkos::Cuda - -#endif diff --git a/packages/kokkos/containers/unit_tests/TestDualView.hpp b/packages/kokkos/containers/unit_tests/TestDualView.hpp index 531caf0f85ce286cefae2f603c0f947dadf81594..3eee85ed10bd81bc8b511afa9f0fbde7ba244b8f 100644 --- a/packages/kokkos/containers/unit_tests/TestDualView.hpp +++ b/packages/kokkos/containers/unit_tests/TestDualView.hpp @@ -114,6 +114,8 @@ struct test_dualview_combinations { a.template modify<typename ViewType::execution_space>(); a.template sync<typename ViewType::host_mirror_space>(); + a.template sync<typename ViewType::host_mirror_space>( + Kokkos::DefaultExecutionSpace{}); a.h_view(5, 1) = 3; a.h_view(6, 1) = 4; @@ -122,11 +124,15 @@ struct test_dualview_combinations { ViewType b = Kokkos::subview(a, std::pair<unsigned int, unsigned int>(6, 9), std::pair<unsigned int, unsigned int>(0, 1)); a.template sync<typename ViewType::execution_space>(); + a.template sync<typename ViewType::execution_space>( + Kokkos::DefaultExecutionSpace{}); b.template modify<typename ViewType::execution_space>(); Kokkos::deep_copy(b.d_view, 2); a.template sync<typename ViewType::host_mirror_space>(); + a.template sync<typename ViewType::host_mirror_space>( + Kokkos::DefaultExecutionSpace{}); Scalar count = 0; for (unsigned int i = 0; i < a.d_view.extent(0); i++) for (unsigned int j = 0; j < a.d_view.extent(1); j++) @@ -180,6 +186,7 @@ struct test_dual_view_deep_copy { } else { a.modify_device(); a.sync_host(); + a.sync_host(Kokkos::DefaultExecutionSpace{}); } // Check device view is initialized as expected @@ -208,6 +215,7 @@ struct test_dual_view_deep_copy { b.template sync<typename ViewType::host_mirror_space>(); } else { b.sync_host(); + b.sync_host(Kokkos::DefaultExecutionSpace{}); } // Perform same 
checks on b as done on a @@ -302,6 +310,7 @@ struct test_dualview_resize { ASSERT_EQ(a.extent(1), m / factor); a.sync_device(); + a.sync_device(Kokkos::DefaultExecutionSpace{}); // Check device view is initialized as expected a_d_sum = 0; @@ -404,19 +413,14 @@ void test_dualview_resize() { Impl::test_dualview_resize<Scalar, Device>(); } -// FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL TEST(TEST_CATEGORY, dualview_combination) { test_dualview_combinations<int, TEST_EXECSPACE>(10, true); } -#endif TEST(TEST_CATEGORY, dualview_alloc) { test_dualview_alloc<int, TEST_EXECSPACE>(10); } -// FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL TEST(TEST_CATEGORY, dualview_combinations_without_init) { test_dualview_combinations<int, TEST_EXECSPACE>(10, false); } @@ -433,8 +437,133 @@ TEST(TEST_CATEGORY, dualview_realloc) { TEST(TEST_CATEGORY, dualview_resize) { test_dualview_resize<int, TEST_EXECSPACE>(); } + +namespace { +/** + * + * The following tests are a response to + * https://github.com/kokkos/kokkos/issues/3850 + * and + * https://github.com/kokkos/kokkos/pull/3857 + * + * DualViews were returning incorrect view types and taking + * inappropriate actions based on the templated view methods. + * + * Specifically, template view methods were always returning + * a device view if the memory space was UVM and a Kokkos::Device was passed. + * Sync/modify methods completely broke down So these tests exist to make sure + * that we keep the semantics of UVM DualViews intact. 
+ */ +// modify if we have other UVM enabled backends +#ifdef KOKKOS_ENABLE_CUDA // OR other UVM builds +#define UVM_ENABLED_BUILD +#endif + +#ifdef UVM_ENABLED_BUILD +template <typename ExecSpace> +struct UVMSpaceFor; +#endif + +#ifdef KOKKOS_ENABLE_CUDA // specific to CUDA +template <> +struct UVMSpaceFor<Kokkos::Cuda> { + using type = Kokkos::CudaUVMSpace; +}; +#endif + +#ifdef UVM_ENABLED_BUILD +template <> +struct UVMSpaceFor<Kokkos::DefaultHostExecutionSpace> { + using type = typename UVMSpaceFor<Kokkos::DefaultExecutionSpace>::type; +}; +#else +template <typename ExecSpace> +struct UVMSpaceFor { + using type = typename ExecSpace::memory_space; +}; #endif +using ExecSpace = Kokkos::DefaultExecutionSpace; +using MemSpace = typename UVMSpaceFor<Kokkos::DefaultExecutionSpace>::type; +using DeviceType = Kokkos::Device<ExecSpace, MemSpace>; + +using DualViewType = Kokkos::DualView<double*, Kokkos::LayoutLeft, DeviceType>; +using d_device = DeviceType; +using h_device = Kokkos::Device< + Kokkos::DefaultHostExecutionSpace, + typename UVMSpaceFor<Kokkos::DefaultHostExecutionSpace>::type>; + +TEST(TEST_CATEGORY, dualview_device_correct_kokkos_device) { + DualViewType dv("myView", 100); + dv.clear_sync_state(); + auto v_d = dv.template view<d_device>(); + using vdt = decltype(v_d); + using vdt_d = vdt::device_type; + using vdt_d_e = vdt_d::execution_space; + ASSERT_STREQ(vdt_d_e::name(), Kokkos::DefaultExecutionSpace::name()); +} +TEST(TEST_CATEGORY, dualview_host_correct_kokkos_device) { + DualViewType dv("myView", 100); + dv.clear_sync_state(); + auto v_h = dv.template view<h_device>(); + using vht = decltype(v_h); + using vht_d = vht::device_type; + using vht_d_e = vht_d::execution_space; + ASSERT_STREQ(vht_d_e::name(), Kokkos::DefaultHostExecutionSpace::name()); +} + +TEST(TEST_CATEGORY, dualview_host_modify_template_device_sync) { + DualViewType dv("myView", 100); + dv.clear_sync_state(); + dv.modify_host(); + dv.template sync<d_device>(); + 
EXPECT_TRUE(!dv.need_sync_device()); + EXPECT_TRUE(!dv.need_sync_host()); + dv.clear_sync_state(); +} + +TEST(TEST_CATEGORY, dualview_host_modify_template_device_execspace_sync) { + DualViewType dv("myView", 100); + dv.clear_sync_state(); + dv.modify_host(); + dv.template sync<d_device::execution_space>(); + EXPECT_TRUE(!dv.need_sync_device()); + EXPECT_TRUE(!dv.need_sync_host()); + dv.clear_sync_state(); +} + +TEST(TEST_CATEGORY, dualview_device_modify_template_host_sync) { + DualViewType dv("myView", 100); + dv.clear_sync_state(); + dv.modify_device(); + dv.template sync<h_device>(); + EXPECT_TRUE(!dv.need_sync_device()); + EXPECT_TRUE(!dv.need_sync_host()); + dv.clear_sync_state(); +} +TEST(TEST_CATEGORY, dualview_device_modify_template_host_execspace_sync) { + DualViewType dv("myView", 100); + dv.clear_sync_state(); + dv.modify_device(); + dv.template sync<h_device::execution_space>(); + EXPECT_TRUE(!dv.need_sync_device()); + EXPECT_TRUE(!dv.need_sync_host()); + dv.clear_sync_state(); +} + +TEST(TEST_CATEGORY, + dualview_template_views_return_correct_executionspace_views) { + DualViewType dv("myView", 100); + dv.clear_sync_state(); + using hvt = decltype(dv.view<typename Kokkos::DefaultHostExecutionSpace>()); + using dvt = decltype(dv.view<typename Kokkos::DefaultExecutionSpace>()); + ASSERT_STREQ(Kokkos::DefaultExecutionSpace::name(), + dvt::device_type::execution_space::name()); + ASSERT_STREQ(Kokkos::DefaultHostExecutionSpace::name(), + hvt::device_type::execution_space::name()); +} + +} // anonymous namespace } // namespace Test #endif // KOKKOS_TEST_DUALVIEW_HPP diff --git a/packages/kokkos/containers/unit_tests/TestDynamicView.hpp b/packages/kokkos/containers/unit_tests/TestDynamicView.hpp index 4b9f9944172452578ebe37675b274385c3ce840c..f018793dd6f3b162acbf9db20174c47ac75fc1c0 100644 --- a/packages/kokkos/containers/unit_tests/TestDynamicView.hpp +++ b/packages/kokkos/containers/unit_tests/TestDynamicView.hpp @@ -243,8 +243,6 @@ struct TestDynamicView { } 
}; -// FIXME_SYCL needs resize_serial -#ifndef KOKKOS_ENABLE_SYCL TEST(TEST_CATEGORY, dynamic_view) { using TestDynView = TestDynamicView<double, TEST_EXECSPACE>; @@ -252,7 +250,6 @@ TEST(TEST_CATEGORY, dynamic_view) { TestDynView::run(100000 + 100 * i); } } -#endif } // namespace Test diff --git a/packages/kokkos/containers/unit_tests/TestHPX_Category.hpp b/packages/kokkos/containers/unit_tests/TestHPX_Category.hpp deleted file mode 100644 index 64fc7c0757baca29e2c0e02099a4234330378eb7..0000000000000000000000000000000000000000 --- a/packages/kokkos/containers/unit_tests/TestHPX_Category.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_HPX_HPP -#define KOKKOS_TEST_HPX_HPP - -#define TEST_CATEGORY hpx -#define TEST_EXECSPACE Kokkos::Experimental::HPX - -#endif diff --git a/packages/kokkos/containers/unit_tests/TestOffsetView.hpp b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp index 802813b13b81d9f0b048aeec7b17cccae2507ce3..9ddc226e291f6e7dc7d6bc960fad470fafeb9974 100644 --- a/packages/kokkos/containers/unit_tests/TestOffsetView.hpp +++ b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp @@ -130,8 +130,6 @@ void test_offsetview_construction() { } } - // FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL const int ovmin0 = ov.begin(0); const int ovend0 = ov.end(0); const int ovmin1 = ov.begin(1); @@ -178,7 +176,6 @@ void test_offsetview_construction() { } ASSERT_EQ(OVResult, answer) << "Bad data found in OffsetView"; -#endif #endif { @@ -215,8 +212,6 @@ void test_offsetview_construction() { point3_type{{extent0, extent1, extent2}}); #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) - // FIXME_SYCL requires MDRange policy -#ifdef KOKKOS_ENABLE_SYCL int view3DSum = 0; Kokkos::parallel_reduce( rangePolicy3DZero, @@ -239,7 +234,6 @@ void test_offsetview_construction() { ASSERT_EQ(view3DSum, offsetView3DSum) << "construction of OffsetView from View 
and begins array broken."; -#endif #endif } view_type viewFromOV = ov.view(); @@ -266,8 +260,6 @@ void test_offsetview_construction() { Kokkos::deep_copy(aView, ov); #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) - // FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL int sum = 0; Kokkos::parallel_reduce( rangePolicy2D, @@ -277,7 +269,6 @@ void test_offsetview_construction() { sum); ASSERT_EQ(sum, 0) << "deep_copy(view, offsetView) broken."; -#endif #endif } @@ -288,8 +279,6 @@ void test_offsetview_construction() { Kokkos::deep_copy(ov, aView); #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) - // FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL int sum = 0; Kokkos::parallel_reduce( rangePolicy2D, @@ -299,7 +288,6 @@ void test_offsetview_construction() { sum); ASSERT_EQ(sum, 0) << "deep_copy(offsetView, view) broken."; -#endif #endif } } @@ -471,8 +459,6 @@ void test_offsetview_subview() { ASSERT_EQ(offsetSubview.end(1), 9); #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) - // FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL using range_type = Kokkos::MDRangePolicy<Device, Kokkos::Rank<2>, Kokkos::IndexType<int> >; using point_type = typename range_type::point_type; @@ -498,7 +484,6 @@ void test_offsetview_subview() { sum); ASSERT_EQ(sum, 6 * (e0 - b0) * (e1 - b1)); -#endif #endif } @@ -701,12 +686,9 @@ void test_offsetview_offsets_rank3() { } #endif -// FIXME_SYCL needs MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL TEST(TEST_CATEGORY, offsetview_construction) { test_offsetview_construction<int, TEST_EXECSPACE>(); } -#endif TEST(TEST_CATEGORY, offsetview_unmanaged_construction) { test_offsetview_unmanaged_construction<int, TEST_EXECSPACE>(); diff --git a/packages/kokkos/containers/unit_tests/TestOpenMP_Category.hpp b/packages/kokkos/containers/unit_tests/TestOpenMP_Category.hpp deleted file mode 100644 index 
a0169d170294ad9e7b32d847de09875b37bce8e0..0000000000000000000000000000000000000000 --- a/packages/kokkos/containers/unit_tests/TestOpenMP_Category.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_OPENMP_HPP -#define KOKKOS_TEST_OPENMP_HPP - -#define TEST_CATEGORY openmp -#define TEST_EXECSPACE Kokkos::OpenMP - -#endif diff --git a/packages/kokkos/containers/unit_tests/TestSYCL_Category.hpp b/packages/kokkos/containers/unit_tests/TestSYCL_Category.hpp deleted file mode 100644 index 51fd3fc91118f55cf68cdac1cf2b532a3978364f..0000000000000000000000000000000000000000 --- a/packages/kokkos/containers/unit_tests/TestSYCL_Category.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_SYCL_HPP -#define KOKKOS_TEST_SYCL_HPP - -#define TEST_CATEGORY sycl -#define TEST_EXECSPACE Kokkos::Experimental::SYCL - -#endif diff --git a/packages/kokkos/containers/unit_tests/TestScatterView.hpp b/packages/kokkos/containers/unit_tests/TestScatterView.hpp index 3a3cb607a64e67908381bdb24e796c6ac40758c7..fdbce2d492009cf38d5491398d77423108edc6a5 100644 --- a/packages/kokkos/containers/unit_tests/TestScatterView.hpp +++ b/packages/kokkos/containers/unit_tests/TestScatterView.hpp @@ -437,6 +437,10 @@ struct test_scatter_view_config { Contribution, Op, NumberType>::orig_view_type; + void compile_constructor() { + auto sv = scatter_view_def(Kokkos::view_alloc(DeviceType{}, "label"), 10); + } + void run_test(int n) { // test allocation { diff --git a/packages/kokkos/containers/unit_tests/TestSerial_Category.hpp b/packages/kokkos/containers/unit_tests/TestSerial_Category.hpp deleted file mode 100644 index 2aa09a315ae01e70a4267e6214fe478bbd0a9592..0000000000000000000000000000000000000000 --- a/packages/kokkos/containers/unit_tests/TestSerial_Category.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_SERIAL_HPP -#define KOKKOS_TEST_SERIAL_HPP - -#define TEST_CATEGORY serial -#define TEST_EXECSPACE Kokkos::Serial - -#endif diff --git a/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp b/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp index 8bb267ce5d9701ea68538f0612f3bdcefcd3a0e0..a9a178f95e7b7fedabcb7b00b292d88603ff3f77 100644 --- a/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp +++ b/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp @@ -285,10 +285,7 @@ void run_test_graph4() { TEST(TEST_CATEGORY, staticcrsgraph) { TestStaticCrsGraph::run_test_graph<TEST_EXECSPACE>(); - // FIXME_SYCL requires MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL TestStaticCrsGraph::run_test_graph2<TEST_EXECSPACE>(); -#endif TestStaticCrsGraph::run_test_graph3<TEST_EXECSPACE>(1, 0); TestStaticCrsGraph::run_test_graph3<TEST_EXECSPACE>(1, 1000); TestStaticCrsGraph::run_test_graph3<TEST_EXECSPACE>(1, 10000); diff --git a/packages/kokkos/containers/unit_tests/TestThreads_Category.hpp b/packages/kokkos/containers/unit_tests/TestThreads_Category.hpp deleted file mode 100644 index 74a2b0da362e3226230c0f11e3a7fc987eb9a615..0000000000000000000000000000000000000000 --- a/packages/kokkos/containers/unit_tests/TestThreads_Category.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_THREADS_HPP -#define KOKKOS_TEST_THREADS_HPP - -#define TEST_CATEGORY threads -#define TEST_EXECSPACE Kokkos::Threads - -#endif diff --git a/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp index d39e0061c747c78abbd30d0284cc398a41714326..4413cfbc80e31271d1e2b830976796ade24aaa9a 100644 --- a/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp +++ b/packages/kokkos/containers/unit_tests/TestUnorderedMap.hpp @@ -163,7 +163,8 @@ struct TestFind { KOKKOS_INLINE_FUNCTION void operator()(typename execution_space::size_type i, value_type &errors) const { - const bool expect_to_find_i = (i < m_max_key); + const bool expect_to_find_i = + (i < typename execution_space::size_type(m_max_key)); const bool exists = m_map.exists(i); @@ -293,10 +294,11 @@ void test_deep_copy(uint32_t num_nodes) { } } -// FIXME_HIP wrong result in CI but works locally -#ifndef KOKKOS_ENABLE_HIP +// FIXME_SYCL wrong results on Nvidia GPUs but correct on Host and Intel GPUs +// FIXME_HIP // WORKAROUND MSVC -#ifndef _WIN32 +#if !(defined(KOKKOS_ENABLE_HIP) && (HIP_VERSION < 401)) && \ + !defined(_WIN32) && !defined(KOKKOS_ENABLE_SYCL) TEST(TEST_CATEGORY, UnorderedMap_insert) { for (int i = 0; i < 500; ++i) { test_insert<TEST_EXECSPACE>(100000, 90000, 100, true); @@ -304,7 +306,6 @@ TEST(TEST_CATEGORY, UnorderedMap_insert) { } } #endif -#endif TEST(TEST_CATEGORY, UnorderedMap_failed_insert) { for (int i = 0; i < 1000; ++i) test_failed_insert<TEST_EXECSPACE>(10000); diff --git a/packages/kokkos/core/perf_test/CMakeLists.txt b/packages/kokkos/core/perf_test/CMakeLists.txt index b7b817c910974b615f50e5b6bdb76e3429c66d27..9ff4b6006da8cb0358f2a9e53810b79ce59e8b02 100644 --- a/packages/kokkos/core/perf_test/CMakeLists.txt +++ b/packages/kokkos/core/perf_test/CMakeLists.txt @@ -9,6 +9,14 @@ 
# that in TriBITS KokkosAlgorithms can be disabled... #INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../../algorithms/src") +# FIXME_OPENMPTARGET - the NVIDIA HPC compiler nvc++ in the OpenMPTarget backend does not pass the perf_tests. +IF (KOKKOS_ENABLE_OPENMPTARGET + AND (KOKKOS_CXX_COMPILER_ID STREQUAL PGI + OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) + RETURN() +ENDIF() + + SET(SOURCES PerfTestMain.cpp PerfTestGramSchmidt.cpp @@ -68,8 +76,7 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) # This test currently times out for MSVC -# FIXME_SYCL these tests don't compile yet (require parallel_for). -IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC" AND NOT Kokkos_ENABLE_SYCL) +IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") KOKKOS_ADD_EXECUTABLE_AND_TEST( PerfTestExec SOURCES ${SOURCES} @@ -77,13 +84,11 @@ IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC" AND NOT Kokkos_ENABLE_SYCL) ) ENDIF() -# FIXME_SYCL -IF(NOT Kokkos_ENABLE_SYCL) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - PerformanceTest_Atomic - SOURCES test_atomic.cpp - CATEGORIES PERFORMANCE - ) +KOKKOS_ADD_EXECUTABLE_AND_TEST( + PerformanceTest_Atomic + SOURCES test_atomic.cpp + CATEGORIES PERFORMANCE +) IF(NOT KOKKOS_ENABLE_CUDA OR KOKKOS_ENABLE_CUDA_LAMBDA) KOKKOS_ADD_EXECUTABLE_AND_TEST( @@ -98,7 +103,6 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( SOURCES test_mempool.cpp CATEGORIES PERFORMANCE ) -ENDIF() IF(NOT Kokkos_ENABLE_OPENMPTARGET) # FIXME OPENMPTARGET needs tasking diff --git a/packages/kokkos/core/perf_test/PerfTestGramSchmidt.cpp b/packages/kokkos/core/perf_test/PerfTestGramSchmidt.cpp index 70186283c1a76789b1ab943b3793f36f55b9f258..dee21fd7a575bd5aa0f6838980c670510f475cab 100644 --- a/packages/kokkos/core/perf_test/PerfTestGramSchmidt.cpp +++ b/packages/kokkos/core/perf_test/PerfTestGramSchmidt.cpp @@ -69,7 +69,7 @@ struct InvNorm2 : public Kokkos::DotSingle<VectorView> { KOKKOS_INLINE_FUNCTION void 
final(value_type& result) const { - result = std::sqrt(result); + result = Kokkos::Experimental::sqrt(result); Rjj() = result; inv() = (0 < result) ? 1.0 / result : 0; } @@ -145,7 +145,7 @@ struct ModifiedGramSchmidt { // Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ; Kokkos::scale(tmp, Qj); - for (size_t k = j + 1; k < count; ++k) { + for (size_type k = j + 1; k < count; ++k) { const vector_type Qk = Kokkos::subview(Q_, Kokkos::ALL(), k); const value_view Rjk = Kokkos::subview(R_, j, k); @@ -165,7 +165,7 @@ struct ModifiedGramSchmidt { //-------------------------------------------------------------------------- - static double test(const size_t length, const size_t count, + static double test(const size_type length, const size_type count, const size_t iter = 1) { multivector_type Q_("Q", length, count); multivector_type R_("R", count, count); diff --git a/packages/kokkos/core/src/CMakeLists.txt b/packages/kokkos/core/src/CMakeLists.txt index e0590a78a4bce924e7b71de13f67603a275a8464..2ab0989805723ce32115d379dd39708b5edd8209 100644 --- a/packages/kokkos/core/src/CMakeLists.txt +++ b/packages/kokkos/core/src/CMakeLists.txt @@ -72,8 +72,6 @@ KOKKOS_ADD_LIBRARY( ADD_BUILD_OPTIONS # core should be given all the necessary compiler/linker flags ) -SET_TARGET_PROPERTIES(kokkoscore PROPERTIES VERSION ${Kokkos_VERSION}) - KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscore ${KOKKOS_TOP_BUILD_DIR} ${CMAKE_CURRENT_BINARY_DIR} @@ -87,3 +85,4 @@ KOKKOS_LINK_TPL(kokkoscore PUBLIC HPX) KOKKOS_LINK_TPL(kokkoscore PUBLIC LIBDL) KOKKOS_LINK_TPL(kokkoscore PUBLIC LIBRT) KOKKOS_LINK_TPL(kokkoscore PUBLIC PTHREAD) +KOKKOS_LINK_TPL(kokkoscore PUBLIC ROCM) diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 4a30c914f0808c675c3f7f5b3a88a1f94322b149..916f109758de4ba3cf469659d7458ae77cf464da 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -45,6 +45,10 @@ 
#include <Kokkos_Macros.hpp> #ifdef KOKKOS_ENABLE_CUDA +#include <Kokkos_Core.hpp> +#include <Kokkos_Cuda.hpp> +#include <Kokkos_CudaSpace.hpp> + #include <cstdlib> #include <iostream> #include <sstream> @@ -52,10 +56,6 @@ #include <algorithm> #include <atomic> -#include <Kokkos_Core.hpp> -#include <Kokkos_Cuda.hpp> -#include <Kokkos_CudaSpace.hpp> - //#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp> #include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_MemorySpace.hpp> @@ -65,6 +65,22 @@ /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ +cudaStream_t Kokkos::Impl::cuda_get_deep_copy_stream() { + static cudaStream_t s = nullptr; + if (s == nullptr) { + cudaStreamCreate(&s); + } + return s; +} + +const std::unique_ptr<Kokkos::Cuda> &Kokkos::Impl::cuda_get_deep_copy_space( + bool initialize) { + static std::unique_ptr<Cuda> space = nullptr; + if (!space && initialize) + space = std::make_unique<Cuda>(Kokkos::Impl::cuda_get_deep_copy_stream()); + return space; +} + namespace Kokkos { namespace Impl { @@ -72,13 +88,6 @@ namespace { static std::atomic<int> num_uvm_allocations(0); -cudaStream_t get_deep_copy_stream() { - static cudaStream_t s = nullptr; - if (s == nullptr) { - cudaStreamCreate(&s); - } - return s; -} } // namespace DeepCopy<CudaSpace, CudaSpace, Cuda>::DeepCopy(void *dst, const void *src, @@ -115,7 +124,7 @@ DeepCopy<CudaSpace, HostSpace, Cuda>::DeepCopy(const Cuda &instance, void *dst, } void DeepCopyAsyncCuda(void *dst, const void *src, size_t n) { - cudaStream_t s = get_deep_copy_stream(); + cudaStream_t s = cuda_get_deep_copy_stream(); CUDA_SAFE_CALL(cudaMemcpyAsync(dst, src, n, cudaMemcpyDefault, s)); cudaStreamSynchronize(s); } @@ -128,14 +137,14 @@ void DeepCopyAsyncCuda(void *dst, const void *src, size_t n) { namespace Kokkos { -void CudaSpace::access_error() { +KOKKOS_DEPRECATED void CudaSpace::access_error() { const std::string msg( 
"Kokkos::CudaSpace::access_error attempt to execute Cuda function from " "non-Cuda space"); Kokkos::Impl::throw_runtime_exception(msg); } -void CudaSpace::access_error(const void *const) { +KOKKOS_DEPRECATED void CudaSpace::access_error(const void *const) { const std::string msg( "Kokkos::CudaSpace::access_error attempt to execute Cuda function from " "non-Cuda space"); @@ -459,79 +468,6 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::attach_texture_object( return tex_obj; } -//============================================================================== -// <editor-fold desc="SharedAllocationRecord::get_label()"> {{{1 - -std::string SharedAllocationRecord<Kokkos::CudaSpace, void>::get_label() const { - SharedAllocationHeader header; - - Kokkos::Impl::DeepCopy<Kokkos::HostSpace, Kokkos::CudaSpace>( - &header, RecordBase::head(), sizeof(SharedAllocationHeader)); - - return std::string(header.m_label); -} - -std::string SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::get_label() - const { - return std::string(RecordBase::head()->m_label); -} - -std::string -SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::get_label() const { - return std::string(RecordBase::head()->m_label); -} - -// </editor-fold> end SharedAllocationRecord::get_label() }}}1 -//============================================================================== - -//============================================================================== -// <editor-fold desc="SharedAllocationRecord allocate()"> {{{1 - -SharedAllocationRecord<Kokkos::CudaSpace, void> - *SharedAllocationRecord<Kokkos::CudaSpace, void>::allocate( - const Kokkos::CudaSpace &arg_space, const std::string &arg_label, - const size_t arg_alloc_size) { - return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); -} - -SharedAllocationRecord<Kokkos::CudaUVMSpace, void> - *SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::allocate( - const Kokkos::CudaUVMSpace &arg_space, const std::string &arg_label, - const 
size_t arg_alloc_size) { - return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); -} - -SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void> - *SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::allocate( - const Kokkos::CudaHostPinnedSpace &arg_space, - const std::string &arg_label, const size_t arg_alloc_size) { - return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); -} - -// </editor-fold> end SharedAllocationRecord allocate() }}}1 -//============================================================================== - -//============================================================================== -// <editor-fold desc="SharedAllocationRecord deallocate"> {{{1 - -void SharedAllocationRecord<Kokkos::CudaSpace, void>::deallocate( - SharedAllocationRecord<void, void> *arg_rec) { - delete static_cast<SharedAllocationRecord *>(arg_rec); -} - -void SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::deallocate( - SharedAllocationRecord<void, void> *arg_rec) { - delete static_cast<SharedAllocationRecord *>(arg_rec); -} - -void SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::deallocate( - SharedAllocationRecord<void, void> *arg_rec) { - delete static_cast<SharedAllocationRecord *>(arg_rec); -} - -// </editor-fold> end SharedAllocationRecord deallocate }}}1 -//============================================================================== - //============================================================================== // <editor-fold desc="SharedAllocationRecord destructors"> {{{1 @@ -580,7 +516,7 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord( const SharedAllocationRecord<void, void>::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through deallocation function - : SharedAllocationRecord<void, void>( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG &SharedAllocationRecord<Kokkos::CudaSpace, void>::s_root_record, #endif @@ -592,13 +528,7 @@ 
SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord( SharedAllocationHeader header; - // Fill in the Header information - header.m_record = static_cast<SharedAllocationRecord<void, void> *>(this); - - strncpy(header.m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); - // Set last element zero, in case c_str is too long - header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; + this->base_t::_fill_host_accessible_header_info(header, arg_label); // Copy to device memory Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(RecordBase::m_alloc_ptr, &header, @@ -611,7 +541,7 @@ SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::SharedAllocationRecord( const SharedAllocationRecord<void, void>::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through deallocation function - : SharedAllocationRecord<void, void>( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG &SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::s_root_record, #endif @@ -620,16 +550,8 @@ SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::SharedAllocationRecord( sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc), m_tex_obj(0), m_space(arg_space) { - // Fill in the Header information, directly accessible via UVM - - RecordBase::m_alloc_ptr->m_record = this; - - strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); - - // Set last element zero, in case c_str is too long - RecordBase::m_alloc_ptr - ->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; + this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr, + arg_label); } SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>:: @@ -639,7 +561,7 @@ SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>:: const SharedAllocationRecord<void, void>::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through 
deallocation function - : SharedAllocationRecord<void, void>( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG &SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::s_root_record, @@ -648,319 +570,13 @@ SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>:: arg_alloc_size), sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc), m_space(arg_space) { - // Fill in the Header information, directly accessible on the host - - RecordBase::m_alloc_ptr->m_record = this; - - strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); - // Set last element zero, in case c_str is too long - RecordBase::m_alloc_ptr - ->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; + this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr, + arg_label); } // </editor-fold> end SharedAllocationRecord constructors }}}1 //============================================================================== -//============================================================================== -// <editor-fold desc="SharedAllocationRecored::(re|de|)allocate_tracked"> {{{1 - -void *SharedAllocationRecord<Kokkos::CudaSpace, void>::allocate_tracked( - const Kokkos::CudaSpace &arg_space, const std::string &arg_alloc_label, - const size_t arg_alloc_size) { - if (!arg_alloc_size) return nullptr; - - SharedAllocationRecord *const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - - RecordBase::increment(r); - - return r->data(); -} - -void SharedAllocationRecord<Kokkos::CudaSpace, void>::deallocate_tracked( - void *const arg_alloc_ptr) { - if (arg_alloc_ptr != nullptr) { - SharedAllocationRecord *const r = get_record(arg_alloc_ptr); - - RecordBase::decrement(r); - } -} - -void *SharedAllocationRecord<Kokkos::CudaSpace, void>::reallocate_tracked( - void *const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord *const r_new = - 
allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - Kokkos::Impl::DeepCopy<CudaSpace, CudaSpace>( - r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); - - RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); -} - -void *SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::allocate_tracked( - const Kokkos::CudaUVMSpace &arg_space, const std::string &arg_alloc_label, - const size_t arg_alloc_size) { - if (!arg_alloc_size) return nullptr; - - SharedAllocationRecord *const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - - RecordBase::increment(r); - - return r->data(); -} - -void SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::deallocate_tracked( - void *const arg_alloc_ptr) { - if (arg_alloc_ptr != nullptr) { - SharedAllocationRecord *const r = get_record(arg_alloc_ptr); - - RecordBase::decrement(r); - } -} - -void *SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::reallocate_tracked( - void *const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord *const r_new = - allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - Kokkos::Impl::DeepCopy<CudaUVMSpace, CudaUVMSpace>( - r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); - - RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); -} - -void * -SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::allocate_tracked( - const Kokkos::CudaHostPinnedSpace &arg_space, - const std::string &arg_alloc_label, const size_t arg_alloc_size) { - if (!arg_alloc_size) return nullptr; - - SharedAllocationRecord *const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - - RecordBase::increment(r); - - return r->data(); -} - -void SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, - void>::deallocate_tracked(void *const - arg_alloc_ptr) { - if (arg_alloc_ptr != nullptr) { - 
SharedAllocationRecord *const r = get_record(arg_alloc_ptr); - - RecordBase::decrement(r); - } -} - -void * -SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::reallocate_tracked( - void *const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord *const r_new = - allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - Kokkos::Impl::DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace>( - r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); - - RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); -} - -// </editor-fold> end SharedAllocationRecored::(re|de|)allocate_tracked }}}1 -//============================================================================== - -//============================================================================== -// <editor-fold desc="SharedAllocationRecord::get_record()"> {{{1 - -SharedAllocationRecord<Kokkos::CudaSpace, void> * -SharedAllocationRecord<Kokkos::CudaSpace, void>::get_record(void *alloc_ptr) { - using RecordCuda = SharedAllocationRecord<Kokkos::CudaSpace, void>; - - using Header = SharedAllocationHeader; - - // Copy the header from the allocation - Header head; - - Header const *const head_cuda = - alloc_ptr ? Header::get_header(alloc_ptr) : nullptr; - - if (alloc_ptr) { - Kokkos::Impl::DeepCopy<HostSpace, CudaSpace>( - &head, head_cuda, sizeof(SharedAllocationHeader)); - } - - RecordCuda *const record = - alloc_ptr ? 
static_cast<RecordCuda *>(head.m_record) : nullptr; - - if (!alloc_ptr || record->m_alloc_ptr != head_cuda) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , " - "void >::get_record ERROR")); - } - - return record; -} - -SharedAllocationRecord<Kokkos::CudaUVMSpace, void> *SharedAllocationRecord< - Kokkos::CudaUVMSpace, void>::get_record(void *alloc_ptr) { - using Header = SharedAllocationHeader; - using RecordCuda = SharedAllocationRecord<Kokkos::CudaUVMSpace, void>; - - Header *const h = - alloc_ptr ? reinterpret_cast<Header *>(alloc_ptr) - 1 : nullptr; - - if (!alloc_ptr || h->m_record->m_alloc_ptr != h) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::SharedAllocationRecord< " - "Kokkos::CudaUVMSpace , void >::get_record ERROR")); - } - - return static_cast<RecordCuda *>(h->m_record); -} - -SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void> - *SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::get_record( - void *alloc_ptr) { - using Header = SharedAllocationHeader; - using RecordCuda = SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>; - - Header *const h = - alloc_ptr ? reinterpret_cast<Header *>(alloc_ptr) - 1 : nullptr; - - if (!alloc_ptr || h->m_record->m_alloc_ptr != h) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::SharedAllocationRecord< " - "Kokkos::CudaHostPinnedSpace , void >::get_record ERROR")); - } - - return static_cast<RecordCuda *>(h->m_record); -} - -// </editor-fold> end SharedAllocationRecord::get_record() }}}1 -//============================================================================== - -//============================================================================== -// <editor-fold desc="SharedAllocationRecord::print_records()"> {{{1 - -// Iterate records to print orphaned memory ... 
-void SharedAllocationRecord<Kokkos::CudaSpace, void>::print_records( - std::ostream &s, const Kokkos::CudaSpace &, bool detail) { - (void)s; - (void)detail; -#ifdef KOKKOS_ENABLE_DEBUG - SharedAllocationRecord<void, void> *r = &s_root_record; - - char buffer[256]; - - SharedAllocationHeader head; - - if (detail) { - do { - if (r->m_alloc_ptr) { - Kokkos::Impl::DeepCopy<HostSpace, CudaSpace>( - &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); - } else { - head.m_label[0] = 0; - } - - // Formatting dependent on sizeof(uintptr_t) - const char *format_string; - - if (sizeof(uintptr_t) == sizeof(unsigned long)) { - format_string = - "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx " - "+ %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"; - } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { - format_string = - "Cuda addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ " - "0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n"; - } - - snprintf(buffer, 256, format_string, reinterpret_cast<uintptr_t>(r), - reinterpret_cast<uintptr_t>(r->m_prev), - reinterpret_cast<uintptr_t>(r->m_next), - reinterpret_cast<uintptr_t>(r->m_alloc_ptr), r->m_alloc_size, - r->m_count, reinterpret_cast<uintptr_t>(r->m_dealloc), - head.m_label); - s << buffer; - r = r->m_next; - } while (r != &s_root_record); - } else { - do { - if (r->m_alloc_ptr) { - Kokkos::Impl::DeepCopy<HostSpace, CudaSpace>( - &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); - - // Formatting dependent on sizeof(uintptr_t) - const char *format_string; - - if (sizeof(uintptr_t) == sizeof(unsigned long)) { - format_string = "Cuda [ 0x%.12lx + %ld ] %s\n"; - } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { - format_string = "Cuda [ 0x%.12llx + %ld ] %s\n"; - } - - snprintf(buffer, 256, format_string, - reinterpret_cast<uintptr_t>(r->data()), r->size(), - head.m_label); - } else { - snprintf(buffer, 256, "Cuda [ 0 + 0 ]\n"); - } - s << buffer; - r = r->m_next; - } while (r != 
&s_root_record); - } -#else - Kokkos::Impl::throw_runtime_exception( - "SharedAllocationHeader<CudaSpace>::print_records only works with " - "KOKKOS_ENABLE_DEBUG enabled"); -#endif -} - -void SharedAllocationRecord<Kokkos::CudaUVMSpace, void>::print_records( - std::ostream &s, const Kokkos::CudaUVMSpace &, bool detail) { - (void)s; - (void)detail; -#ifdef KOKKOS_ENABLE_DEBUG - SharedAllocationRecord<void, void>::print_host_accessible_records( - s, "CudaUVM", &s_root_record, detail); -#else - Kokkos::Impl::throw_runtime_exception( - "SharedAllocationHeader<CudaSpace>::print_records only works with " - "KOKKOS_ENABLE_DEBUG enabled"); -#endif -} - -void SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>::print_records( - std::ostream &s, const Kokkos::CudaHostPinnedSpace &, bool detail) { - (void)s; - (void)detail; -#ifdef KOKKOS_ENABLE_DEBUG - SharedAllocationRecord<void, void>::print_host_accessible_records( - s, "CudaHostPinned", &s_root_record, detail); -#else - Kokkos::Impl::throw_runtime_exception( - "SharedAllocationHeader<CudaSpace>::print_records only works with " - "KOKKOS_ENABLE_DEBUG enabled"); -#endif -} - -// </editor-fold> end SharedAllocationRecord::print_records() }}}1 -//============================================================================== - void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes, bool to_device) { if ((ptr == nullptr) || (bytes == 0)) return; @@ -984,6 +600,29 @@ void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes, } // namespace Impl } // namespace Kokkos + +//============================================================================== +// <editor-fold desc="Explicit instantiations of CRTP Base classes"> {{{1 + +#include <impl/Kokkos_SharedAlloc_timpl.hpp> + +namespace Kokkos { +namespace Impl { + +// To avoid additional compilation cost for something that's (mostly?) 
not +// performance sensitive, we explicity instantiate these CRTP base classes here, +// where we have access to the associated *_timpl.hpp header files. +template class SharedAllocationRecordCommon<Kokkos::CudaSpace>; +template class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>; +template class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>; +template class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>; + +} // end namespace Impl +} // end namespace Kokkos + +// </editor-fold> end Explicit instantiations of CRTP Base classes }}}1 +//============================================================================== + #else void KOKKOS_CORE_SRC_CUDA_CUDASPACE_PREVENT_LINK_ERROR() {} #endif // KOKKOS_ENABLE_CUDA diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp index 0d6d3bdb3ac5389e894f01ad4edff845b63b7b53..0f4259072d97f26c0032e674bdf60b9031fcee11 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp @@ -140,7 +140,7 @@ inline int cuda_deduce_block_size(bool early_termination, } } - if (early_termination && blocks_per_sm != 0) break; + if (early_termination && opt_block_size != 0) break; } return opt_block_size; @@ -222,7 +222,8 @@ inline size_t get_shmem_per_sm_prefer_l1(cudaDeviceProp const& properties) { case 52: case 61: return 96; case 70: - case 80: return 8; + case 80: + case 86: return 8; case 75: return 32; default: Kokkos::Impl::throw_runtime_exception( diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp index a9a62380e5a4c26289bd96a08c3814ade0832cf1..ec9c434fe663900a5d5029896a5c98ce13266605 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp @@ -175,30 +175,42 @@ class half_t { return 
cast_from_half<unsigned long long>(*this); } + /** + * Conversion constructors. + * + * Support implicit conversions from impl_type, float, double -> half_t + * Mixed precision expressions require upcasting which is done in the + * "// Binary Arithmetic" operator overloads below. + * + * Support implicit conversions from integral types -> half_t. + * Expressions involving half_t with integral types require downcasting + * the integral types to half_t. Existing operator overloads can handle this + * with the addition of the below implicit conversion constructors. + */ KOKKOS_FUNCTION half_t(impl_type rhs) : val(rhs) {} KOKKOS_FUNCTION - explicit half_t(float rhs) : val(cast_to_half(rhs).val) {} + half_t(float rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(bool rhs) : val(cast_to_half(rhs).val) {} + half_t(double rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(double rhs) : val(cast_to_half(rhs).val) {} + explicit half_t(bool rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(short rhs) : val(cast_to_half(rhs).val) {} + half_t(short rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(int rhs) : val(cast_to_half(rhs).val) {} + half_t(int rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(long rhs) : val(cast_to_half(rhs).val) {} + half_t(long rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(long long rhs) : val(cast_to_half(rhs).val) {} + half_t(long long rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(unsigned short rhs) : val(cast_to_half(rhs).val) {} + half_t(unsigned short rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(unsigned int rhs) : val(cast_to_half(rhs).val) {} + half_t(unsigned int rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - explicit half_t(unsigned long rhs) : val(cast_to_half(rhs).val) {} + half_t(unsigned long rhs) : val(cast_to_half(rhs).val) {} KOKKOS_FUNCTION - 
explicit half_t(unsigned long long rhs) : val(cast_to_half(rhs).val) {} + half_t(unsigned long long rhs) : val(cast_to_half(rhs).val) {} // Unary operators KOKKOS_FUNCTION @@ -243,7 +255,7 @@ class half_t { #else float tmp = __half2float(val); --tmp; - val = __float2half(tmp); + val = __float2half(tmp); #endif return *this; } @@ -276,88 +288,317 @@ class half_t { return *this; } + template <class T> + KOKKOS_FUNCTION void operator=(T rhs) volatile { + val = cast_to_half(rhs).val; + } + // Compound operators KOKKOS_FUNCTION half_t& operator+=(half_t rhs) { #ifdef __CUDA_ARCH__ val += rhs.val; #else - val = __float2half(__half2float(val) + __half2float(rhs.val)); + val = __float2half(__half2float(val) + __half2float(rhs.val)); #endif return *this; } + KOKKOS_FUNCTION + volatile half_t& operator+=(half_t rhs) volatile { +#ifdef __CUDA_ARCH__ + // Cuda 10 supports __half volatile stores but not volatile arithmetic + // operands. Cast away volatile-ness of val for arithmetic but not for store + // location. 
+ val = const_cast<impl_type&>(val) + rhs.val; +#else + // Use non-volatile val_ref to suppress: + // "warning: implicit dereference will not access object of type ‘volatile + // __half’ in statement" + auto val_ref = const_cast<impl_type&>(val); + val_ref = __float2half(__half2float(const_cast<impl_type&>(val)) + + __half2float(rhs.val)); +#endif + return *this; + } + + // Compound operators: upcast overloads for += + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator+=(T& lhs, half_t rhs) { + lhs += static_cast<T>(rhs); + return lhs; + } + + KOKKOS_FUNCTION + half_t& operator+=(float rhs) { + float result = static_cast<float>(val) + rhs; + val = static_cast<impl_type>(result); + return *this; + } + + KOKKOS_FUNCTION + half_t& operator+=(double rhs) { + double result = static_cast<double>(val) + rhs; + val = static_cast<impl_type>(result); + return *this; + } + KOKKOS_FUNCTION half_t& operator-=(half_t rhs) { #ifdef __CUDA_ARCH__ val -= rhs.val; #else - val = __float2half(__half2float(val) - __half2float(rhs.val)); + val = __float2half(__half2float(val) - __half2float(rhs.val)); #endif return *this; } + KOKKOS_FUNCTION + volatile half_t& operator-=(half_t rhs) volatile { +#ifdef __CUDA_ARCH__ + // Cuda 10 supports __half volatile stores but not volatile arithmetic + // operands. Cast away volatile-ness of val for arithmetic but not for store + // location. 
+ val = const_cast<impl_type&>(val) - rhs.val; +#else + // Use non-volatile val_ref to suppress: + // "warning: implicit dereference will not access object of type ‘volatile + // __half’ in statement" + auto val_ref = const_cast<impl_type&>(val); + val_ref = __float2half(__half2float(const_cast<impl_type&>(val)) - + __half2float(rhs.val)); +#endif + return *this; + } + + // Compound operators: upcast overloads for -= + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator-=(T& lhs, half_t rhs) { + lhs -= static_cast<T>(rhs); + return lhs; + } + + KOKKOS_FUNCTION + half_t& operator-=(float rhs) { + float result = static_cast<float>(val) - rhs; + val = static_cast<impl_type>(result); + return *this; + } + + KOKKOS_FUNCTION + half_t& operator-=(double rhs) { + double result = static_cast<double>(val) - rhs; + val = static_cast<impl_type>(result); + return *this; + } + KOKKOS_FUNCTION half_t& operator*=(half_t rhs) { #ifdef __CUDA_ARCH__ val *= rhs.val; #else - val = __float2half(__half2float(val) * __half2float(rhs.val)); + val = __float2half(__half2float(val) * __half2float(rhs.val)); #endif return *this; } + KOKKOS_FUNCTION + volatile half_t& operator*=(half_t rhs) volatile { +#ifdef __CUDA_ARCH__ + // Cuda 10 supports __half volatile stores but not volatile arithmetic + // operands. Cast away volatile-ness of val for arithmetic but not for store + // location. 
+ val = const_cast<impl_type&>(val) * rhs.val; +#else + // Use non-volatile val_ref to suppress: + // "warning: implicit dereference will not access object of type ‘volatile + // __half’ in statement" + auto val_ref = const_cast<impl_type&>(val); + val_ref = __float2half(__half2float(const_cast<impl_type&>(val)) * + __half2float(rhs.val)); +#endif + return *this; + } + + // Compound operators: upcast overloads for *= + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator*=(T& lhs, half_t rhs) { + lhs *= static_cast<T>(rhs); + return lhs; + } + + KOKKOS_FUNCTION + half_t& operator*=(float rhs) { + float result = static_cast<float>(val) * rhs; + val = static_cast<impl_type>(result); + return *this; + } + + KOKKOS_FUNCTION + half_t& operator*=(double rhs) { + double result = static_cast<double>(val) * rhs; + val = static_cast<impl_type>(result); + return *this; + } + KOKKOS_FUNCTION half_t& operator/=(half_t rhs) { #ifdef __CUDA_ARCH__ val /= rhs.val; #else - val = __float2half(__half2float(val) / __half2float(rhs.val)); + val = __float2half(__half2float(val) / __half2float(rhs.val)); #endif return *this; } + KOKKOS_FUNCTION + volatile half_t& operator/=(half_t rhs) volatile { +#ifdef __CUDA_ARCH__ + // Cuda 10 supports __half volatile stores but not volatile arithmetic + // operands. Cast away volatile-ness of val for arithmetic but not for store + // location. 
+ val = const_cast<impl_type&>(val) / rhs.val; +#else + // Use non-volatile val_ref to suppress: + // "warning: implicit dereference will not access object of type ‘volatile + // __half’ in statement" + auto val_ref = const_cast<impl_type&>(val); + val_ref = __float2half(__half2float(const_cast<impl_type&>(val)) / + __half2float(rhs.val)); +#endif + return *this; + } + + // Compound operators: upcast overloads for /= + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator/=(T& lhs, half_t rhs) { + lhs /= static_cast<T>(rhs); + return lhs; + } + + KOKKOS_FUNCTION + half_t& operator/=(float rhs) { + float result = static_cast<float>(val) / rhs; + val = static_cast<impl_type>(result); + return *this; + } + + KOKKOS_FUNCTION + half_t& operator/=(double rhs) { + double result = static_cast<double>(val) / rhs; + val = static_cast<impl_type>(result); + return *this; + } + // Binary Arithmetic KOKKOS_FUNCTION half_t friend operator+(half_t lhs, half_t rhs) { #ifdef __CUDA_ARCH__ lhs.val += rhs.val; #else - lhs.val = __float2half(__half2float(lhs.val) + __half2float(rhs.val)); + lhs.val = __float2half(__half2float(lhs.val) + __half2float(rhs.val)); #endif return lhs; } + // Binary Arithmetic upcast operators for + + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator+(half_t lhs, T rhs) { + return T(lhs) + rhs; + } + + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator+(T lhs, half_t rhs) { + return lhs + T(rhs); + } + KOKKOS_FUNCTION half_t friend operator-(half_t lhs, half_t rhs) { #ifdef __CUDA_ARCH__ lhs.val -= rhs.val; #else - lhs.val = __float2half(__half2float(lhs.val) - __half2float(rhs.val)); + lhs.val = __float2half(__half2float(lhs.val) - __half2float(rhs.val)); #endif return lhs; } + // Binary 
Arithmetic upcast operators for - + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator-(half_t lhs, T rhs) { + return T(lhs) - rhs; + } + + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator-(T lhs, half_t rhs) { + return lhs - T(rhs); + } + KOKKOS_FUNCTION half_t friend operator*(half_t lhs, half_t rhs) { #ifdef __CUDA_ARCH__ lhs.val *= rhs.val; #else - lhs.val = __float2half(__half2float(lhs.val) * __half2float(rhs.val)); + lhs.val = __float2half(__half2float(lhs.val) * __half2float(rhs.val)); #endif return lhs; } + // Binary Arithmetic upcast operators for * + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator*(half_t lhs, T rhs) { + return T(lhs) * rhs; + } + + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator*(T lhs, half_t rhs) { + return lhs * T(rhs); + } + KOKKOS_FUNCTION half_t friend operator/(half_t lhs, half_t rhs) { #ifdef __CUDA_ARCH__ lhs.val /= rhs.val; #else - lhs.val = __float2half(__half2float(lhs.val) / __half2float(rhs.val)); + lhs.val = __float2half(__half2float(lhs.val) / __half2float(rhs.val)); #endif return lhs; } + // Binary Arithmetic upcast operators for / + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator/(half_t lhs, T rhs) { + return T(lhs) / rhs; + } + + template <class T> + KOKKOS_FUNCTION std::enable_if_t< + std::is_same<T, float>::value || std::is_same<T, double>::value, T> friend + operator/(T lhs, half_t rhs) { + return lhs / T(rhs); + } + // Logical operators KOKKOS_FUNCTION bool operator!() const { diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp 
b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp index b8e816345873ac756378ae13fee4db1fdf2dcaa6..016cb6cdcbdd37740613724bb99efb9b4c32d7d4 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -54,6 +54,7 @@ #include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp> #include <Cuda/Kokkos_Cuda_Instance.hpp> #include <Cuda/Kokkos_Cuda_Locks.hpp> +#include <Cuda/Kokkos_Cuda_UniqueToken.hpp> #include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_Tools.hpp> @@ -248,11 +249,11 @@ void CudaInternal::print_configuration(std::ostream &s) const { const CudaInternalDevices &dev_info = CudaInternalDevices::singleton(); #if defined(KOKKOS_ENABLE_CUDA) - s << "macro KOKKOS_ENABLE_CUDA : defined" << std::endl; + s << "macro KOKKOS_ENABLE_CUDA : defined\n"; #endif #if defined(CUDA_VERSION) s << "macro CUDA_VERSION = " << CUDA_VERSION << " = version " - << CUDA_VERSION / 1000 << "." << (CUDA_VERSION % 1000) / 10 << std::endl; + << CUDA_VERSION / 1000 << "." << (CUDA_VERSION % 1000) / 10 << '\n'; #endif for (int i = 0; i < dev_info.m_cudaDevCount; ++i) { @@ -274,7 +275,6 @@ CudaInternal::~CudaInternal() { m_scratchConcurrentBitset) { std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()" << std::endl; - std::cerr.flush(); } m_cudaDev = -1; @@ -358,8 +358,7 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { if (m_cudaArch == 0) { std::stringstream ss; - ss << "Kokkos::Cuda::initialize ERROR: likely mismatch of architecture" - << std::endl; + ss << "Kokkos::Cuda::initialize ERROR: likely mismatch of architecture\n"; std::string msg = ss.str(); Kokkos::abort(msg.c_str()); } @@ -373,7 +372,7 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { "compute capability " << compiled_major << "." << compiled_minor << " on device with compute capability " << cudaProp.major << "." - << cudaProp.minor << " is not supported by CUDA!" 
<< std::endl; + << cudaProp.minor << " is not supported by CUDA!\n"; std::string msg = ss.str(); Kokkos::abort(msg.c_str()); } @@ -458,7 +457,7 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>; Record *const r = - Record::allocate(Kokkos::CudaSpace(), "InternalScratchBitset", + Record::allocate(Kokkos::CudaSpace(), "Kokkos::InternalScratchBitset", sizeof(uint32_t) * buffer_bound); Record::increment(r); @@ -492,17 +491,11 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { #ifdef KOKKOS_ENABLE_CUDA_UVM if (Kokkos::show_warnings() && !cuda_launch_blocking()) { - std::cerr << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into " - "UVMSpace by default" - << std::endl; - std::cerr << " without setting " - "CUDA_LAUNCH_BLOCKING=1." - << std::endl; - std::cerr << " The code must call " - "Cuda().fence() after each kernel" - << std::endl; - std::cerr << " or will likely crash when " - "accessing data on the host." + std::cerr << R"warning( +Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default + without setting CUDA_LAUNCH_BLOCKING=1. + The code must call Cuda().fence() after each kernel + or will likely crash when accessing data on the host.)warning" << std::endl; } @@ -520,19 +513,13 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { if (Kokkos::show_warnings() && (!visible_devices_one && !force_device_alloc)) { - std::cerr << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into " - "UVMSpace by default" + std::cerr << R"warning( +Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default + without setting CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 or + setting CUDA_VISIBLE_DEVICES. 
+ This could on multi GPU systems lead to severe performance + penalties.)warning" << std::endl; - std::cerr << " without setting " - "CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 or " - << std::endl; - std::cerr - << " setting CUDA_VISIBLE_DEVICES." - << std::endl; - std::cerr << " This could on multi GPU " - "systems lead to severe performance" - << std::endl; - std::cerr << " penalties." << std::endl; } #endif @@ -575,7 +562,7 @@ Cuda::size_type *CudaInternal::scratch_flags(const Cuda::size_type size) const { if (m_scratchFlags) Record::decrement(Record::get_record(m_scratchFlags)); Record *const r = - Record::allocate(Kokkos::CudaSpace(), "InternalScratchFlags", + Record::allocate(Kokkos::CudaSpace(), "Kokkos::InternalScratchFlags", (sizeof(ScratchGrain) * m_scratchFlagsCount)); Record::increment(r); @@ -600,7 +587,7 @@ Cuda::size_type *CudaInternal::scratch_space(const Cuda::size_type size) const { if (m_scratchSpace) Record::decrement(Record::get_record(m_scratchSpace)); Record *const r = - Record::allocate(Kokkos::CudaSpace(), "InternalScratchSpace", + Record::allocate(Kokkos::CudaSpace(), "Kokkos::InternalScratchSpace", (sizeof(ScratchGrain) * m_scratchSpaceCount)); Record::increment(r); @@ -624,7 +611,7 @@ Cuda::size_type *CudaInternal::scratch_unified( Record::decrement(Record::get_record(m_scratchUnified)); Record *const r = Record::allocate( - Kokkos::CudaHostPinnedSpace(), "InternalScratchUnified", + Kokkos::CudaHostPinnedSpace(), "Kokkos::InternalScratchUnified", (sizeof(ScratchGrain) * m_scratchUnifiedCount)); Record::increment(r); @@ -646,8 +633,9 @@ Cuda::size_type *CudaInternal::scratch_functor( if (m_scratchFunctor) Record::decrement(Record::get_record(m_scratchFunctor)); - Record *const r = Record::allocate( - Kokkos::CudaSpace(), "InternalScratchFunctor", m_scratchFunctorSize); + Record *const r = + Record::allocate(Kokkos::CudaSpace(), "Kokkos::InternalScratchFunctor", + m_scratchFunctorSize); Record::increment(r); @@ -662,7 +650,7 @@ void 
*CudaInternal::resize_team_scratch_space(std::int64_t bytes, if (m_team_scratch_current_size == 0) { m_team_scratch_current_size = bytes; m_team_scratch_ptr = Kokkos::kokkos_malloc<Kokkos::CudaSpace>( - "CudaSpace::ScratchMemory", m_team_scratch_current_size); + "Kokkos::CudaSpace::TeamScratchMemory", m_team_scratch_current_size); } if ((bytes > m_team_scratch_current_size) || ((bytes < m_team_scratch_current_size) && (force_shrink))) { @@ -676,6 +664,9 @@ void *CudaInternal::resize_team_scratch_space(std::int64_t bytes, //---------------------------------------------------------------------------- void CudaInternal::finalize() { + // skip if finalize() has already been called + if (was_finalized) return; + was_finalized = true; if (nullptr != m_scratchSpace || nullptr != m_scratchFlags) { // Only finalize this if we're the singleton @@ -719,6 +710,11 @@ void CudaInternal::finalize() { if (this == &singleton()) { cudaFreeHost(constantMemHostStaging); cudaEventDestroy(constantMemReusable); + auto &deep_copy_space = + Kokkos::Impl::cuda_get_deep_copy_space(/*initialize*/ false); + if (deep_copy_space) + deep_copy_space->impl_internal_space_instance()->finalize(); + cudaStreamDestroy(cuda_get_deep_copy_stream()); } } @@ -821,62 +817,23 @@ Cuda::size_type Cuda::device_arch() { void Cuda::impl_finalize() { Impl::CudaInternal::singleton().finalize(); } Cuda::Cuda() - : m_space_instance(&Impl::CudaInternal::singleton()), m_counter(nullptr) { + : m_space_instance(&Impl::CudaInternal::singleton(), + [](Impl::CudaInternal *) {}) { Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor"); } Cuda::Cuda(cudaStream_t stream) - : m_space_instance(new Impl::CudaInternal), m_counter(new int(1)) { + : m_space_instance(new Impl::CudaInternal, [](Impl::CudaInternal *ptr) { + ptr->finalize(); + delete ptr; + }) { Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor"); 
m_space_instance->initialize(Impl::CudaInternal::singleton().m_cudaDev, stream); } -KOKKOS_FUNCTION Cuda::Cuda(Cuda &&other) noexcept { - m_space_instance = other.m_space_instance; - other.m_space_instance = nullptr; - m_counter = other.m_counter; - other.m_counter = nullptr; -} - -KOKKOS_FUNCTION Cuda::Cuda(const Cuda &other) - : m_space_instance(other.m_space_instance), m_counter(other.m_counter) { -#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA - if (m_counter) Kokkos::atomic_add(m_counter, 1); -#endif -} - -KOKKOS_FUNCTION Cuda &Cuda::operator=(Cuda &&other) noexcept { - m_space_instance = other.m_space_instance; - other.m_space_instance = nullptr; - m_counter = other.m_counter; - other.m_counter = nullptr; - return *this; -} - -KOKKOS_FUNCTION Cuda &Cuda::operator=(const Cuda &other) { - m_space_instance = other.m_space_instance; - m_counter = other.m_counter; -#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA - if (m_counter) Kokkos::atomic_add(m_counter, 1); -#endif - return *this; -} - -KOKKOS_FUNCTION Cuda::~Cuda() noexcept { -#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA - if (m_counter == nullptr) return; - int const count = Kokkos::atomic_fetch_sub(m_counter, 1); - if (count == 1) { - delete m_counter; - m_space_instance->finalize(); - delete m_space_instance; - } -#endif -} - void Cuda::print_configuration(std::ostream &s, const bool) { Impl::CudaInternal::singleton().print_configuration(s); } @@ -924,54 +881,53 @@ void CudaSpaceInitializer::fence() { Kokkos::Cuda::impl_static_fence(); } void CudaSpaceInitializer::print_configuration(std::ostream &msg, const bool detail) { - msg << "Device Execution Space:" << std::endl; - msg << " KOKKOS_ENABLE_CUDA: "; - msg << "yes" << std::endl; + msg << "Device Execution Space:\n"; + msg << " KOKKOS_ENABLE_CUDA: yes\n"; - msg << "Cuda Atomics:" << std::endl; + msg << "Cuda Atomics:\n"; msg << " KOKKOS_ENABLE_CUDA_ATOMICS: "; #ifdef KOKKOS_ENABLE_CUDA_ATOMICS - msg << "yes" << std::endl; + msg << "yes\n"; 
#else - msg << "no" << std::endl; + msg << "no\n"; #endif - msg << "Cuda Options:" << std::endl; + msg << "Cuda Options:\n"; msg << " KOKKOS_ENABLE_CUDA_LAMBDA: "; #ifdef KOKKOS_ENABLE_CUDA_LAMBDA - msg << "yes" << std::endl; + msg << "yes\n"; #else - msg << "no" << std::endl; + msg << "no\n"; #endif msg << " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC: "; #ifdef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC - msg << "yes" << std::endl; + msg << "yes\n"; #else - msg << "no" << std::endl; + msg << "no\n"; #endif msg << " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE: "; #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE - msg << "yes" << std::endl; + msg << "yes\n"; #else - msg << "no" << std::endl; + msg << "no\n"; #endif msg << " KOKKOS_ENABLE_CUDA_UVM: "; #ifdef KOKKOS_ENABLE_CUDA_UVM - msg << "yes" << std::endl; + msg << "yes\n"; #else - msg << "no" << std::endl; + msg << "no\n"; #endif msg << " KOKKOS_ENABLE_CUSPARSE: "; #ifdef KOKKOS_ENABLE_CUSPARSE - msg << "yes" << std::endl; + msg << "yes\n"; #else - msg << "no" << std::endl; + msg << "no\n"; #endif msg << " KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA: "; #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - msg << "yes" << std::endl; + msg << "yes\n"; #else - msg << "no" << std::endl; + msg << "no\n"; #endif msg << "\nCuda Runtime Configuration:" << std::endl; diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp index 13773d70c5a8d402d65833a0dbf198405975580f..aaec2c29260a5ad2b82e2daa653a58372253cd4d 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp @@ -17,30 +17,24 @@ namespace Kokkos { namespace Impl { struct CudaTraits { - enum : CudaSpace::size_type { WarpSize = 32 /* 0x0020 */ }; - enum : CudaSpace::size_type { - WarpIndexMask = 0x001f /* Mask for warpindex */ - }; - enum : CudaSpace::size_type { - WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ - }; - - enum : CudaSpace::size_type { - 
ConstantMemoryUsage = 0x008000 /* 32k bytes */ - }; - enum : CudaSpace::size_type { - ConstantMemoryCache = 0x002000 /* 8k bytes */ - }; - enum : CudaSpace::size_type { - KernelArgumentLimit = 0x001000 /* 4k bytes */ - }; - enum : CudaSpace::size_type { - MaxHierarchicalParallelism = 1024 /* team_size * vector_length */ - }; + static constexpr CudaSpace::size_type WarpSize = 32 /* 0x0020 */; + static constexpr CudaSpace::size_type WarpIndexMask = + 0x001f; /* Mask for warpindex */ + static constexpr CudaSpace::size_type WarpIndexShift = + 5; /* WarpSize == 1 << WarpShift */ + + static constexpr CudaSpace::size_type ConstantMemoryUsage = + 0x008000; /* 32k bytes */ + static constexpr CudaSpace::size_type ConstantMemoryCache = + 0x002000; /* 8k bytes */ + static constexpr CudaSpace::size_type KernelArgumentLimit = + 0x001000; /* 4k bytes */ + static constexpr CudaSpace::size_type MaxHierarchicalParallelism = + 1024; /* team_size * vector_length */ using ConstantGlobalBufferType = unsigned long[ConstantMemoryUsage / sizeof(unsigned long)]; - enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ }; + static constexpr int ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */; KOKKOS_INLINE_FUNCTION static CudaSpace::size_type warp_count( CudaSpace::size_type i) { diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp index 39404e0bf38f3867136edd93e9ed9d2e11ef0477..d892a893b330772ec5e4306ed20a44f8aa2369f1 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp @@ -158,6 +158,9 @@ inline void check_shmem_request(CudaInternal const* cuda_instance, int shmem) { } } +// This function needs to be template on DriverType and LaunchBounds +// so that the static bool is unique for each type combo +// KernelFuncPtr does not necessarily contain that type information. 
template <class DriverType, class LaunchBounds, class KernelFuncPtr> inline void configure_shmem_preference(KernelFuncPtr const& func, bool prefer_shmem) { @@ -355,8 +358,7 @@ struct CudaParallelLaunchKernelInvoker< if (!Impl::is_empty_launch(grid, block)) { Impl::check_shmem_request(cuda_instance, shmem); - Impl::configure_shmem_preference<DriverType, LaunchBounds, - decltype(base_t::get_kernel_func())>( + Impl::configure_shmem_preference<DriverType, LaunchBounds>( base_t::get_kernel_func(), prefer_shmem); void const* args[] = {&driver}; @@ -449,8 +451,7 @@ struct CudaParallelLaunchKernelInvoker< if (!Impl::is_empty_launch(grid, block)) { Impl::check_shmem_request(cuda_instance, shmem); - Impl::configure_shmem_preference<DriverType, LaunchBounds, - decltype(base_t::get_kernel_func())>( + Impl::configure_shmem_preference<DriverType, LaunchBounds>( base_t::get_kernel_func(), prefer_shmem); auto* driver_ptr = Impl::allocate_driver_storage_for_kernel(driver); @@ -627,9 +628,8 @@ struct CudaParallelLaunchImpl< get_cuda_func_attributes(), block, shmem, prefer_shmem); Impl::configure_shmem_preference< - DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>, - decltype(base_t::get_kernel_func())>(base_t::get_kernel_func(), - prefer_shmem); + DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>>( + base_t::get_kernel_func(), prefer_shmem); KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_MDRangePolicy.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_MDRangePolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..12b7f70a97495fca628580dda12b115cb5c25a12 --- /dev/null +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_MDRangePolicy.hpp @@ -0,0 +1,37 @@ +#ifndef KOKKOS_CUDA_MDRANGEPOLICY_HPP_ +#define KOKKOS_CUDA_MDRANGEPOLICY_HPP_ + +#include <KokkosExp_MDRangePolicy.hpp> + +namespace Kokkos { + +template <> +struct default_outer_direction<Kokkos::Cuda> { + using type = 
Iterate; + static constexpr Iterate value = Iterate::Left; +}; + +template <> +struct default_inner_direction<Kokkos::Cuda> { + using type = Iterate; + static constexpr Iterate value = Iterate::Left; +}; + +namespace Impl { + +// Settings for MDRangePolicy +template <> +inline TileSizeProperties get_tile_size_properties<Kokkos::Cuda>( + const Kokkos::Cuda& space) { + TileSizeProperties properties; + properties.max_threads = + space.impl_internal_space_instance()->m_maxThreadsPerSM; + properties.default_largest_tile_size = 16; + properties.default_tile_size = 2; + properties.max_total_tile_size = 512; + return properties; +} + +} // Namespace Impl +} // Namespace Kokkos +#endif diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index 131d180980965829968c0554a36ee282d5930ec1..2834e6f3de012b718ae06ebb6f87d7d24e3e5756 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -60,6 +60,7 @@ #include <Cuda/Kokkos_Cuda_ReduceScan.hpp> #include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp> #include <Cuda/Kokkos_Cuda_Locks.hpp> +#include <Cuda/Kokkos_Cuda_Team.hpp> #include <Kokkos_Vectorization.hpp> #include <Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp> @@ -67,6 +68,7 @@ #include <typeinfo> #include <KokkosExp_MDRangePolicy.hpp> +#include <impl/KokkosExp_IterateTileGPU.hpp> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -474,7 +476,7 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> { Policy const& get_policy() const { return m_policy; } - inline __device__ void operator()(void) const { + inline __device__ void operator()() const { const Member work_stride = blockDim.y * gridDim.x; const Member work_end = m_policy.end(); @@ -537,9 +539,23 @@ class ParallelFor<FunctorType, 
Kokkos::MDRangePolicy<Traits...>, Kokkos::Cuda> { const Policy m_rp; public: + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy& pol, const Functor&) { + cudaFuncAttributes attr = + CudaParallelLaunch<ParallelFor, + LaunchBounds>::get_cuda_func_attributes(); + auto const& prop = pol.space().cuda_device_prop(); + // Limits due to registers/SM, MDRange doesn't have + // shared memory constraints + int const regs_per_sm = prop.regsPerMultiprocessor; + int const regs_per_thread = attr.numRegs; + int const max_threads_per_sm = regs_per_sm / regs_per_thread; + return std::min( + max_threads_per_sm, + static_cast<int>(Kokkos::Impl::CudaTraits::MaxHierarchicalParallelism)); + } Policy const& get_policy() const { return m_rp; } - - inline __device__ void operator()(void) const { + inline __device__ void operator()() const { Kokkos::Impl::DeviceIterateTile<Policy::rank, Policy, FunctorType, typename Policy::work_tag>(m_rp, m_functor) .exec_range(); @@ -689,7 +705,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, public: Policy const& get_policy() const { return m_policy; } - __device__ inline void operator()(void) const { + __device__ inline void operator()() const { // Iterate this block through the league int64_t threadid = 0; if (m_scratch_size[1] > 0) { @@ -1248,8 +1264,21 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, using DummySHMEMReductionType = int; public: + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy& pol, const Functor&) { + cudaFuncAttributes attr = + CudaParallelLaunch<ParallelReduce, + LaunchBounds>::get_cuda_func_attributes(); + auto const& prop = pol.space().cuda_device_prop(); + // Limits due to registers/SM + int const regs_per_sm = prop.regsPerMultiprocessor; + int const regs_per_thread = attr.numRegs; + int const max_threads_per_sm = regs_per_sm / regs_per_thread; + return std::min( + max_threads_per_sm, 
+ static_cast<int>(Kokkos::Impl::CudaTraits::MaxHierarchicalParallelism)); + } Policy const& get_policy() const { return m_policy; } - inline __device__ void exec_range(reference_type update) const { Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank, Policy, FunctorType, typename Policy::work_tag, @@ -1258,7 +1287,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, .exec_range(); } - inline __device__ void operator()(void) const { + inline __device__ void operator()() const { /* run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) ); } @@ -2074,7 +2103,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> { //---------------------------------------- - __device__ inline void initial(void) const { + __device__ inline void initial() const { const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / sizeof(size_type)> word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); @@ -2110,7 +2139,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> { //---------------------------------------- - __device__ inline void final(void) const { + __device__ inline void final() const { const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / sizeof(size_type)> word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); @@ -2195,7 +2224,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Cuda> { //---------------------------------------- - __device__ inline void operator()(void) const { + __device__ inline void operator()() const { #ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION if (m_run_serial) { typename ValueTraits::value_type value; @@ -2364,7 +2393,7 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, //---------------------------------------- - __device__ inline void initial(void) const { + __device__ inline void initial() const { 
const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / sizeof(size_type)> word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); @@ -2400,7 +2429,7 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, //---------------------------------------- - __device__ inline void final(void) const { + __device__ inline void final() const { const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / sizeof(size_type)> word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); @@ -2487,7 +2516,7 @@ class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, //---------------------------------------- - __device__ inline void operator()(void) const { + __device__ inline void operator()() const { #ifdef KOKKOS_IMPL_DEBUG_CUDA_SERIAL_EXECUTION if (m_run_serial) { typename ValueTraits::value_type value; diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp index 4b472f5d4fd8df7ae91a6ad04c3d3d2e15244196..e7806390155d46fd811a21432d9f9d268c457468 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp @@ -661,13 +661,14 @@ KOKKOS_INLINE_FUNCTION thread, count); } -template <typename iType> -KOKKOS_INLINE_FUNCTION - Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::CudaTeamMember> - ThreadVectorRange(const Impl::CudaTeamMember& thread, iType arg_begin, - iType arg_end) { +template <typename iType1, typename iType2> +KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, Impl::CudaTeamMember> +ThreadVectorRange(const Impl::CudaTeamMember& thread, iType1 arg_begin, + iType2 arg_end) { + using iType = typename std::common_type<iType1, iType2>::type; return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::CudaTeamMember>( - thread, arg_begin, arg_end); + thread, iType(arg_begin), iType(arg_end)); } 
KOKKOS_INLINE_FUNCTION @@ -983,7 +984,7 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( //---------------------------------------------------------------------------- -/** \brief Intra-thread vector parallel exclusive prefix sum. +/** \brief Intra-thread vector parallel scan with reducer. * * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) * @@ -991,25 +992,25 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( * thread and a scan operation is performed. * The last call to closure has final == true. */ -template <typename iType, class Closure> -KOKKOS_INLINE_FUNCTION void parallel_scan( - const Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::CudaTeamMember>& - loop_boundaries, - const Closure& closure) { +template <typename iType, class Closure, typename ReducerType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type + parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::CudaTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { (void)loop_boundaries; (void)closure; + (void)reducer; #ifdef __CUDA_ARCH__ - // Extract value_type from closure - - using value_type = typename Kokkos::Impl::FunctorAnalysis< - Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure>::value_type; + using value_type = typename ReducerType::value_type; + value_type accum; + reducer.init(accum); + const value_type identity = accum; // Loop through boundaries by vector-length chunks // must scan at each iteration - value_type accum = 0; - // All thread "lanes" must loop the same number of times. // Determine an loop end for all thread "lanes." // Requires: @@ -1026,44 +1027,68 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( const int end = loop_boundaries.end + (rem ? 
blockDim.x - rem : 0); for (int i = threadIdx.x; i < end; i += blockDim.x) { - value_type val = 0; + value_type val = identity; - // First acquire per-lane contributions: - if (i < loop_boundaries.end) closure(i, val, false); + // First acquire per-lane contributions. + // This sets i's val to i-1's contribution + // to make the latter in_place_shfl_up an + // exclusive scan -- the final accumulation + // of i's val will be included in the second + // closure call later. + if (i < loop_boundaries.end && threadIdx.x > 0) closure(i - 1, val, false); - value_type sval = val; - - // Bottom up inclusive scan in triangular pattern + // Bottom up exclusive scan in triangular pattern // where each CUDA thread is the root of a reduction tree // from the zeroth "lane" to itself. // [t] += [t-1] if t >= 1 // [t] += [t-2] if t >= 2 // [t] += [t-4] if t >= 4 // ... - + // This differs from the non-reducer overload, where an inclusive scan was + // implemented, because in general the binary operator cannot be inverted + // and we would not be able to remove the inclusive contribution by + // inversion. 
for (int j = 1; j < (int)blockDim.x; j <<= 1) { - value_type tmp = 0; - Impl::in_place_shfl_up(tmp, sval, j, blockDim.x, active_mask); + value_type tmp = identity; + Impl::in_place_shfl_up(tmp, val, j, blockDim.x, active_mask); if (j <= (int)threadIdx.x) { - sval += tmp; + reducer.join(val, tmp); } } - // Include accumulation and remove value for exclusive scan: - val = accum + sval - val; + // Include accumulation + reducer.join(val, accum); - // Provide exclusive scan value: + // Update i's contribution into the val + // and add it to accum for next round if (i < loop_boundaries.end) closure(i, val, true); - - // Accumulate the last value in the inclusive scan: - Impl::in_place_shfl(sval, sval, mask, blockDim.x, active_mask); - - accum += sval; + Impl::in_place_shfl(accum, val, mask, blockDim.x, active_mask); } #endif } +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel exclusive prefix sum. + * + * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes in the + * thread and a scan operation is performed. + * The last call to closure has final == true. 
+ */ +template <typename iType, class Closure> +KOKKOS_INLINE_FUNCTION void parallel_scan( + const Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::CudaTeamMember>& + loop_boundaries, + const Closure& closure) { + using value_type = typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure>::value_type; + value_type dummy; + parallel_scan(loop_boundaries, closure, Kokkos::Sum<value_type>(dummy)); +} + } // namespace Kokkos namespace Kokkos { diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index f24abb377dae3102dd26341d5a733ddfd3281a1a..c55956ede9665bc3005fa570d7ac120404a54d49 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -139,7 +139,7 @@ struct CudaLDGFetch { template <typename iType> KOKKOS_INLINE_FUNCTION ValueType operator[](const iType& i) const { -#ifdef __CUDA_ARCH__ +#if defined(__CUDA_ARCH__) && (350 <= __CUDA_ARCH__) AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i])); return *(reinterpret_cast<ValueType*>(&v)); #else diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp index 05876a9f0226687c30b8f334c77dc65c1ca4e780..fc52e415145218afa2c495e9f055e051e9921305 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp @@ -46,6 +46,7 @@ #define KOKKOS_CUDA_WORKGRAPHPOLICY_HPP #include <Kokkos_Cuda.hpp> +#include <Cuda/Kokkos_Cuda_KernelLaunch.hpp> namespace Kokkos { namespace Impl { diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp index 89135b6c45b9483af071e6d921583b8954f93ae5..9278d1bdc9efcc2a76183085c974afef41413e3c 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp +++ 
b/packages/kokkos/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp @@ -75,17 +75,6 @@ void hipOccupancy(int *numBlocks, int blockSize, int sharedmem) { hipOccupancy<DriverType, constant, HIPTraits::MaxThreadsPerBlock, 1>( numBlocks, blockSize, sharedmem); } -template <typename DriverType, typename LaunchBounds, bool Large> -struct HIPGetMaxBlockSize; - -template <typename DriverType, typename LaunchBounds> -int hip_get_max_block_size(typename DriverType::functor_type const &f, - size_t const vector_length, - size_t const shmem_extra_block, - size_t const shmem_extra_thread) { - return HIPGetMaxBlockSize<DriverType, LaunchBounds, true>::get_block_size( - f, vector_length, shmem_extra_block, shmem_extra_thread); -} template <class FunctorType, class LaunchBounds, typename F> int hip_internal_get_block_size(const F &condition_check, @@ -131,10 +120,6 @@ int hip_internal_get_block_size(const F &condition_check, int opt_block_size = (blocks_per_sm >= min_blocks_per_sm) ? block_size : min_blocks_per_sm; int opt_threads_per_sm = threads_per_sm; - // printf("BlockSizeMax: %i Shmem: %i %i %i %i Regs: %i %i Blocks: %i %i - // Achieved: %i %i Opt: %i %i\n",block_size, - // shmem_per_sm,max_shmem_per_block,functor_shmem,total_shmem, - // regs_per_sm,regs_per_wavefront,max_blocks_shmem,max_blocks_regs,blocks_per_sm,threads_per_sm,opt_block_size,opt_threads_per_sm); block_size -= HIPTraits::WarpSize; while (condition_check(blocks_per_sm) && (block_size >= HIPTraits::WarpSize)) { @@ -160,10 +145,6 @@ int hip_internal_get_block_size(const F &condition_check, opt_threads_per_sm = threads_per_sm; } } - // printf("BlockSizeMax: %i Shmem: %i %i %i %i Regs: %i %i Blocks: %i %i - // Achieved: %i %i Opt: %i %i\n",block_size, - // shmem_per_sm,max_shmem_per_block,functor_shmem,total_shmem, - // regs_per_sm,regs_per_wavefront,max_blocks_shmem,max_blocks_regs,blocks_per_sm,threads_per_sm,opt_block_size,opt_threads_per_sm); block_size -= HIPTraits::WarpSize; } return opt_block_size; @@ 
-178,62 +159,6 @@ int hip_get_max_block_size(const HIPInternal *hip_instance, [](int x) { return x == 0; }, hip_instance, attr, f, vector_length, shmem_block, shmem_thread); } -template <typename DriverType, class LaunchBounds> -struct HIPGetMaxBlockSize<DriverType, LaunchBounds, true> { - static int get_block_size(typename DriverType::functor_type const &f, - size_t const vector_length, - size_t const shmem_extra_block, - size_t const shmem_extra_thread) { - int numBlocks = 0; - int blockSize = LaunchBounds::maxTperB == 0 ? 1024 : LaunchBounds::maxTperB; - int sharedmem = - shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + - ::Kokkos::Impl::FunctorTeamShmemSize< - typename DriverType::functor_type>::value(f, blockSize / - vector_length); - - hipOccupancy<DriverType, true>(&numBlocks, blockSize, sharedmem); - - if (numBlocks > 0) return blockSize; - while (blockSize > HIPTraits::WarpSize && numBlocks == 0) { - blockSize /= 2; - sharedmem = - shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + - ::Kokkos::Impl::FunctorTeamShmemSize< - typename DriverType::functor_type>::value(f, blockSize / - vector_length); - - hipOccupancy<DriverType, true>(&numBlocks, blockSize, sharedmem); - } - int blockSizeUpperBound = blockSize * 2; - while (blockSize < blockSizeUpperBound && numBlocks > 0) { - blockSize += HIPTraits::WarpSize; - sharedmem = - shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + - ::Kokkos::Impl::FunctorTeamShmemSize< - typename DriverType::functor_type>::value(f, blockSize / - vector_length); - - hipOccupancy<DriverType, true>(&numBlocks, blockSize, sharedmem); - } - return blockSize - HIPTraits::WarpSize; - } -}; - -template <typename DriverType, typename LaunchBounds, bool Large> -struct HIPGetOptBlockSize; - -template <typename DriverType, typename LaunchBounds> -int hip_get_opt_block_size(typename DriverType::functor_type const &f, - size_t const vector_length, - size_t const shmem_extra_block, - 
size_t const shmem_extra_thread) { - return HIPGetOptBlockSize< - DriverType, LaunchBounds, - (HIPTraits::ConstantMemoryUseThreshold < - sizeof(DriverType))>::get_block_size(f, vector_length, shmem_extra_block, - shmem_extra_thread); -} template <typename FunctorType, typename LaunchBounds> int hip_get_opt_block_size(HIPInternal const *hip_instance, @@ -245,157 +170,6 @@ int hip_get_opt_block_size(HIPInternal const *hip_instance, shmem_block, shmem_thread); } -// FIXME_HIP the code is identical to the false struct except for -// hip_parallel_launch_constant_memory -template <typename DriverType> -struct HIPGetOptBlockSize<DriverType, Kokkos::LaunchBounds<0, 0>, true> { - static int get_block_size(typename DriverType::functor_type const &f, - size_t const vector_length, - size_t const shmem_extra_block, - size_t const shmem_extra_thread) { - int blockSize = HIPTraits::WarpSize / 2; - int numBlocks; - int sharedmem; - int maxOccupancy = 0; - int bestBlockSize = 0; - - while (blockSize < HIPTraits::MaxThreadsPerBlock) { - blockSize *= 2; - - // calculate the occupancy with that optBlockSize and check whether its - // larger than the largest one found so far - sharedmem = - shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + - ::Kokkos::Impl::FunctorTeamShmemSize< - typename DriverType::functor_type>::value(f, blockSize / - vector_length); - hipOccupancy<DriverType, true>(&numBlocks, blockSize, sharedmem); - if (maxOccupancy < numBlocks * blockSize) { - maxOccupancy = numBlocks * blockSize; - bestBlockSize = blockSize; - } - } - return bestBlockSize; - } -}; - -template <typename DriverType> -struct HIPGetOptBlockSize<DriverType, Kokkos::LaunchBounds<0, 0>, false> { - static int get_block_size(const typename DriverType::functor_type &f, - const size_t vector_length, - const size_t shmem_extra_block, - const size_t shmem_extra_thread) { - int blockSize = HIPTraits::WarpSize / 2; - int numBlocks; - int sharedmem; - int maxOccupancy = 0; - int 
bestBlockSize = 0; - - while (blockSize < HIPTraits::MaxThreadsPerBlock) { - blockSize *= 2; - sharedmem = - shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + - ::Kokkos::Impl::FunctorTeamShmemSize< - typename DriverType::functor_type>::value(f, blockSize / - vector_length); - - hipOccupancy<DriverType, false>(&numBlocks, blockSize, sharedmem); - - if (maxOccupancy < numBlocks * blockSize) { - maxOccupancy = numBlocks * blockSize; - bestBlockSize = blockSize; - } - } - return bestBlockSize; - } -}; - -// FIXME_HIP the code is identical to the false struct except for -// hip_parallel_launch_constant_memory -template <typename DriverType, unsigned int MaxThreadsPerBlock, - unsigned int MinBlocksPerSM> -struct HIPGetOptBlockSize< - DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>, - true> { - static int get_block_size(const typename DriverType::functor_type &f, - const size_t vector_length, - const size_t shmem_extra_block, - const size_t shmem_extra_thread) { - int blockSize = HIPTraits::WarpSize / 2; - int numBlocks; - int sharedmem; - int maxOccupancy = 0; - int bestBlockSize = 0; - int max_threads_per_block = - std::min(MaxThreadsPerBlock, - hip_internal_maximum_warp_count() * HIPTraits::WarpSize); - - while (blockSize < max_threads_per_block) { - blockSize *= 2; - - // calculate the occupancy with that optBlockSize and check whether its - // larger than the largest one found so far - sharedmem = - shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + - ::Kokkos::Impl::FunctorTeamShmemSize< - typename DriverType::functor_type>::value(f, blockSize / - vector_length); - hipOccupancy<DriverType, true, MaxThreadsPerBlock, MinBlocksPerSM>( - &numBlocks, blockSize, sharedmem); - if (numBlocks >= static_cast<int>(MinBlocksPerSM) && - blockSize <= static_cast<int>(MaxThreadsPerBlock)) { - if (maxOccupancy < numBlocks * blockSize) { - maxOccupancy = numBlocks * blockSize; - bestBlockSize = blockSize; - } - } - } - 
if (maxOccupancy > 0) return bestBlockSize; - return -1; - } -}; - -template <typename DriverType, unsigned int MaxThreadsPerBlock, - unsigned int MinBlocksPerSM> -struct HIPGetOptBlockSize< - DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>, - false> { - static int get_block_size(const typename DriverType::functor_type &f, - const size_t vector_length, - const size_t shmem_extra_block, - const size_t shmem_extra_thread) { - int blockSize = HIPTraits::WarpSize / 2; - int numBlocks; - int sharedmem; - int maxOccupancy = 0; - int bestBlockSize = 0; - int max_threads_per_block = - std::min(MaxThreadsPerBlock, - hip_internal_maximum_warp_count() * HIPTraits::WarpSize); - - while (blockSize < max_threads_per_block) { - blockSize *= 2; - sharedmem = - shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + - ::Kokkos::Impl::FunctorTeamShmemSize< - typename DriverType::functor_type>::value(f, blockSize / - vector_length); - - hipOccupancy<DriverType, false, MaxThreadsPerBlock, MinBlocksPerSM>( - &numBlocks, blockSize, sharedmem); - if (numBlocks >= int(MinBlocksPerSM) && - blockSize <= int(MaxThreadsPerBlock)) { - if (maxOccupancy < numBlocks * blockSize) { - maxOccupancy = numBlocks * blockSize; - bestBlockSize = blockSize; - } - } - } - if (maxOccupancy > 0) return bestBlockSize; - return -1; - } -}; - } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp index 45512038acafee993aaf50d752ade2763279c45a..18ef10e22cd39b30118f78882a3ce747c19b9901 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp @@ -164,6 +164,8 @@ HIPInternal &HIPInternal::singleton() { void HIPInternal::fence() const { HIP_SAFE_CALL(hipStreamSynchronize(m_stream)); + // can reset our cycle id now as well + m_cycleId = 0; } void HIPInternal::initialize(int 
hip_device_id, hipStream_t stream) { @@ -256,7 +258,7 @@ void HIPInternal::initialize(int hip_device_id, hipStream_t stream) { void>; Record *const r = Record::allocate(Kokkos::Experimental::HIPSpace(), - "InternalScratchBitset", + "Kokkos::InternalScratchBitset", sizeof(uint32_t) * buffer_bound); Record::increment(r); @@ -303,8 +305,10 @@ Kokkos::Experimental::HIP::size_type *HIPInternal::scratch_space( Kokkos::Impl::SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>; - static Record *const r = Record::allocate( - Kokkos::Experimental::HIPSpace(), "InternalScratchSpace", + if (m_scratchSpace) Record::decrement(Record::get_record(m_scratchSpace)); + + Record *const r = Record::allocate( + Kokkos::Experimental::HIPSpace(), "Kokkos::InternalScratchSpace", (sizeScratchGrain * m_scratchSpaceCount)); Record::increment(r); @@ -325,8 +329,10 @@ Kokkos::Experimental::HIP::size_type *HIPInternal::scratch_flags( Kokkos::Impl::SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>; + if (m_scratchFlags) Record::decrement(Record::get_record(m_scratchFlags)); + Record *const r = Record::allocate( - Kokkos::Experimental::HIPSpace(), "InternalScratchFlags", + Kokkos::Experimental::HIPSpace(), "Kokkos::InternalScratchFlags", (sizeScratchGrain * m_scratchFlagsCount)); Record::increment(r); @@ -345,7 +351,7 @@ void *HIPInternal::resize_team_scratch_space(std::int64_t bytes, if (m_team_scratch_current_size == 0) { m_team_scratch_current_size = bytes; m_team_scratch_ptr = Kokkos::kokkos_malloc<Kokkos::Experimental::HIPSpace>( - "HIPSpace::ScratchMemory", m_team_scratch_current_size); + "Kokkos::HIPSpace::TeamScratchMemory", m_team_scratch_current_size); } if ((bytes > m_team_scratch_current_size) || ((bytes < m_team_scratch_current_size) && (force_shrink))) { @@ -388,6 +394,40 @@ void HIPInternal::finalize() { m_team_scratch_current_size = 0; m_team_scratch_ptr = nullptr; } + if (nullptr != d_driverWorkArray) { + HIP_SAFE_CALL(hipHostFree(d_driverWorkArray)); + 
d_driverWorkArray = nullptr; + } +} + +char *HIPInternal::get_next_driver(size_t driverTypeSize) const { + std::lock_guard<std::mutex> const lock(m_mutexWorkArray); + if (d_driverWorkArray == nullptr) { + HIP_SAFE_CALL( + hipHostMalloc(&d_driverWorkArray, + m_maxDriverCycles * m_maxDriverTypeSize * sizeof(char), + hipHostMallocNonCoherent)); + } + if (driverTypeSize > m_maxDriverTypeSize) { + // fence handles the cycle id reset for us + fence(); + HIP_SAFE_CALL(hipHostFree(d_driverWorkArray)); + m_maxDriverTypeSize = driverTypeSize; + if (m_maxDriverTypeSize % 128 != 0) + m_maxDriverTypeSize = + m_maxDriverTypeSize + 128 - m_maxDriverTypeSize % 128; + HIP_SAFE_CALL( + hipHostMalloc(&d_driverWorkArray, + m_maxDriverCycles * m_maxDriverTypeSize * sizeof(char), + hipHostMallocNonCoherent)); + } else { + m_cycleId = (m_cycleId + 1) % m_maxDriverCycles; + if (m_cycleId == 0) { + // ensure any outstanding kernels are completed before we wrap around + fence(); + } + } + return &d_driverWorkArray[m_maxDriverTypeSize * m_cycleId]; } //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp index 07ec8625e6932647c0601fa8423354e25522321f..f4f88628e313a2d22d23a09e4ce25630d242a566 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp @@ -49,6 +49,8 @@ #include <Kokkos_HIP_Space.hpp> +#include <mutex> + namespace Kokkos { namespace Experimental { namespace Impl { @@ -83,33 +85,46 @@ class HIPInternal { public: using size_type = ::Kokkos::Experimental::HIP::size_type; - int m_hipDev; - int m_hipArch; - unsigned m_multiProcCount; - unsigned m_maxWarpCount; - unsigned m_maxBlock; - unsigned m_maxBlocksPerSM; - unsigned m_maxSharedWords; + int m_hipDev = -1; + int m_hipArch = -1; + unsigned m_multiProcCount = 0; + unsigned m_maxWarpCount = 0; + unsigned m_maxBlock = 0; + 
unsigned m_maxBlocksPerSM = 0; + unsigned m_maxSharedWords = 0; int m_regsPerSM; - int m_shmemPerSM; - int m_maxShmemPerBlock; - int m_maxThreadsPerSM; + int m_shmemPerSM = 0; + int m_maxShmemPerBlock = 0; + int m_maxThreadsPerSM = 0; + + // array of DriverTypes to be allocated in host-pinned memory for async + // kernel launches + mutable char *d_driverWorkArray = nullptr; + // number of kernel launches that can be in-flight w/o synchronization + const int m_maxDriverCycles = 100; + // max size of a DriverType [bytes] + mutable size_t m_maxDriverTypeSize = 1024 * 10; + // the current index in the driverWorkArray + mutable int m_cycleId = 0; + // mutex to access d_driverWorkArray + mutable std::mutex m_mutexWorkArray; // Scratch Spaces for Reductions - size_type m_scratchSpaceCount; - size_type m_scratchFlagsCount; + size_type m_scratchSpaceCount = 0; + size_type m_scratchFlagsCount = 0; - size_type *m_scratchSpace; - size_type *m_scratchFlags; + size_type *m_scratchSpace = nullptr; + size_type *m_scratchFlags = nullptr; uint32_t *m_scratchConcurrentBitset = nullptr; hipDeviceProp_t m_deviceProp; - hipStream_t m_stream; + hipStream_t m_stream = nullptr; // Team Scratch Level 1 Space - mutable int64_t m_team_scratch_current_size; - mutable void *m_team_scratch_ptr; + mutable int64_t m_team_scratch_current_size = 0; + mutable void *m_team_scratch_ptr = nullptr; + mutable std::mutex m_team_scratch_mutex; bool was_finalized = false; @@ -117,9 +132,7 @@ class HIPInternal { int verify_is_initialized(const char *const label) const; - int is_initialized() const { - return m_hipDev >= 0; - } // 0 != m_scratchSpace && 0 != m_scratchFlags ; } + int is_initialized() const { return m_hipDev >= 0; } void initialize(int hip_device_id, hipStream_t stream = nullptr); void finalize(); @@ -128,25 +141,12 @@ class HIPInternal { void fence() const; + // returns the next driver type pointer in our work array + char *get_next_driver(size_t driverTypeSize) const; + ~HIPInternal(); - 
HIPInternal() - : m_hipDev(-1), - m_hipArch(-1), - m_multiProcCount(0), - m_maxWarpCount(0), - m_maxBlock(0), - m_maxSharedWords(0), - m_shmemPerSM(0), - m_maxShmemPerBlock(0), - m_maxThreadsPerSM(0), - m_scratchSpaceCount(0), - m_scratchFlagsCount(0), - m_scratchSpace(nullptr), - m_scratchFlags(nullptr), - m_stream(nullptr), - m_team_scratch_current_size(0), - m_team_scratch_ptr(nullptr) {} + HIPInternal() = default; // Resizing of reduction related scratch spaces size_type *scratch_space(const size_type size); diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp index 3e972c7346b839abc0efa69533236a29f97ed3d4..f774423b378b0753a98c9e4df512b599910028dd 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp @@ -49,9 +49,9 @@ #if defined(__HIPCC__) -#include <Kokkos_HIP_Space.hpp> #include <HIP/Kokkos_HIP_Error.hpp> #include <HIP/Kokkos_HIP_Instance.hpp> +#include <Kokkos_HIP_Space.hpp> // Must use global variable on the device with HIP-Clang #ifdef __HIP__ @@ -127,93 +127,87 @@ struct HIPDispatchProperties { HIPLaunchMechanism launch_mechanism = l; }; -template <class DriverType, class LaunchBounds = Kokkos::LaunchBounds<>, - HIPLaunchMechanism LaunchMechanism = HIPLaunchMechanism::LocalMemory> -struct HIPParallelLaunch; +template <typename DriverType, typename LaunchBounds, + HIPLaunchMechanism LaunchMechanism> +struct HIPParallelLaunchKernelFunc; -template <class DriverType, unsigned int MaxThreadsPerBlock, +template <typename DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM> -struct HIPParallelLaunch< +struct HIPParallelLaunchKernelFunc< DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>, HIPLaunchMechanism::LocalMemory> { - inline HIPParallelLaunch(const DriverType &driver, const dim3 &grid, - const dim3 &block, const int shmem, - const HIPInternal *hip_instance, - const 
bool /*prefer_shmem*/) { - if ((grid.x != 0) && ((block.x * block.y * block.z) != 0)) { - if (hip_instance->m_maxShmemPerBlock < shmem) { - Kokkos::Impl::throw_runtime_exception( - "HIPParallelLaunch FAILED: shared memory request is too large"); - } - - KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE(); - - // FIXME_HIP -- there is currently an error copying (some) structs - // by value to the device in HIP-Clang / VDI - // As a workaround, we can malloc the DriverType and explictly copy over. - // To remove once solved in HIP - DriverType *d_driver; - HIP_SAFE_CALL(hipMalloc(&d_driver, sizeof(DriverType))); - HIP_SAFE_CALL(hipMemcpyAsync(d_driver, &driver, sizeof(DriverType), - hipMemcpyHostToDevice, - hip_instance->m_stream)); - hip_parallel_launch_local_memory<DriverType, MaxThreadsPerBlock, - MinBlocksPerSM> - <<<grid, block, shmem, hip_instance->m_stream>>>(d_driver); + static auto get_kernel_func() { + return hip_parallel_launch_local_memory<DriverType, MaxThreadsPerBlock, + MinBlocksPerSM>; + } +}; -#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) - HIP_SAFE_CALL(hipGetLastError()); - hip_instance->fence(); -#endif - HIP_SAFE_CALL(hipFree(d_driver)); - } +template <typename DriverType> +struct HIPParallelLaunchKernelFunc<DriverType, Kokkos::LaunchBounds<0, 0>, + HIPLaunchMechanism::LocalMemory> { + static auto get_kernel_func() { + return hip_parallel_launch_local_memory<DriverType, 1024, 1>; } +}; - static hipFuncAttributes get_hip_func_attributes() { - static hipFuncAttributes attr = []() { - hipFuncAttributes attr; - HIP_SAFE_CALL(hipFuncGetAttributes( - &attr, - reinterpret_cast<void const *>( - hip_parallel_launch_local_memory<DriverType, MaxThreadsPerBlock, - MinBlocksPerSM>))); - return attr; - }(); - return attr; +template <typename DriverType, typename LaunchBounds, + HIPLaunchMechanism LaunchMechanism> +struct HIPParallelLaunchKernelInvoker; + +template <typename DriverType, typename LaunchBounds> +struct HIPParallelLaunchKernelInvoker<DriverType, 
LaunchBounds, + HIPLaunchMechanism::LocalMemory> + : HIPParallelLaunchKernelFunc<DriverType, LaunchBounds, + HIPLaunchMechanism::LocalMemory> { + using base_t = HIPParallelLaunchKernelFunc<DriverType, LaunchBounds, + HIPLaunchMechanism::LocalMemory>; + + static void invoke_kernel(DriverType const *driver, dim3 const &grid, + dim3 const &block, int shmem, + HIPInternal const *hip_instance) { + (base_t::get_kernel_func())<<<grid, block, shmem, hip_instance->m_stream>>>( + driver); } }; -template <class DriverType> -struct HIPParallelLaunch<DriverType, Kokkos::LaunchBounds<0, 0>, - HIPLaunchMechanism::LocalMemory> { - inline HIPParallelLaunch(const DriverType &driver, const dim3 &grid, - const dim3 &block, const int shmem, - const HIPInternal *hip_instance, - const bool /*prefer_shmem*/) { +template <typename DriverType, typename LaunchBounds = Kokkos::LaunchBounds<>, + HIPLaunchMechanism LaunchMechanism = HIPLaunchMechanism::LocalMemory> +struct HIPParallelLaunch; + +template <typename DriverType, unsigned int MaxThreadsPerBlock, + unsigned int MinBlocksPerSM> +struct HIPParallelLaunch< + DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>, + HIPLaunchMechanism::LocalMemory> + : HIPParallelLaunchKernelInvoker< + DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>, + HIPLaunchMechanism::LocalMemory> { + using base_t = HIPParallelLaunchKernelInvoker< + DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>, + HIPLaunchMechanism::LocalMemory>; + + HIPParallelLaunch(const DriverType &driver, const dim3 &grid, + const dim3 &block, const int shmem, + const HIPInternal *hip_instance, + const bool /*prefer_shmem*/) { if ((grid.x != 0) && ((block.x * block.y * block.z) != 0)) { if (hip_instance->m_maxShmemPerBlock < shmem) { - Kokkos::Impl::throw_runtime_exception(std::string( - "HIPParallelLaunch FAILED: shared memory request is too large")); + Kokkos::Impl::throw_runtime_exception( + "HIPParallelLaunch FAILED: shared memory 
request is too large"); } KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE(); // Invoke the driver function on the device - - // FIXME_HIP -- see note about struct copy by value above - DriverType *d_driver; - HIP_SAFE_CALL(hipMalloc(&d_driver, sizeof(DriverType))); - HIP_SAFE_CALL(hipMemcpyAsync(d_driver, &driver, sizeof(DriverType), - hipMemcpyHostToDevice, - hip_instance->m_stream)); - hip_parallel_launch_local_memory<DriverType, 1024, 1> - <<<grid, block, shmem, hip_instance->m_stream>>>(d_driver); + DriverType *d_driver = reinterpret_cast<DriverType *>( + hip_instance->get_next_driver(sizeof(DriverType))); + std::memcpy((void *)d_driver, (void *)&driver, sizeof(DriverType)); + base_t::invoke_kernel(d_driver, grid, block, shmem, hip_instance); #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) HIP_SAFE_CALL(hipGetLastError()); hip_instance->fence(); #endif - HIP_SAFE_CALL(hipFree(d_driver)); } } @@ -221,8 +215,7 @@ struct HIPParallelLaunch<DriverType, Kokkos::LaunchBounds<0, 0>, static hipFuncAttributes attr = []() { hipFuncAttributes attr; HIP_SAFE_CALL(hipFuncGetAttributes( - &attr, reinterpret_cast<void const *>( - hip_parallel_launch_local_memory<DriverType, 1024, 1>))); + &attr, reinterpret_cast<void const *>(base_t::get_kernel_func()))); return attr; }(); return attr; diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_MDRangePolicy.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_MDRangePolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ce1aff9586d25911104d17d53860409f3e73b10b --- /dev/null +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_MDRangePolicy.hpp @@ -0,0 +1,37 @@ +#ifndef KOKKOS_HIP_MDRANGEPOLICY_HPP_ +#define KOKKOS_HIP_MDRANGEPOLICY_HPP_ + +#include <KokkosExp_MDRangePolicy.hpp> + +namespace Kokkos { + +template <> +struct default_outer_direction<Kokkos::Experimental::HIP> { + using type = Iterate; + static constexpr Iterate value = Iterate::Left; +}; + +template <> +struct default_inner_direction<Kokkos::Experimental::HIP> { + 
using type = Iterate; + static constexpr Iterate value = Iterate::Left; +}; + +namespace Impl { + +// Settings for MDRangePolicy +template <> +inline TileSizeProperties get_tile_size_properties<Kokkos::Experimental::HIP>( + const Kokkos::Experimental::HIP& space) { + TileSizeProperties properties; + properties.max_threads = + space.impl_internal_space_instance()->m_maxThreadsPerSM; + properties.default_largest_tile_size = 16; + properties.default_tile_size = 4; + properties.max_total_tile_size = 1024; + return properties; +} + +} // Namespace Impl +} // Namespace Kokkos +#endif diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp index 6b831ff7a3dd82d9d8a54ccc6f6f759548f5a65f..35e7d6fb853ae9e4f245e0fe0c2a71f4f2d4d6c2 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp @@ -49,6 +49,7 @@ #include <HIP/Kokkos_HIP_KernelLaunch.hpp> #include <HIP/Kokkos_HIP_ReduceScan.hpp> #include <KokkosExp_MDRangePolicy.hpp> +#include <impl/KokkosExp_IterateTileGPU.hpp> #include <Kokkos_Parallel.hpp> namespace Kokkos { @@ -72,7 +73,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, ParallelFor& operator=(ParallelFor const&) = delete; public: - inline __device__ void operator()(void) const { + inline __device__ void operator()() const { Kokkos::Impl::DeviceIterateTile<Policy::rank, Policy, FunctorType, typename Policy::work_tag>(m_policy, m_functor) @@ -175,6 +176,25 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy) : m_functor(arg_functor), m_policy(arg_policy) {} + + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy& pol, const Functor&) { + using closure_type = + ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, + Kokkos::Experimental::HIP>; + 
hipFuncAttributes attr = Kokkos::Experimental::Impl::HIPParallelLaunch< + closure_type, LaunchBounds>::get_hip_func_attributes(); + auto const& prop = pol.space().hip_device_prop(); + // Limits due to registers/SM, MDRange doesn't have + // shared memory constraints + int const regs_per_sm = prop.regsPerMultiprocessor; + int const regs_per_thread = attr.numRegs; + int const max_threads_per_sm = regs_per_sm / regs_per_thread; + return std::min( + max_threads_per_sm, + static_cast<int>( + Kokkos::Experimental::Impl::HIPTraits::MaxThreadsPerBlock)); + } }; // ParallelReduce @@ -231,7 +251,7 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, DeviceIteratePattern(m_policy, m_functor, update).exec_range(); } - inline __device__ void operator()(void) const { + inline __device__ void operator()() const { const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / sizeof(size_type)> word_count(ValueTraits::value_size( @@ -291,13 +311,19 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, ::Kokkos::Experimental::Impl::HIPTraits::MaxThreadsPerBlock; int shmem_size = ::Kokkos::Impl::hip_single_inter_block_reduce_scan_shmem< false, FunctorType, WorkTag>(f, n); + using closure_type = Impl::ParallelReduce<FunctorType, Policy, ReducerType>; + hipFuncAttributes attr = ::Kokkos::Experimental::Impl::HIPParallelLaunch< + closure_type, LaunchBounds>::get_hip_func_attributes(); while ( (n && (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size)) || - (n > static_cast<unsigned>( - ::Kokkos::Experimental::Impl::hip_get_max_block_size< - ParallelReduce, LaunchBounds>(f, 1, shmem_size, 0)))) { + (n > + static_cast<unsigned>( + ::Kokkos::Experimental::Impl::hip_get_max_block_size<FunctorType, + LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, f, 1, + shmem_size, 0)))) { n >>= 1; shmem_size = ::Kokkos::Impl::hip_single_inter_block_reduce_scan_shmem< false, 
FunctorType, WorkTag>(f, n); @@ -391,6 +417,23 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, memory_space>::accessible), m_scratch_space(nullptr), m_scratch_flags(nullptr) {} + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy& pol, const Functor&) { + using closure_type = + ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, + ReducerType, Kokkos::Experimental::HIP>; + hipFuncAttributes attr = Kokkos::Experimental::Impl::HIPParallelLaunch< + closure_type, LaunchBounds>::get_hip_func_attributes(); + auto const& prop = pol.space().hip_device_prop(); + // Limits due do registers/SM + int const regs_per_sm = prop.regsPerMultiprocessor; + int const regs_per_thread = attr.numRegs; + int const max_threads_per_sm = regs_per_sm / regs_per_thread; + return std::min( + max_threads_per_sm, + static_cast<int>( + Kokkos::Experimental::Impl::HIPTraits::MaxThreadsPerBlock)); + } }; } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp index 5607f1c91a5da80cfe6111f28476dc8610e30160..7d2825eeb4c6be1d060d1e8d7c3eb67097729ccf 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp @@ -92,7 +92,7 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, public: using functor_type = FunctorType; - inline __device__ void operator()(void) const { + inline __device__ void operator()() const { const Member work_stride = blockDim.y * gridDim.x; const Member work_end = m_policy.end(); @@ -174,11 +174,14 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, size_type* m_scratch_space = nullptr; size_type* m_scratch_flags = nullptr; - // FIXME_HIP_PERFORMANCE Need a rule to choose when to use shared memory and - // when to use shuffle +#if HIP_VERSION < 401 static bool 
constexpr UseShflReduction = ((sizeof(value_type) > 2 * sizeof(double)) && static_cast<bool>(ValueTraits::StaticValueSize)); +#else + static bool constexpr UseShflReduction = + static_cast<bool>(ValueTraits::StaticValueSize); +#endif private: struct ShflReductionTag {}; @@ -330,13 +333,19 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, int shmem_size = hip_single_inter_block_reduce_scan_shmem<false, FunctorType, WorkTag>( f, n); + using closure_type = Impl::ParallelReduce<FunctorType, Policy, ReducerType>; + hipFuncAttributes attr = ::Kokkos::Experimental::Impl::HIPParallelLaunch< + closure_type, LaunchBounds>::get_hip_func_attributes(); while ( (n && (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size)) || - (n > static_cast<unsigned int>( - Kokkos::Experimental::Impl::hip_get_max_block_size< - ParallelReduce, LaunchBounds>(f, 1, shmem_size, 0)))) { + (n > + static_cast<unsigned int>( + ::Kokkos::Experimental::Impl::hip_get_max_block_size<FunctorType, + LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, f, 1, + shmem_size, 0)))) { n >>= 1; shmem_size = hip_single_inter_block_reduce_scan_shmem<false, FunctorType, WorkTag>( @@ -493,7 +502,7 @@ class ParallelScanHIPBase { //---------------------------------------- - __device__ inline void initial(void) const { + __device__ inline void initial() const { const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / sizeof(size_type)> word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); @@ -529,7 +538,7 @@ class ParallelScanHIPBase { //---------------------------------------- - __device__ inline void final(void) const { + __device__ inline void final() const { const integral_nonzero_constant<size_type, ValueTraits::StaticValueSize / sizeof(size_type)> word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); @@ -606,7 +615,7 @@ class ParallelScanHIPBase { public: 
//---------------------------------------- - __device__ inline void operator()(void) const { + __device__ inline void operator()() const { if (!m_final) { initial(); } else { diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp index 5da83d289e2f1fa0c30dbddd3e9dd8d47c571af1..96c3ff2a751027a4eb05b03c99487207c9acf708 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp @@ -433,6 +433,9 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, int m_shmem_size; void* m_scratch_ptr[2]; int m_scratch_size[2]; + // Only let one ParallelFor/Reduce modify the team scratch memory. The + // constructor acquires the mutex which is released in the destructor. + std::unique_lock<std::mutex> m_scratch_lock; template <typename TagType> __device__ inline @@ -449,7 +452,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, } public: - __device__ inline void operator()(void) const { + __device__ inline void operator()() const { // Iterate this block through the league int64_t threadid = 0; if (m_scratch_size[1] > 0) { @@ -513,7 +516,10 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, m_policy(arg_policy), m_league_size(arg_policy.league_size()), m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()) { + m_vector_size(arg_policy.impl_vector_length()), + m_scratch_lock(m_policy.space() + .impl_internal_space_instance() + ->m_team_scratch_mutex) { hipFuncAttributes attr = ::Kokkos::Experimental::Impl::HIPParallelLaunch< ParallelFor, launch_bounds>::get_hip_func_attributes(); m_team_size = @@ -640,6 +646,9 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, const size_type m_league_size; int m_team_size; const size_type m_vector_size; + // Only let one ParallelFor/Reduce modify the team scratch memory. 
The + // constructor acquires the mutex which is released in the destructor. + std::unique_lock<std::mutex> m_scratch_lock; template <class TagType> __device__ inline @@ -877,7 +886,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, m_scratch_ptr{nullptr, nullptr}, m_league_size(arg_policy.league_size()), m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()) { + m_vector_size(arg_policy.impl_vector_length()), + m_scratch_lock(m_policy.space() + .impl_internal_space_instance() + ->m_team_scratch_mutex) { hipFuncAttributes attr = Kokkos::Experimental::Impl::HIPParallelLaunch< ParallelReduce, launch_bounds>::get_hip_func_attributes(); m_team_size = @@ -976,7 +988,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, m_scratch_ptr{nullptr, nullptr}, m_league_size(arg_policy.league_size()), m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()) { + m_vector_size(arg_policy.impl_vector_length()), + m_scratch_lock(m_policy.space() + .impl_internal_space_instance() + ->m_team_scratch_mutex) { hipFuncAttributes attr = Kokkos::Experimental::Impl::HIPParallelLaunch< ParallelReduce, launch_bounds>::get_hip_func_attributes(); m_team_size = diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp index 00cef28f826d05befc14925f9b58bbf095a097c0..15ca089d14740b6a2c42c69945a17a0c7bfa1bcc 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp @@ -42,12 +42,6 @@ //@HEADER */ -#include <stdlib.h> -#include <iostream> -#include <sstream> -#include <stdexcept> -#include <algorithm> -#include <atomic> #include <Kokkos_Macros.hpp> #include <Kokkos_Core.hpp> @@ -57,6 +51,13 @@ #include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_MemorySpace.hpp> +#include <stdlib.h> +#include <iostream> +#include <sstream> +#include <stdexcept> +#include <algorithm> +#include 
<atomic> + /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ namespace Kokkos { @@ -172,14 +173,14 @@ void DeepCopyAsyncHIP(void* dst, void const* src, size_t n) { namespace Kokkos { -void Experimental::HIPSpace::access_error() { +KOKKOS_DEPRECATED void Experimental::HIPSpace::access_error() { const std::string msg( "Kokkos::Experimental::HIPSpace::access_error attempt to execute " "Experimental::HIP function from non-HIP space"); Kokkos::Impl::throw_runtime_exception(msg); } -void Experimental::HIPSpace::access_error(const void* const) { +KOKKOS_DEPRECATED void Experimental::HIPSpace::access_error(const void* const) { const std::string msg( "Kokkos::Experimental::HIPSpace::access_error attempt to execute " "Experimental::HIP function from non-HIP space"); @@ -326,45 +327,6 @@ SharedAllocationRecord<void, void> SharedAllocationRecord< Kokkos::Experimental::HIPHostPinnedSpace, void>::s_root_record; #endif -std::string SharedAllocationRecord<Kokkos::Experimental::HIPSpace, - void>::get_label() const { - SharedAllocationHeader header; - - Kokkos::Impl::DeepCopy<Kokkos::HostSpace, Kokkos::Experimental::HIPSpace>( - &header, RecordBase::head(), sizeof(SharedAllocationHeader)); - - return std::string(header.m_label); -} - -std::string SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, - void>::get_label() const { - return std::string(RecordBase::head()->m_label); -} - -SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>* -SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>::allocate( - const Kokkos::Experimental::HIPSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size) { - return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); -} - -SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>* -SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>:: - 
allocate(const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size) { - return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); -} - -void SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>::deallocate( - SharedAllocationRecord<void, void>* arg_rec) { - delete static_cast<SharedAllocationRecord*>(arg_rec); -} - -void SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>:: - deallocate(SharedAllocationRecord<void, void>* arg_rec) { - delete static_cast<SharedAllocationRecord*>(arg_rec); -} - SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>::~SharedAllocationRecord() { const char* label = nullptr; @@ -393,7 +355,7 @@ SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>:: const SharedAllocationRecord<void, void>::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through deallocation function - : SharedAllocationRecord<void, void>( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG &SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>::s_root_record, @@ -405,13 +367,7 @@ SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>:: SharedAllocationHeader header; - // Fill in the Header information - header.m_record = static_cast<SharedAllocationRecord<void, void>*>(this); - - strncpy(header.m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); - // Set last element zero, in case c_str is too long - header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; + this->base_t::_fill_host_accessible_header_info(header, arg_label); // Copy to device memory Kokkos::Impl::DeepCopy<Kokkos::Experimental::HIPSpace, HostSpace>( @@ -425,7 +381,7 @@ SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>:: const SharedAllocationRecord<void, void>::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through 
deallocation function - : SharedAllocationRecord<void, void>( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG &SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>::s_root_record, @@ -435,223 +391,8 @@ SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>:: sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc), m_space(arg_space) { // Fill in the Header information, directly accessible via host pinned memory - - RecordBase::m_alloc_ptr->m_record = this; - - strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); - // Set last element zero, in case c_str is too long - RecordBase::m_alloc_ptr - ->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; -} - -//---------------------------------------------------------------------------- - -void* SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>:: - allocate_tracked(const Kokkos::Experimental::HIPSpace& arg_space, - const std::string& arg_alloc_label, - const size_t arg_alloc_size) { - if (!arg_alloc_size) return nullptr; - - SharedAllocationRecord* const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - - RecordBase::increment(r); - - return r->data(); -} - -void SharedAllocationRecord<Kokkos::Experimental::HIPSpace, - void>::deallocate_tracked(void* const - arg_alloc_ptr) { - if (arg_alloc_ptr != nullptr) { - SharedAllocationRecord* const r = get_record(arg_alloc_ptr); - - RecordBase::decrement(r); - } -} - -void* SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>:: - reallocate_tracked(void* const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord* const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord* const r_new = - allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - Kokkos::Impl::DeepCopy<Kokkos::Experimental::HIPSpace, - Kokkos::Experimental::HIPSpace>( - r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); - - 
RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); -} - -void* SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>:: - allocate_tracked(const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, - const std::string& arg_alloc_label, - const size_t arg_alloc_size) { - if (!arg_alloc_size) return nullptr; - - SharedAllocationRecord* const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - - RecordBase::increment(r); - - return r->data(); -} - -void SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, - void>::deallocate_tracked(void* const - arg_alloc_ptr) { - if (arg_alloc_ptr) { - SharedAllocationRecord* const r = get_record(arg_alloc_ptr); - - RecordBase::decrement(r); - } -} - -void* SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>:: - reallocate_tracked(void* const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord* const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord* const r_new = - allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - using HIPHostPinnedSpace = Kokkos::Experimental::HIPHostPinnedSpace; - Kokkos::Impl::DeepCopy<HIPHostPinnedSpace, HIPHostPinnedSpace>( - r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); - - RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); -} - -//---------------------------------------------------------------------------- - -SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>* -SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>::get_record( - void* alloc_ptr) { - using Header = SharedAllocationHeader; - using RecordHIP = - SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>; - - // Copy the header from the allocation - Header head; - - Header const* const head_hip = - alloc_ptr ? 
Header::get_header(alloc_ptr) : nullptr; - - if (alloc_ptr) { - Kokkos::Impl::DeepCopy<HostSpace, Kokkos::Experimental::HIPSpace>( - &head, head_hip, sizeof(SharedAllocationHeader)); - } - - RecordHIP* const record = - alloc_ptr ? static_cast<RecordHIP*>(head.m_record) : nullptr; - - if (!alloc_ptr || record->m_alloc_ptr != head_hip) { - Kokkos::Impl::throw_runtime_exception(std::string( - "Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HIPSpace " - ", void >::get_record ERROR")); - } - - return record; -} - -SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>* -SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, - void>::get_record(void* alloc_ptr) { - using Header = SharedAllocationHeader; - using RecordHIP = - SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void>; - - Header* const h = - alloc_ptr ? reinterpret_cast<Header*>(alloc_ptr) - 1 : nullptr; - - if (!alloc_ptr || h->m_record->m_alloc_ptr != h) { - Kokkos::Impl::throw_runtime_exception(std::string( - "Kokkos::Impl::SharedAllocationRecord< " - "Kokkos::Experimental::HIPHostPinnedSpace , void >::get_record ERROR")); - } - - return static_cast<RecordHIP*>(h->m_record); -} - -// Iterate records to print orphaned memory ... 
-void SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>:: - print_records(std::ostream& s, const Kokkos::Experimental::HIPSpace&, - bool detail) { -#ifdef KOKKOS_ENABLE_DEBUG - SharedAllocationRecord<void, void>* r = &s_root_record; - - char buffer[256]; - - SharedAllocationHeader head; - - if (detail) { - do { - if (r->m_alloc_ptr) { - Kokkos::Impl::DeepCopy<HostSpace, Kokkos::Experimental::HIPSpace>( - &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); - } else { - head.m_label[0] = 0; - } - - // Formatting dependent on sizeof(uintptr_t) - const char* format_string; - - if (sizeof(uintptr_t) == sizeof(unsigned long)) { - format_string = - "HIP addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + " - "%.8ld ] count(%d) dealloc(0x%.12lx) %s\n"; - } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { - format_string = - "HIP addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ " - "0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n"; - } - - snprintf(buffer, 256, format_string, reinterpret_cast<uintptr_t>(r), - reinterpret_cast<uintptr_t>(r->m_prev), - reinterpret_cast<uintptr_t>(r->m_next), - reinterpret_cast<uintptr_t>(r->m_alloc_ptr), r->m_alloc_size, - r->m_count, reinterpret_cast<uintptr_t>(r->m_dealloc), - head.m_label); - s << buffer; - r = r->m_next; - } while (r != &s_root_record); - } else { - do { - if (r->m_alloc_ptr) { - Kokkos::Impl::DeepCopy<HostSpace, Kokkos::Experimental::HIPSpace>( - &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); - - // Formatting dependent on sizeof(uintptr_t) - const char* format_string; - - if (sizeof(uintptr_t) == sizeof(unsigned long)) { - format_string = "HIP [ 0x%.12lx + %ld ] %s\n"; - } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { - format_string = "HIP [ 0x%.12llx + %ld ] %s\n"; - } - - snprintf(buffer, 256, format_string, - reinterpret_cast<uintptr_t>(r->data()), r->size(), - head.m_label); - } else { - snprintf(buffer, 256, "HIP [ 0 + 0 ]\n"); - } - s << 
buffer; - r = r->m_next; - } while (r != &s_root_record); - } -#else - (void)s; - (void)detail; - throw_runtime_exception( - "Kokkos::Impl::SharedAllocationRecord<HIPSpace>::print_records" - " only works with KOKKOS_ENABLE_DEBUG enabled"); -#endif + this->base_t::_fill_host_accessible_header_info(*RecordBase::m_alloc_ptr, + arg_label); } } // namespace Impl @@ -680,63 +421,22 @@ void HIP::impl_initialize(const HIP::SelectDevice config) { void HIP::impl_finalize() { Impl::HIPInternal::singleton().finalize(); } HIP::HIP() - : m_space_instance(&Impl::HIPInternal::singleton()), m_counter(nullptr) { + : m_space_instance(&Impl::HIPInternal::singleton(), + [](Impl::HIPInternal*) {}) { Impl::HIPInternal::singleton().verify_is_initialized( "HIP instance constructor"); } HIP::HIP(hipStream_t const stream) - : m_space_instance(new Impl::HIPInternal), m_counter(new int(1)) { + : m_space_instance(new Impl::HIPInternal, [](Impl::HIPInternal* ptr) { + ptr->finalize(); + delete ptr; + }) { Impl::HIPInternal::singleton().verify_is_initialized( "HIP instance constructor"); m_space_instance->initialize(Impl::HIPInternal::singleton().m_hipDev, stream); } -KOKKOS_FUNCTION HIP::HIP(HIP&& other) noexcept { - m_space_instance = other.m_space_instance; - other.m_space_instance = nullptr; - m_counter = other.m_counter; - other.m_counter = nullptr; -} - -KOKKOS_FUNCTION HIP::HIP(HIP const& other) - : m_space_instance(other.m_space_instance), m_counter(other.m_counter) { -#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU - if (m_counter) Kokkos::atomic_add(m_counter, 1); -#endif -} - -KOKKOS_FUNCTION HIP& HIP::operator=(HIP&& other) noexcept { - m_space_instance = other.m_space_instance; - other.m_space_instance = nullptr; - m_counter = other.m_counter; - other.m_counter = nullptr; - - return *this; -} - -KOKKOS_FUNCTION HIP& HIP::operator=(HIP const& other) { - m_space_instance = other.m_space_instance; - m_counter = other.m_counter; -#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU - 
if (m_counter) Kokkos::atomic_add(m_counter, 1); -#endif - - return *this; -} - -KOKKOS_FUNCTION HIP::~HIP() noexcept { -#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU - if (m_counter == nullptr) return; - int const count = Kokkos::atomic_fetch_sub(m_counter, 1); - if (count == 1) { - delete m_counter; - m_space_instance->finalize(); - delete m_space_instance; - } -#endif -} - void HIP::print_configuration(std::ostream& s, const bool) { Impl::HIPInternal::singleton().print_configuration(s); } @@ -810,3 +510,26 @@ void HIPSpaceInitializer::print_configuration(std::ostream& msg, } // namespace Impl } // namespace Kokkos + +//============================================================================== +// <editor-fold desc="Explicit instantiations of CRTP Base classes"> {{{1 + +#include <impl/Kokkos_SharedAlloc_timpl.hpp> + +namespace Kokkos { +namespace Impl { + +// To avoid additional compilation cost for something that's (mostly?) not +// performance sensitive, we explicity instantiate these CRTP base classes here, +// where we have access to the associated *_timpl.hpp header files. 
+template class HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::HIPSpace>; +template class SharedAllocationRecordCommon<Kokkos::Experimental::HIPSpace>; +template class SharedAllocationRecordCommon< + Kokkos::Experimental::HIPHostPinnedSpace>; + +} // end namespace Impl +} // end namespace Kokkos + +// </editor-fold> end Explicit instantiations of CRTP Base classes }}}1 +//============================================================================== diff --git a/packages/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp b/packages/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp index 7571510c31fa6d082017150120cf6ef67e83a321..fe52886ced7c7a72454f9e731b3b5b4778f90073 100644 --- a/packages/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp +++ b/packages/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp @@ -644,13 +644,14 @@ KOKKOS_INLINE_FUNCTION thread, count); } -template <typename iType> -KOKKOS_INLINE_FUNCTION - Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HIPTeamMember> - ThreadVectorRange(const Impl::HIPTeamMember& thread, iType arg_begin, - iType arg_end) { +template <typename iType1, typename iType2> +KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, Impl::HIPTeamMember> +ThreadVectorRange(const Impl::HIPTeamMember& thread, iType1 arg_begin, + iType2 arg_end) { + using iType = typename std::common_type<iType1, iType2>::type; return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HIPTeamMember>( - thread, arg_begin, arg_end); + thread, iType(arg_begin), iType(arg_end)); } KOKKOS_INLINE_FUNCTION @@ -961,7 +962,7 @@ KOKKOS_INLINE_FUNCTION //---------------------------------------------------------------------------- -/** \brief Intra-thread vector parallel exclusive prefix sum. +/** \brief Intra-thread vector parallel scan with reducer. * * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) * @@ -969,22 +970,21 @@ KOKKOS_INLINE_FUNCTION * thread and a scan operation is performed. 
* The last call to closure has final == true. */ -template <typename iType, class Closure> -KOKKOS_INLINE_FUNCTION void parallel_scan( - const Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HIPTeamMember>& - loop_boundaries, - const Closure& closure) { +template <typename iType, class Closure, typename ReducerType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type + parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { #ifdef __HIP_DEVICE_COMPILE__ - // Extract value_type from closure - - using value_type = typename Kokkos::Impl::FunctorAnalysis< - Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure>::value_type; + using value_type = typename ReducerType::value_type; + value_type accum; + reducer.init(accum); + const value_type identity = accum; // Loop through boundaries by vector-length chunks // must scan at each iteration - value_type accum = 0; - // All thread "lanes" must loop the same number of times. // Determine an loop end for all thread "lanes." // Requires: @@ -997,47 +997,72 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( const int end = loop_boundaries.end + (rem ? blockDim.x - rem : 0); for (int i = threadIdx.x; i < end; i += blockDim.x) { - value_type val = 0; - - // First acquire per-lane contributions: - if (i < loop_boundaries.end) closure(i, val, false); + value_type val = identity; - value_type sval = val; + // First acquire per-lane contributions. + // This sets i's val to i-1's contribution + // to make the latter in_place_shfl_up an + // exclusive scan -- the final accumulation + // of i's val will be included in the second + // closure call later. 
+ if (i < loop_boundaries.end && threadIdx.x > 0) closure(i - 1, val, false); - // Bottom up inclusive scan in triangular pattern + // Bottom up exclusive scan in triangular pattern // where each HIP thread is the root of a reduction tree // from the zeroth "lane" to itself. // [t] += [t-1] if t >= 1 // [t] += [t-2] if t >= 2 // [t] += [t-4] if t >= 4 // ... - + // This differs from the non-reducer overload, where an inclusive scan was + // implemented, because in general the binary operator cannot be inverted + // and we would not be able to remove the inclusive contribution by + // inversion. for (int j = 1; j < static_cast<int>(blockDim.x); j <<= 1) { - value_type tmp = 0; - ::Kokkos::Experimental::Impl::in_place_shfl_up(tmp, sval, j, blockDim.x); + value_type tmp = identity; + ::Kokkos::Experimental::Impl::in_place_shfl_up(tmp, val, j, blockDim.x); if (j <= static_cast<int>(threadIdx.x)) { - sval += tmp; + reducer.join(val, tmp); } } - // Include accumulation and remove value for exclusive scan: - val = accum + sval - val; + // Include accumulation + reducer.join(val, accum); - // Provide exclusive scan value: + // Update i's contribution into the val + // and add it to accum for next round if (i < loop_boundaries.end) closure(i, val, true); - - // Accumulate the last value in the inclusive scan: - ::Kokkos::Experimental::Impl::in_place_shfl(sval, sval, blockDim.x - 1, + ::Kokkos::Experimental::Impl::in_place_shfl(accum, val, blockDim.x - 1, blockDim.x); - - accum += sval; } #else (void)loop_boundaries; (void)closure; + (void)reducer; #endif } +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel exclusive prefix sum. + * + * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes in the + * thread and a scan operation is performed. + * The last call to closure has final == true. 
+ */ +template <typename iType, class Closure> +KOKKOS_INLINE_FUNCTION void parallel_scan( + const Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HIPTeamMember>& + loop_boundaries, + const Closure& closure) { + using value_type = typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure>::value_type; + value_type dummy; + parallel_scan(loop_boundaries, closure, Kokkos::Sum<value_type>(dummy)); +} + } // namespace Kokkos namespace Kokkos { diff --git a/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp index 140376425c2910c2d50a73d68f4fee27e57ee8cf..b7d8e62f696073bfa4794b362401aaca288de021 100644 --- a/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp +++ b/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp @@ -48,17 +48,11 @@ #include <initializer_list> #include <Kokkos_Layout.hpp> - +#include <Kokkos_Array.hpp> #include <impl/KokkosExp_Host_IterateTile.hpp> #include <Kokkos_ExecPolicy.hpp> -#include <Kokkos_Parallel.hpp> #include <type_traits> -#if defined(KOKKOS_ENABLE_CUDA) || \ - (defined(__HIPCC__) && defined(KOKKOS_ENABLE_HIP)) -#include <impl/KokkosExp_IterateTileGPU.hpp> -#endif - namespace Kokkos { // ------------------------------------------------------------------ // @@ -74,22 +68,14 @@ enum class Iterate template <typename ExecSpace> struct default_outer_direction { - using type = Iterate; -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) - static constexpr Iterate value = Iterate::Left; -#else + using type = Iterate; static constexpr Iterate value = Iterate::Right; -#endif }; template <typename ExecSpace> struct default_inner_direction { - using type = Iterate; -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) - static constexpr Iterate value = Iterate::Left; -#else + using type = Iterate; static constexpr Iterate value = Iterate::Right; -#endif }; // Iteration Pattern @@ -179,6 +165,25 @@ constexpr 
NVCC_WONT_LET_ME_CALL_YOU_Array to_array_potentially_narrowing( } return a; } + +struct TileSizeProperties { + int max_threads; + int default_largest_tile_size; + int default_tile_size; + int max_total_tile_size; +}; + +template <typename ExecutionSpace> +TileSizeProperties get_tile_size_properties(const ExecutionSpace&) { + // Host settings + TileSizeProperties properties; + properties.max_threads = std::numeric_limits<int>::max(); + properties.default_largest_tile_size = 0; + properties.default_tile_size = 2; + properties.max_total_tile_size = std::numeric_limits<int>::max(); + return properties; +} + } // namespace Impl // multi-dimensional iteration pattern @@ -208,7 +213,7 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits<Properties...> { using launch_bounds = typename traits::launch_bounds; using member_type = typename range_policy::member_type; - enum { rank = static_cast<int>(iteration_pattern::rank) }; + static constexpr int rank = iteration_pattern::rank; using index_type = typename traits::index_type; using array_index_type = std::int64_t; @@ -231,37 +236,20 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits<Properties...> { point_type m_tile_end = {}; index_type m_num_tiles = 1; index_type m_prod_tile_dims = 1; + bool m_tune_tile_size = false; - /* - // NDE enum impl definition alternative - replace static constexpr int ? - enum { outer_direction = static_cast<int> ( - (iteration_pattern::outer_direction != Iterate::Default) - ? iteration_pattern::outer_direction - : default_outer_direction< typename traits::execution_space>::value ) }; - - enum { inner_direction = static_cast<int> ( - iteration_pattern::inner_direction != Iterate::Default - ? 
iteration_pattern::inner_direction - : default_inner_direction< typename traits::execution_space>::value ) }; - - enum { Right = static_cast<int>( Iterate::Right ) }; - enum { Left = static_cast<int>( Iterate::Left ) }; - */ - // static constexpr int rank = iteration_pattern::rank; - - static constexpr int outer_direction = static_cast<int>( + static constexpr auto outer_direction = (iteration_pattern::outer_direction != Iterate::Default) ? iteration_pattern::outer_direction - : default_outer_direction<typename traits::execution_space>::value); + : default_outer_direction<typename traits::execution_space>::value; - static constexpr int inner_direction = static_cast<int>( + static constexpr auto inner_direction = iteration_pattern::inner_direction != Iterate::Default ? iteration_pattern::inner_direction - : default_inner_direction<typename traits::execution_space>::value); + : default_inner_direction<typename traits::execution_space>::value; - // Ugly ugly workaround intel 14 not handling scoped enum correctly - static constexpr int Right = static_cast<int>(Iterate::Right); - static constexpr int Left = static_cast<int>(Iterate::Left); + static constexpr auto Right = Iterate::Right; + static constexpr auto Left = Iterate::Left; KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const { return m_space; @@ -320,7 +308,7 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits<Properties...> { point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{}) : m_space(work_space), m_lower(lower), m_upper(upper), m_tile(tile) { - init(); + init_helper(Impl::get_tile_size_properties(work_space)); } template <typename T, std::size_t NT = rank, @@ -354,93 +342,56 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits<Properties...> { m_tile(p.m_tile), m_tile_end(p.m_tile_end), m_num_tiles(p.m_num_tiles), - m_prod_tile_dims(p.m_prod_tile_dims) {} + m_prod_tile_dims(p.m_prod_tile_dims), + m_tune_tile_size(p.m_tune_tile_size) 
{} + + void impl_change_tile_size(const point_type& tile) { + m_tile = tile; + init_helper(Impl::get_tile_size_properties(m_space)); + } + bool impl_tune_tile_size() const { return m_tune_tile_size; } private: - void init() { - // Host - if (true -#if defined(KOKKOS_ENABLE_CUDA) - && !std::is_same<typename traits::execution_space, Kokkos::Cuda>::value -#endif -#if defined(KOKKOS_ENABLE_HIP) - && !std::is_same<typename traits::execution_space, - Kokkos::Experimental::HIP>::value -#endif - ) { - index_type span; - for (int i = 0; i < rank; ++i) { - span = m_upper[i] - m_lower[i]; - if (m_tile[i] <= 0) { - if (((int)inner_direction == (int)Right && (i < rank - 1)) || - ((int)inner_direction == (int)Left && (i > 0))) { - m_tile[i] = 2; - } else { - m_tile[i] = (span == 0 ? 1 : span); - } - } - m_tile_end[i] = - static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]); - m_num_tiles *= m_tile_end[i]; - m_prod_tile_dims *= m_tile[i]; - } + void init_helper(Impl::TileSizeProperties properties) { + m_prod_tile_dims = 1; + int increment = 1; + int rank_start = 0; + int rank_end = rank; + if (inner_direction == Iterate::Right) { + increment = -1; + rank_start = rank - 1; + rank_end = -1; } -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) - else // Cuda or HIP - { - index_type span; - int increment = 1; - int rank_start = 0; - int rank_end = rank; - if ((int)inner_direction == (int)Right) { - increment = -1; - rank_start = rank - 1; - rank_end = -1; - } - bool is_cuda_exec_space = -#if defined(KOKKOS_ENABLE_CUDA) - std::is_same<typename traits::execution_space, Kokkos::Cuda>::value; -#else - false; -#endif - for (int i = rank_start; i != rank_end; i += increment) { - span = m_upper[i] - m_lower[i]; - if (m_tile[i] <= 0) { - // TODO: determine what is a good default tile size for Cuda and HIP - // may be rank dependent - if (((int)inner_direction == (int)Right && (i < rank - 1)) || - ((int)inner_direction == (int)Left && (i > 0))) { - if (m_prod_tile_dims < 
256) { - m_tile[i] = (is_cuda_exec_space) ? 2 : 4; - } else { - m_tile[i] = 1; - } + for (int i = rank_start; i != rank_end; i += increment) { + const index_type length = m_upper[i] - m_lower[i]; + if (m_tile[i] <= 0) { + m_tune_tile_size = true; + if ((inner_direction == Iterate::Right && (i < rank - 1)) || + (inner_direction == Iterate::Left && (i > 0))) { + if (m_prod_tile_dims * properties.default_tile_size < + static_cast<index_type>(properties.max_total_tile_size)) { + m_tile[i] = properties.default_tile_size; } else { - m_tile[i] = 16; + m_tile[i] = 1; } - } - m_tile_end[i] = - static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]); - m_num_tiles *= m_tile_end[i]; - m_prod_tile_dims *= m_tile[i]; - } - if (m_prod_tile_dims > - 1024) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 - // max per dim (Kepler), but product num_threads < 1024 - if (is_cuda_exec_space) { - printf(" Tile dimensions exceed Cuda limits\n"); - Kokkos::abort( - "Cuda ExecSpace Error: MDRange tile dims exceed maximum number " - "of threads per block - choose smaller tile dims"); } else { - printf(" Tile dimensions exceed HIP limits\n"); - Kokkos::abort( - "HIP ExecSpace Error: MDRange tile dims exceed maximum number of " - "threads per block - choose smaller tile dims"); + m_tile[i] = properties.default_largest_tile_size == 0 + ? 
std::max<int>(length, 1) + : properties.default_largest_tile_size; } } + m_tile_end[i] = + static_cast<index_type>((length + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; + m_prod_tile_dims *= m_tile[i]; + } + if (m_prod_tile_dims > static_cast<index_type>(properties.max_threads)) { + printf(" Product of tile dimensions exceed maximum limit: %d\n", + static_cast<int>(properties.max_threads)); + Kokkos::abort( + "ExecSpace Error: MDRange tile dims exceed maximum number " + "of threads per block - choose smaller tile dims"); } -#endif } }; diff --git a/packages/kokkos/core/src/Kokkos_AnonymousSpace.hpp b/packages/kokkos/core/src/Kokkos_AnonymousSpace.hpp index 8e226a078d1adfa275ad5a8c0263dfeeb41c4787..fb94049d7ad7ed588b00cc1f9351162de32f08e5 100644 --- a/packages/kokkos/core/src/Kokkos_AnonymousSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_AnonymousSpace.hpp @@ -104,20 +104,6 @@ struct MemorySpaceAccess<Kokkos::AnonymousSpace, Kokkos::AnonymousSpace> { enum : bool { deepcopy = true }; }; -template <typename OtherSpace> -struct VerifyExecutionCanAccessMemorySpace<OtherSpace, Kokkos::AnonymousSpace> { - enum { value = 1 }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void *) {} -}; - -template <typename OtherSpace> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::AnonymousSpace, OtherSpace> { - enum { value = 1 }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void *) {} -}; - } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_Complex.hpp b/packages/kokkos/core/src/Kokkos_Complex.hpp index fb2925a066f545bce8636ea76aabc5794f78f587..6578723fc8e5dab1e605b1a5dc80f1daf4b2ebfb 100644 --- a/packages/kokkos/core/src/Kokkos_Complex.hpp +++ b/packages/kokkos/core/src/Kokkos_Complex.hpp @@ -45,14 +45,13 @@ #define KOKKOS_COMPLEX_HPP #include <Kokkos_Atomic.hpp> +#include <Kokkos_MathematicalFunctions.hpp> 
#include <Kokkos_NumericTraits.hpp> +#include <impl/Kokkos_Error.hpp> #include <complex> +#include <type_traits> #include <iosfwd> -#ifdef KOKKOS_ENABLE_SYCL -#include <CL/sycl.hpp> -#endif - namespace Kokkos { /// \class complex @@ -220,10 +219,11 @@ class // Conditional noexcept, just in case RType throws on divide-by-zero KOKKOS_CONSTEXPR_14 KOKKOS_INLINE_FUNCTION complex& operator/=( const complex<RealType>& y) noexcept(noexcept(RealType{} / RealType{})) { + using Kokkos::Experimental::fabs; // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. - const RealType s = std::fabs(y.real()) + std::fabs(y.imag()); + const RealType s = fabs(y.real()) + fabs(y.imag()); // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, @@ -248,10 +248,11 @@ class KOKKOS_INLINE_FUNCTION complex& operator/=( const std::complex<RealType>& y) noexcept(noexcept(RealType{} / RealType{})) { + using Kokkos::Experimental::fabs; // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. - const RealType s = std::fabs(y.real()) + std::fabs(y.imag()); + const RealType s = fabs(y.real()) + fabs(y.imag()); // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, @@ -693,35 +694,96 @@ KOKKOS_INLINE_FUNCTION RealType real(const complex<RealType>& x) noexcept { return x.real(); } +//! Constructs a complex number from magnitude and phase angle +template <class T> +KOKKOS_INLINE_FUNCTION complex<T> polar(const T& r, const T& theta = T()) { + using Kokkos::Experimental::cos; + using Kokkos::Experimental::sin; + KOKKOS_EXPECTS(r >= 0); + return complex<T>(r * cos(theta), r * sin(theta)); +} + //! Absolute value (magnitude) of a complex number. 
template <class RealType> KOKKOS_INLINE_FUNCTION RealType abs(const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::hypot; -#else - using std::hypot; -#endif + using Kokkos::Experimental::hypot; return hypot(x.real(), x.imag()); } //! Power of a complex number -template <class RealType> -KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> pow(const complex<RealType>& x, - const RealType& e) { - RealType r = abs(x); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::atan; - using cl::sycl::cos; - using cl::sycl::pow; - using cl::sycl::sin; -#else - using std::atan; - using std::cos; - using std::pow; - using std::sin; -#endif - RealType phi = atan(x.imag() / x.real()); - return pow(r, e) * Kokkos::complex<RealType>(cos(phi * e), sin(phi * e)); +template <class T> +KOKKOS_INLINE_FUNCTION complex<T> pow(const complex<T>& x, const T& y) { + using Kokkos::Experimental::atan2; + using Kokkos::Experimental::pow; + T r = abs(x); + T theta = atan2(x.imag(), x.real()); + return polar(pow(r, y), y * theta); +} + +template <class T> +KOKKOS_INLINE_FUNCTION complex<T> pow(const T& x, const complex<T>& y) { + return pow(complex<T>(x), y); +} + +template <class T> +KOKKOS_INLINE_FUNCTION complex<T> pow(const complex<T>& x, + const complex<T>& y) { + using Kokkos::Experimental::log; + + return x == T() ? 
T() : exp(y * log(x)); +} + +namespace Impl { +// NOTE promote would also be useful for math functions +template <class T, bool = std::is_integral<T>::value> +struct promote { + using type = double; +}; +template <class T> +struct promote<T, false> {}; +template <> +struct promote<long double> { + using type = long double; +}; +template <> +struct promote<double> { + using type = double; +}; +template <> +struct promote<float> { + using type = float; +}; +template <class T> +using promote_t = typename promote<T>::type; +template <class T, class U> +struct promote_2 { + using type = decltype(promote_t<T>() + promote_t<U>()); +}; +template <class T, class U> +using promote_2_t = typename promote_2<T, U>::type; +} // namespace Impl + +template <class T, class U, + class = std::enable_if_t<std::is_arithmetic<T>::value>> +KOKKOS_INLINE_FUNCTION complex<Impl::promote_2_t<T, U>> pow( + const T& x, const complex<U>& y) { + using type = Impl::promote_2_t<T, U>; + return pow(type(x), complex<type>(y)); +} + +template <class T, class U, + class = std::enable_if_t<std::is_arithmetic<U>::value>> +KOKKOS_INLINE_FUNCTION complex<Impl::promote_2_t<T, U>> pow(const complex<T>& x, + const U& y) { + using type = Impl::promote_2_t<T, U>; + return pow(complex<type>(x), type(y)); +} + +template <class T, class U> +KOKKOS_INLINE_FUNCTION complex<Impl::promote_2_t<T, U>> pow( + const complex<T>& x, const complex<U>& y) { + using type = Impl::promote_2_t<T, U>; + return pow(complex<type>(x), complex<type>(y)); } //! Square root of a complex number. 
This is intended to match the stdc++ @@ -729,26 +791,21 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> pow(const complex<RealType>& x, template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sqrt( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::abs; - using cl::sycl::sqrt; -#else - using std::abs; - using std::sqrt; -#endif + using Kokkos::Experimental::fabs; + using Kokkos::Experimental::sqrt; RealType r = x.real(); RealType i = x.imag(); if (r == RealType()) { - RealType t = sqrt(abs(i) / 2); + RealType t = sqrt(fabs(i) / 2); return Kokkos::complex<RealType>(t, i < RealType() ? -t : t); } else { - RealType t = sqrt(2 * (abs(x) + abs(r))); + RealType t = sqrt(2 * (abs(x) + fabs(r))); RealType u = t / 2; - return r > RealType() - ? Kokkos::complex<RealType>(u, i / t) - : Kokkos::complex<RealType>(abs(i) / t, i < RealType() ? -u : u); + return r > RealType() ? Kokkos::complex<RealType>(u, i / t) + : Kokkos::complex<RealType>(fabs(i) / t, + i < RealType() ? -u : u); } } @@ -762,15 +819,9 @@ KOKKOS_INLINE_FUNCTION complex<RealType> conj( //! Exponential of a complex number. 
template <class RealType> KOKKOS_INLINE_FUNCTION complex<RealType> exp(const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::cos; - using cl::sycl::exp; - using cl::sycl::sin; -#else - using std::cos; - using std::exp; - using std::sin; -#endif + using Kokkos::Experimental::cos; + using Kokkos::Experimental::exp; + using Kokkos::Experimental::sin; return exp(x.real()) * complex<RealType>(cos(x.imag()), sin(x.imag())); } @@ -778,14 +829,9 @@ KOKKOS_INLINE_FUNCTION complex<RealType> exp(const complex<RealType>& x) { template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> log( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::atan; - using cl::sycl::log; -#else - using std::atan; - using std::log; -#endif - RealType phi = atan(x.imag() / x.real()); + using Kokkos::Experimental::atan2; + using Kokkos::Experimental::log; + RealType phi = atan2(x.imag(), x.real()); return Kokkos::complex<RealType>(log(abs(x)), phi); } @@ -793,17 +839,10 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> log( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sin( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::cos; - using cl::sycl::cosh; - using cl::sycl::sin; - using cl::sycl::sinh; -#else - using std::cos; - using std::cosh; - using std::sin; - using std::sinh; -#endif + using Kokkos::Experimental::cos; + using Kokkos::Experimental::cosh; + using Kokkos::Experimental::sin; + using Kokkos::Experimental::sinh; return Kokkos::complex<RealType>(sin(x.real()) * cosh(x.imag()), cos(x.real()) * sinh(x.imag())); } @@ -812,17 +851,10 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sin( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> cos( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::cos; - using cl::sycl::cosh; - using 
cl::sycl::sin; - using cl::sycl::sinh; -#else - using std::cos; - using std::cosh; - using std::sin; - using std::sinh; -#endif + using Kokkos::Experimental::cos; + using Kokkos::Experimental::cosh; + using Kokkos::Experimental::sin; + using Kokkos::Experimental::sinh; return Kokkos::complex<RealType>(cos(x.real()) * cosh(x.imag()), -sin(x.real()) * sinh(x.imag())); } @@ -838,17 +870,10 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> tan( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sinh( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::cos; - using cl::sycl::cosh; - using cl::sycl::sin; - using cl::sycl::sinh; -#else - using std::cos; - using std::cosh; - using std::sin; - using std::sinh; -#endif + using Kokkos::Experimental::cos; + using Kokkos::Experimental::cosh; + using Kokkos::Experimental::sin; + using Kokkos::Experimental::sinh; return Kokkos::complex<RealType>(sinh(x.real()) * cos(x.imag()), cosh(x.real()) * sin(x.imag())); } @@ -857,17 +882,10 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> sinh( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> cosh( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::cos; - using cl::sycl::cosh; - using cl::sycl::sin; - using cl::sycl::sinh; -#else - using std::cos; - using std::cosh; - using std::sin; - using std::sinh; -#endif + using Kokkos::Experimental::cos; + using Kokkos::Experimental::cosh; + using Kokkos::Experimental::sin; + using Kokkos::Experimental::sinh; return Kokkos::complex<RealType>(cosh(x.real()) * cos(x.imag()), sinh(x.real()) * sin(x.imag())); } @@ -898,13 +916,8 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> acosh( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> atanh( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::atan2; - using cl::sycl::log; -#else - using 
std::atan2; - using std::log; -#endif + using Kokkos::Experimental::atan2; + using Kokkos::Experimental::log; const RealType i2 = x.imag() * x.imag(); const RealType r = RealType(1.0) - i2 - x.real() * x.real(); @@ -933,12 +946,7 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> asin( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> acos( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::acos; - -#else - using std::acos; -#endif + using Kokkos::Experimental::acos; Kokkos::complex<RealType> t = asin(x); RealType pi_2 = acos(RealType(0.0)); return Kokkos::complex<RealType>(pi_2 - t.real(), -t.imag()); @@ -948,13 +956,8 @@ KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> acos( template <class RealType> KOKKOS_INLINE_FUNCTION Kokkos::complex<RealType> atan( const complex<RealType>& x) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL - using cl::sycl::atan2; - using cl::sycl::log; -#else - using std::atan2; - using std::log; -#endif + using Kokkos::Experimental::atan2; + using Kokkos::Experimental::log; const RealType r2 = x.real() * x.real(); const RealType i = RealType(1.0) - r2 - x.imag() * x.imag(); @@ -996,12 +999,13 @@ KOKKOS_INLINE_FUNCTION operator/(const complex<RealType1>& x, const complex<RealType2>& y) noexcept(noexcept(RealType1{} / RealType2{})) { + using Kokkos::Experimental::fabs; // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. using common_real_type = typename std::common_type<RealType1, RealType2>::type; - const common_real_type s = std::fabs(real(y)) + std::fabs(imag(y)); + const common_real_type s = fabs(real(y)) + fabs(imag(y)); // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. 
// In that case, the relation x/y == (x/s) / (y/s) doesn't hold, @@ -1046,7 +1050,7 @@ std::istream& operator>>(std::istream& is, complex<RealType>& x) { } template <class T> -struct reduction_identity<Kokkos::complex<T> > { +struct reduction_identity<Kokkos::complex<T>> { using t_red_ident = reduction_identity<T>; KOKKOS_FORCEINLINE_FUNCTION constexpr static Kokkos::complex<T> sum() noexcept { diff --git a/packages/kokkos/core/src/Kokkos_Core.hpp b/packages/kokkos/core/src/Kokkos_Core.hpp index 4dac463a667169472c94e5b2076b1c224019a379..c3771ab393f3aaf8f77cb474056d90e867ff03da 100644 --- a/packages/kokkos/core/src/Kokkos_Core.hpp +++ b/packages/kokkos/core/src/Kokkos_Core.hpp @@ -58,6 +58,7 @@ #include <Kokkos_AnonymousSpace.hpp> #include <Kokkos_LogicalSpaces.hpp> #include <Kokkos_Pair.hpp> +#include <Kokkos_MathematicalFunctions.hpp> #include <Kokkos_MemoryPool.hpp> #include <Kokkos_Array.hpp> #include <Kokkos_View.hpp> @@ -86,6 +87,10 @@ struct InitArguments { int skip_device; bool disable_warnings; bool tune_internals; + bool tool_help = false; + std::string tool_lib = {}; + std::string tool_args = {}; + InitArguments(int nt = -1, int nn = -1, int dv = -1, bool dw = false, bool ti = false) : num_threads{nt}, @@ -139,6 +144,10 @@ void pre_initialize(const InitArguments& args); void post_initialize(const InitArguments& args); +void declare_configuration_metadata(const std::string& category, + const std::string& key, + const std::string& value); + } // namespace Impl bool is_initialized() noexcept; diff --git a/packages/kokkos/core/src/Kokkos_Core_fwd.hpp b/packages/kokkos/core/src/Kokkos_Core_fwd.hpp index 7502719c73d004e8f4bf79ac214209c210645c7b..fe7eba3f6ef178848d2ea832341014d6dc5d1003 100644 --- a/packages/kokkos/core/src/Kokkos_Core_fwd.hpp +++ b/packages/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -50,6 +50,7 @@ // and compiler environment then sets a collection of #define macros. 
#include <Kokkos_Macros.hpp> +#include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_Utilities.hpp> #include <Kokkos_MasterLock.hpp> @@ -180,7 +181,6 @@ using DefaultHostExecutionSpace KOKKOS_IMPL_DEFAULT_HOST_EXEC_SPACE_ANNOTATION = // a given memory space. namespace Kokkos { - namespace Impl { #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) && \ @@ -196,16 +196,22 @@ using ActiveExecutionMemorySpace = Kokkos::HostSpace; using ActiveExecutionMemorySpace = void; #endif -template <class ActiveSpace, class MemorySpace> -struct VerifyExecutionCanAccessMemorySpace { - enum { value = 0 }; +template <typename DstMemorySpace, typename SrcMemorySpace> +struct MemorySpaceAccess; + +template <typename DstMemorySpace, typename SrcMemorySpace, + bool = Kokkos::Impl::MemorySpaceAccess<DstMemorySpace, + SrcMemorySpace>::accessible> +struct verify_space { + KOKKOS_FUNCTION static void check() {} }; -template <class Space> -struct VerifyExecutionCanAccessMemorySpace<Space, Space> { - enum { value = 1 }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void *) {} +template <typename DstMemorySpace, typename SrcMemorySpace> +struct verify_space<DstMemorySpace, SrcMemorySpace, false> { + KOKKOS_FUNCTION static void check() { + Kokkos::abort( + "Kokkos::View ERROR: attempt to access inaccessible memory space"); + }; }; // Base class for exec space initializer factories @@ -220,13 +226,13 @@ class LogicalMemorySpace; } // namespace Kokkos -#define KOKKOS_RESTRICT_EXECUTION_TO_DATA(DATA_SPACE, DATA_PTR) \ - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \ - Kokkos::Impl::ActiveExecutionMemorySpace, DATA_SPACE>::verify(DATA_PTR) +#define KOKKOS_RESTRICT_EXECUTION_TO_DATA(DATA_SPACE, DATA_PTR) \ + Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \ + DATA_SPACE>::check(); -#define KOKKOS_RESTRICT_EXECUTION_TO_(DATA_SPACE) \ - Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \ - 
Kokkos::Impl::ActiveExecutionMemorySpace, DATA_SPACE>::verify() +#define KOKKOS_RESTRICT_EXECUTION_TO_(DATA_SPACE) \ + Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \ + DATA_SPACE>::check(); //---------------------------------------------------------------------------- @@ -256,8 +262,7 @@ template <class ViewTypeA, class ViewTypeB, class Layout, class ExecSpace, int Rank, typename iType> struct ViewCopy; -template <class Functor, class Policy, class EnableFunctor = void, - class EnablePolicy = void> +template <class Functor, class Policy> struct FunctorPolicyExecutionSpace; //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/Kokkos_Crs.hpp b/packages/kokkos/core/src/Kokkos_Crs.hpp index 4a573d82c044b532a58274b52d539f495d4f0ba6..1a10500b19a55f4f963807dd2cf1a28e6062f98c 100644 --- a/packages/kokkos/core/src/Kokkos_Crs.hpp +++ b/packages/kokkos/core/src/Kokkos_Crs.hpp @@ -199,7 +199,7 @@ class CrsRowMapFromCounts { public: KOKKOS_INLINE_FUNCTION void operator()(index_type i, value_type& update, bool final_pass) const { - if (i < m_in.size()) { + if (i < static_cast<index_type>(m_in.size())) { update += m_in(i); if (final_pass) m_out(i + 1) = update; } else if (final_pass) { diff --git a/packages/kokkos/core/src/Kokkos_Cuda.hpp b/packages/kokkos/core/src/Kokkos_Cuda.hpp index 81e11f3f1285f85d2424b9e98930e3b6cb051162..7a218120bb7bb3b053335946ae25ad58c8a85e6d 100644 --- a/packages/kokkos/core/src/Kokkos_Cuda.hpp +++ b/packages/kokkos/core/src/Kokkos_Cuda.hpp @@ -63,6 +63,7 @@ #include <Kokkos_MemoryTraits.hpp> #include <impl/Kokkos_Tags.hpp> #include <impl/Kokkos_ExecSpaceInitializer.hpp> +#include <impl/Kokkos_HostSharedPtr.hpp> /*--------------------------------------------------------------------------*/ @@ -198,16 +199,6 @@ class Cuda { Cuda(); - KOKKOS_FUNCTION Cuda(Cuda&& other) noexcept; - - KOKKOS_FUNCTION Cuda(const Cuda& other); - - KOKKOS_FUNCTION Cuda& operator=(Cuda&& 
other) noexcept; - - KOKKOS_FUNCTION Cuda& operator=(const Cuda& other); - - KOKKOS_FUNCTION ~Cuda() noexcept; - Cuda(cudaStream_t stream); //-------------------------------------------------------------------------- @@ -253,13 +244,12 @@ class Cuda { static const char* name(); inline Impl::CudaInternal* impl_internal_space_instance() const { - return m_space_instance; + return m_space_instance.get(); } uint32_t impl_instance_id() const noexcept { return 0; } private: - Impl::CudaInternal* m_space_instance; - int* m_counter; + Kokkos::Impl::HostSharedPtr<Impl::CudaInternal> m_space_instance; }; namespace Tools { @@ -319,38 +309,8 @@ struct MemorySpaceAccess<Kokkos::CudaUVMSpace, #endif -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace, - Kokkos::Cuda::scratch_memory_space> { - enum : bool { value = true }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void*) {} -}; - -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace, - Kokkos::Cuda::scratch_memory_space> { - enum : bool { value = false }; - inline static void verify(void) { CudaSpace::access_error(); } - inline static void verify(const void* p) { CudaSpace::access_error(p); } -}; - } // namespace Impl } // namespace Kokkos -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -#include <Cuda/Kokkos_Cuda_KernelLaunch.hpp> -#include <Cuda/Kokkos_Cuda_Instance.hpp> -#include <Cuda/Kokkos_Cuda_View.hpp> -#include <Cuda/Kokkos_Cuda_Team.hpp> -#include <Cuda/Kokkos_Cuda_Parallel.hpp> -#include <Cuda/Kokkos_Cuda_Task.hpp> -#include <Cuda/Kokkos_Cuda_UniqueToken.hpp> - -#include <KokkosExp_MDRangePolicy.hpp> -//---------------------------------------------------------------------------- - #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ #endif /* #ifndef KOKKOS_CUDA_HPP */ diff --git 
a/packages/kokkos/core/src/Kokkos_CudaSpace.hpp b/packages/kokkos/core/src/Kokkos_CudaSpace.hpp index fc1c0e2f8a1047cae0c57b31b2ea61d6bec92def..e10fae93c7ca01ce90f31b5d22ca9bff7d113884 100644 --- a/packages/kokkos/core/src/Kokkos_CudaSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_CudaSpace.hpp @@ -53,8 +53,10 @@ #include <iosfwd> #include <typeinfo> #include <string> +#include <memory> #include <Kokkos_HostSpace.hpp> +#include <impl/Kokkos_SharedAlloc.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> @@ -119,8 +121,8 @@ class CudaSpace { /*--------------------------------*/ /** \brief Error reporting for HostSpace attempt to access CudaSpace */ - static void access_error(); - static void access_error(const void* const); + KOKKOS_DEPRECATED static void access_error(); + KOKKOS_DEPRECATED static void access_error(const void* const); private: int m_device; ///< Which Cuda device @@ -128,42 +130,6 @@ class CudaSpace { static constexpr const char* m_name = "Cuda"; friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>; }; - -namespace Impl { -/// \brief Initialize lock array for arbitrary size atomics. -/// -/// Arbitrary atomics are implemented using a hash table of locks -/// where the hash value is derived from the address of the -/// object for which an atomic operation is performed. -/// This function initializes the locks to zero (unset). -void init_lock_arrays_cuda_space(); - -/// \brief Retrieve the pointer to the lock array for arbitrary size atomics. -/// -/// Arbitrary atomics are implemented using a hash table of locks -/// where the hash value is derived from the address of the -/// object for which an atomic operation is performed. -/// This function retrieves the lock array pointer. -/// If the array is not yet allocated it will do so. -int* atomic_lock_array_cuda_space_ptr(bool deallocate = false); - -/// \brief Retrieve the pointer to the scratch array for team and thread private -/// global memory. 
-/// -/// Team and Thread private scratch allocations in -/// global memory are acquired via locks. -/// This function retrieves the lock array pointer. -/// If the array is not yet allocated it will do so. -int* scratch_lock_array_cuda_space_ptr(bool deallocate = false); - -/// \brief Retrieve the pointer to the scratch array for unique identifiers. -/// -/// Unique identifiers in the range 0-Cuda::concurrency -/// are provided via locks. -/// This function retrieves the lock array pointer. -/// If the array is not yet allocated it will do so. -int* threadid_lock_array_cuda_space_ptr(bool deallocate = false); -} // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ @@ -313,6 +279,11 @@ class CudaHostPinnedSpace { namespace Kokkos { namespace Impl { +cudaStream_t cuda_get_deep_copy_stream(); + +const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space( + bool initialize = true); + static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaSpace>::assignable, ""); @@ -784,104 +755,21 @@ struct DeepCopy<HostSpace, CudaHostPinnedSpace, ExecutionSpace> { namespace Kokkos { namespace Impl { -/** Running in CudaSpace attempting to access HostSpace: error */ -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace, - Kokkos::HostSpace> { - enum : bool { value = false }; - KOKKOS_INLINE_FUNCTION static void verify(void) { - Kokkos::abort("Cuda code attempted to access HostSpace memory"); - } - - KOKKOS_INLINE_FUNCTION static void verify(const void*) { - Kokkos::abort("Cuda code attempted to access HostSpace memory"); - } -}; - -/** Running in CudaSpace accessing CudaUVMSpace: ok */ -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace, - Kokkos::CudaUVMSpace> { - enum : bool { value = true }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void*) {} -}; - -/** Running in CudaSpace accessing 
CudaHostPinnedSpace: ok */ -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::CudaSpace, - Kokkos::CudaHostPinnedSpace> { - enum : bool { value = true }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void*) {} -}; - -/** Running in CudaSpace attempting to access an unknown space: error */ -template <class OtherSpace> -struct VerifyExecutionCanAccessMemorySpace< - typename std::enable_if<!std::is_same<Kokkos::CudaSpace, OtherSpace>::value, - Kokkos::CudaSpace>::type, - OtherSpace> { - enum : bool { value = false }; - KOKKOS_INLINE_FUNCTION static void verify(void) { - Kokkos::abort("Cuda code attempted to access unknown Space memory"); - } - - KOKKOS_INLINE_FUNCTION static void verify(const void*) { - Kokkos::abort("Cuda code attempted to access unknown Space memory"); - } -}; - -//---------------------------------------------------------------------------- -/** Running in HostSpace attempting to access CudaSpace */ -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace, - Kokkos::CudaSpace> { - enum : bool { value = false }; - inline static void verify(void) { CudaSpace::access_error(); } - inline static void verify(const void* p) { CudaSpace::access_error(p); } -}; - -/** Running in HostSpace accessing CudaUVMSpace is OK */ -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace, - Kokkos::CudaUVMSpace> { - enum : bool { value = true }; - inline static void verify(void) {} - inline static void verify(const void*) {} -}; - -/** Running in HostSpace accessing CudaHostPinnedSpace is OK */ -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace, - Kokkos::CudaHostPinnedSpace> { - enum : bool { value = true }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void*) {} -}; - -} // namespace Impl -} // namespace Kokkos - 
-//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - template <> class SharedAllocationRecord<Kokkos::CudaSpace, void> - : public SharedAllocationRecord<void, void> { + : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> { private: friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>; + friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>; + friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>; using RecordBase = SharedAllocationRecord<void, void>; + using base_t = + HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>; SharedAllocationRecord(const SharedAllocationRecord&) = delete; SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - static void deallocate(RecordBase*); - static ::cudaTextureObject_t attach_texture_object( const unsigned sizeof_alias, void* const alloc_ptr, const size_t alloc_size); @@ -890,39 +778,19 @@ class SharedAllocationRecord<Kokkos::CudaSpace, void> static RecordBase s_root_record; #endif - ::cudaTextureObject_t m_tex_obj; + ::cudaTextureObject_t m_tex_obj = 0; const Kokkos::CudaSpace m_space; protected: ~SharedAllocationRecord(); - SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {} + SharedAllocationRecord() = default; SharedAllocationRecord( const Kokkos::CudaSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, - const RecordBase::function_type arg_dealloc = &deallocate); + const RecordBase::function_type arg_dealloc = &base_t::deallocate); public: - std::string get_label() const; - - static SharedAllocationRecord* allocate(const Kokkos::CudaSpace& arg_space, - const std::string& arg_label, - const size_t arg_alloc_size); - - /**\brief Allocate tracked memory in the space */ - static void* allocate_tracked(const Kokkos::CudaSpace& arg_space, - const 
std::string& arg_label, - const size_t arg_alloc_size); - - /**\brief Reallocate tracked memory in the space */ - static void* reallocate_tracked(void* const arg_alloc_ptr, - const size_t arg_alloc_size); - - /**\brief Deallocate tracked memory in the space */ - static void deallocate_tracked(void* const arg_alloc_ptr); - - static SharedAllocationRecord* get_record(void* arg_alloc_ptr); - template <typename AliasType> inline ::cudaTextureObject_t attach_texture_object() { static_assert((std::is_same<AliasType, int>::value || @@ -945,57 +813,35 @@ class SharedAllocationRecord<Kokkos::CudaSpace, void> // Texture object is attached to the entire allocation range return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr); } - - static void print_records(std::ostream&, const Kokkos::CudaSpace&, - bool detail = false); }; template <> class SharedAllocationRecord<Kokkos::CudaUVMSpace, void> - : public SharedAllocationRecord<void, void> { + : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> { private: + friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>; + + using base_t = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>; using RecordBase = SharedAllocationRecord<void, void>; SharedAllocationRecord(const SharedAllocationRecord&) = delete; SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - static void deallocate(RecordBase*); - static RecordBase s_root_record; - ::cudaTextureObject_t m_tex_obj; + ::cudaTextureObject_t m_tex_obj = 0; const Kokkos::CudaUVMSpace m_space; protected: ~SharedAllocationRecord(); - SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {} + SharedAllocationRecord() = default; SharedAllocationRecord( const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, - const RecordBase::function_type arg_dealloc = &deallocate); + const RecordBase::function_type arg_dealloc = &base_t::deallocate); public: - std::string get_label() const; - - static 
SharedAllocationRecord* allocate(const Kokkos::CudaUVMSpace& arg_space, - const std::string& arg_label, - const size_t arg_alloc_size); - - /**\brief Allocate tracked memory in the space */ - static void* allocate_tracked(const Kokkos::CudaUVMSpace& arg_space, - const std::string& arg_label, - const size_t arg_alloc_size); - - /**\brief Reallocate tracked memory in the space */ - static void* reallocate_tracked(void* const arg_alloc_ptr, - const size_t arg_alloc_size); - - /**\brief Deallocate tracked memory in the space */ - static void deallocate_tracked(void* const arg_alloc_ptr); - - static SharedAllocationRecord* get_record(void* arg_alloc_ptr); - template <typename AliasType> inline ::cudaTextureObject_t attach_texture_object() { static_assert((std::is_same<AliasType, int>::value || @@ -1019,57 +865,32 @@ class SharedAllocationRecord<Kokkos::CudaUVMSpace, void> // Texture object is attached to the entire allocation range return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr); } - - static void print_records(std::ostream&, const Kokkos::CudaUVMSpace&, - bool detail = false); }; template <> class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void> - : public SharedAllocationRecord<void, void> { + : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> { private: + friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>; + using RecordBase = SharedAllocationRecord<void, void>; + using base_t = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>; SharedAllocationRecord(const SharedAllocationRecord&) = delete; SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - static void deallocate(RecordBase*); - static RecordBase s_root_record; const Kokkos::CudaHostPinnedSpace m_space; protected: ~SharedAllocationRecord(); - SharedAllocationRecord() : RecordBase(), m_space() {} + SharedAllocationRecord() = default; SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace& arg_space, const std::string& 
arg_label, const size_t arg_alloc_size, const RecordBase::function_type arg_dealloc = &deallocate); - - public: - std::string get_label() const; - - static SharedAllocationRecord* allocate( - const Kokkos::CudaHostPinnedSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size); - /**\brief Allocate tracked memory in the space */ - static void* allocate_tracked(const Kokkos::CudaHostPinnedSpace& arg_space, - const std::string& arg_label, - const size_t arg_alloc_size); - - /**\brief Reallocate tracked memory in the space */ - static void* reallocate_tracked(void* const arg_alloc_ptr, - const size_t arg_alloc_size); - - /**\brief Deallocate tracked memory in the space */ - static void deallocate_tracked(void* const arg_alloc_ptr); - - static SharedAllocationRecord* get_record(void* arg_alloc_ptr); - - static void print_records(std::ostream&, const Kokkos::CudaHostPinnedSpace&, - bool detail = false); }; } // namespace Impl diff --git a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp index 3afe0817013445d38cbcb12f65a76471d9cadb04..55aed13670e69838d94fff2735d421cc49a11835 100644 --- a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -856,11 +856,12 @@ KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType> ThreadVectorRange(const TeamMemberType&, const iType& count) = delete; -template <typename iType, class TeamMemberType, class _never_use_this_overload> -KOKKOS_INLINE_FUNCTION_DELETED - Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType> - ThreadVectorRange(const TeamMemberType&, const iType& arg_begin, - const iType& arg_end) = delete; +template <typename iType1, typename iType2, class TeamMemberType, + class _never_use_this_overload> +KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, TeamMemberType> +ThreadVectorRange(const 
TeamMemberType&, const iType1& arg_begin, + const iType2& arg_end) = delete; namespace Impl { @@ -902,85 +903,6 @@ struct ParallelConstructName<FunctorType, TagType, false> { } // namespace Kokkos namespace Kokkos { -namespace Experimental { - -namespace Impl { -template <class Property, class Policy> -struct PolicyPropertyAdaptor; - -template <unsigned long P, template <class...> class Policy, - class... Properties> -struct PolicyPropertyAdaptor<WorkItemProperty::ImplWorkItemProperty<P>, - Policy<Properties...>> { - using policy_in_t = Policy<Properties...>; - static_assert(is_execution_policy<policy_in_t>::value, ""); - using policy_out_t = Policy<typename policy_in_t::traits::execution_space, - typename policy_in_t::traits::schedule_type, - typename policy_in_t::traits::work_tag, - typename policy_in_t::traits::index_type, - typename policy_in_t::traits::iteration_pattern, - typename policy_in_t::traits::launch_bounds, - WorkItemProperty::ImplWorkItemProperty<P>, - typename policy_in_t::traits::occupancy_control>; -}; - -template <template <class...> class Policy, class... Properties> -struct PolicyPropertyAdaptor<DesiredOccupancy, Policy<Properties...>> { - using policy_in_t = Policy<Properties...>; - static_assert(is_execution_policy<policy_in_t>::value, ""); - using policy_out_t = Policy<typename policy_in_t::traits::execution_space, - typename policy_in_t::traits::schedule_type, - typename policy_in_t::traits::work_tag, - typename policy_in_t::traits::index_type, - typename policy_in_t::traits::iteration_pattern, - typename policy_in_t::traits::launch_bounds, - typename policy_in_t::traits::work_item_property, - DesiredOccupancy>; - static_assert(policy_out_t::experimental_contains_desired_occupancy, ""); -}; - -template <template <class...> class Policy, class... 
Properties> -struct PolicyPropertyAdaptor<MaximizeOccupancy, Policy<Properties...>> { - using policy_in_t = Policy<Properties...>; - static_assert(is_execution_policy<policy_in_t>::value, ""); - using policy_out_t = Policy<typename policy_in_t::traits::execution_space, - typename policy_in_t::traits::schedule_type, - typename policy_in_t::traits::work_tag, - typename policy_in_t::traits::index_type, - typename policy_in_t::traits::iteration_pattern, - typename policy_in_t::traits::launch_bounds, - typename policy_in_t::traits::work_item_property, - MaximizeOccupancy>; - static_assert(!policy_out_t::experimental_contains_desired_occupancy, ""); -}; -} // namespace Impl - -template <class PolicyType, unsigned long P> -constexpr typename Impl::PolicyPropertyAdaptor< - WorkItemProperty::ImplWorkItemProperty<P>, PolicyType>::policy_out_t -require(const PolicyType p, WorkItemProperty::ImplWorkItemProperty<P>) { - return typename Impl::PolicyPropertyAdaptor< - WorkItemProperty::ImplWorkItemProperty<P>, PolicyType>::policy_out_t(p); -} - -template <typename Policy> -/*constexpr*/ typename Impl::PolicyPropertyAdaptor<DesiredOccupancy, - Policy>::policy_out_t -prefer(Policy const& p, DesiredOccupancy occ) { - typename Impl::PolicyPropertyAdaptor<DesiredOccupancy, Policy>::policy_out_t - pwo{p}; - pwo.impl_set_desired_occupancy(occ); - return pwo; -} - -template <typename Policy> -constexpr typename Impl::PolicyPropertyAdaptor<MaximizeOccupancy, - Policy>::policy_out_t -prefer(Policy const& p, MaximizeOccupancy) { - return {p}; -} - -} // namespace Experimental namespace Impl { diff --git a/packages/kokkos/core/src/Kokkos_HBWSpace.hpp b/packages/kokkos/core/src/Kokkos_HBWSpace.hpp index 80a8f3ad368645f1f58aa27b129e6684088f8798..d0366b599cf8c80c92812e386ced90f6fa77eb93 100644 --- a/packages/kokkos/core/src/Kokkos_HBWSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -316,29 +316,5 @@ struct DeepCopy<Kokkos::Experimental::HBWSpace, HostSpace, ExecutionSpace> { } // 
namespace Kokkos -namespace Kokkos { - -namespace Impl { - -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace, - Kokkos::Experimental::HBWSpace> { - enum : bool { value = true }; - inline static void verify(void) {} - inline static void verify(const void*) {} -}; - -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::Experimental::HBWSpace, - Kokkos::HostSpace> { - enum : bool { value = true }; - inline static void verify(void) {} - inline static void verify(const void*) {} -}; - -} // namespace Impl - -} // namespace Kokkos - #endif #endif // #define KOKKOS_HBWSPACE_HPP diff --git a/packages/kokkos/core/src/Kokkos_HIP.hpp b/packages/kokkos/core/src/Kokkos_HIP.hpp index 7afda3b43e0ddba6e8ef8776a244a0e03deba492..33cf8321c80282d5346c66afb5ee9b4be589576b 100644 --- a/packages/kokkos/core/src/Kokkos_HIP.hpp +++ b/packages/kokkos/core/src/Kokkos_HIP.hpp @@ -57,6 +57,7 @@ #include <impl/Kokkos_Tags.hpp> #include <HIP/Kokkos_HIP_Instance.hpp> +#include <HIP/Kokkos_HIP_MDRangePolicy.hpp> #include <HIP/Kokkos_HIP_Parallel_Range.hpp> #include <HIP/Kokkos_HIP_Parallel_MDRange.hpp> #include <HIP/Kokkos_HIP_Parallel_Team.hpp> diff --git a/packages/kokkos/core/src/Kokkos_HIP_Space.hpp b/packages/kokkos/core/src/Kokkos_HIP_Space.hpp index 5d045aa27be69a3d8ca3518ffe0ea58fe11451b1..17bd681aa4b7b7aa8d98bb8253c86db81de6ce05 100644 --- a/packages/kokkos/core/src/Kokkos_HIP_Space.hpp +++ b/packages/kokkos/core/src/Kokkos_HIP_Space.hpp @@ -61,6 +61,7 @@ #include <impl/Kokkos_Profiling_Interface.hpp> #include <impl/Kokkos_ExecSpaceInitializer.hpp> +#include <impl/Kokkos_HostSharedPtr.hpp> #include <hip/hip_runtime_api.h> /*--------------------------------------------------------------------------*/ @@ -117,8 +118,8 @@ class HIPSpace { /*--------------------------------*/ /** \brief Error reporting for HostSpace attempt to access HIPSpace */ - static void access_error(); - static void access_error(const void* const); + KOKKOS_DEPRECATED static void 
access_error(); + KOKKOS_DEPRECATED static void access_error(const void* const); private: int m_device; ///< Which HIP device @@ -128,43 +129,6 @@ class HIPSpace { }; } // namespace Experimental - -namespace Impl { - -/// \brief Initialize lock array for arbitrary size atomics. -/// -/// Arbitrary atomics are implemented using a hash table of locks -/// where the hash value is derived from the address of the -/// object for which an atomic operation is performed. -/// This function initializes the locks to zero (unset). -void init_lock_arrays_hip_space(); - -/// \brief Retrieve the pointer to the lock array for arbitrary size atomics. -/// -/// Arbitrary atomics are implemented using a hash table of locks -/// where the hash value is derived from the address of the -/// object for which an atomic operation is performed. -/// This function retrieves the lock array pointer. -/// If the array is not yet allocated it will do so. -int* atomic_lock_array_hip_space_ptr(bool deallocate = false); - -/// \brief Retrieve the pointer to the scratch array for team and thread private -/// global memory. -/// -/// Team and Thread private scratch allocations in -/// global memory are acquired via locks. -/// This function retrieves the lock array pointer. -/// If the array is not yet allocated it will do so. -int* scratch_lock_array_hip_space_ptr(bool deallocate = false); - -/// \brief Retrieve the pointer to the scratch array for unique identifiers. -/// -/// Unique identifiers in the range 0-HIP::concurrency -/// are provided via locks. -/// This function retrieves the lock array pointer. -/// If the array is not yet allocated it will do so. 
-int* threadid_lock_array_hip_space_ptr(bool deallocate = false); -} // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ @@ -483,88 +447,21 @@ struct DeepCopy<HostSpace, Kokkos::Experimental::HIPHostPinnedSpace, namespace Kokkos { namespace Impl { -/** Running in HIPSpace attempting to access HostSpace: error */ -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::Experimental::HIPSpace, - Kokkos::HostSpace> { - enum : bool { value = false }; - KOKKOS_INLINE_FUNCTION static void verify(void) { - Kokkos::abort("HIP code attempted to access HostSpace memory"); - } - - KOKKOS_INLINE_FUNCTION static void verify(const void*) { - Kokkos::abort("HIP code attempted to access HostSpace memory"); - } -}; - -/** Running in HIPSpace accessing HIPHostPinnedSpace: ok */ -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::Experimental::HIPSpace, Kokkos::Experimental::HIPHostPinnedSpace> { - enum : bool { value = true }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void*) {} -}; - -/** Running in HIPSpace attempting to access an unknown space: error */ -template <class OtherSpace> -struct VerifyExecutionCanAccessMemorySpace< - typename std::enable_if< - !std::is_same<Kokkos::Experimental::HIPSpace, OtherSpace>::value, - Kokkos::Experimental::HIPSpace>::type, - OtherSpace> { - enum : bool { value = false }; - KOKKOS_INLINE_FUNCTION static void verify(void) { - Kokkos::abort("HIP code attempted to access unknown Space memory"); - } - - KOKKOS_INLINE_FUNCTION static void verify(const void*) { - Kokkos::abort("HIP code attempted to access unknown Space memory"); - } -}; - -//---------------------------------------------------------------------------- -/** Running in HostSpace attempting to access HIPSpace */ -template <> -struct VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace, - Kokkos::Experimental::HIPSpace> { - enum : bool { 
value = false }; - inline static void verify(void) { - Kokkos::Experimental::HIPSpace::access_error(); - } - inline static void verify(const void* p) { - Kokkos::Experimental::HIPSpace::access_error(p); - } -}; - -/** Running in HostSpace accessing HIPHostPinnedSpace is OK */ -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::HostSpace, Kokkos::Experimental::HIPHostPinnedSpace> { - enum : bool { value = true }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void*) {} -}; -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - template <> class SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void> - : public SharedAllocationRecord<void, void> { + : public HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::HIPSpace> { private: + friend class SharedAllocationRecordCommon<Kokkos::Experimental::HIPSpace>; + friend class HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::HIPSpace>; + using base_t = HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::HIPSpace>; using RecordBase = SharedAllocationRecord<void, void>; SharedAllocationRecord(const SharedAllocationRecord&) = delete; SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - static void deallocate(RecordBase*); - #ifdef KOKKOS_ENABLE_DEBUG static RecordBase s_root_record; #endif @@ -577,45 +474,23 @@ class SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void> SharedAllocationRecord( const Kokkos::Experimental::HIPSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, - const RecordBase::function_type arg_dealloc = &deallocate); - - public: - std::string get_label() const; - - static SharedAllocationRecord* allocate( - const 
Kokkos::Experimental::HIPSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size); - - /**\brief Allocate tracked memory in the space */ - static void* allocate_tracked(const Kokkos::Experimental::HIPSpace& arg_space, - const std::string& arg_label, - const size_t arg_alloc_size); - - /**\brief Reallocate tracked memory in the space */ - static void* reallocate_tracked(void* const arg_alloc_ptr, - const size_t arg_alloc_size); - - /**\brief Deallocate tracked memory in the space */ - static void deallocate_tracked(void* const arg_alloc_ptr); - - static SharedAllocationRecord* get_record(void* arg_alloc_ptr); - - static void print_records(std::ostream&, - const Kokkos::Experimental::HIPSpace&, - bool detail = false); + const RecordBase::function_type arg_dealloc = &base_t::deallocate); }; template <> class SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void> - : public SharedAllocationRecord<void, void> { + : public SharedAllocationRecordCommon< + Kokkos::Experimental::HIPHostPinnedSpace> { private: + friend class SharedAllocationRecordCommon< + Kokkos::Experimental::HIPHostPinnedSpace>; + using base_t = + SharedAllocationRecordCommon<Kokkos::Experimental::HIPHostPinnedSpace>; using RecordBase = SharedAllocationRecord<void, void>; SharedAllocationRecord(const SharedAllocationRecord&) = delete; SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - static void deallocate(RecordBase*); - #ifdef KOKKOS_ENABLE_DEBUG static RecordBase s_root_record; #endif @@ -624,36 +499,12 @@ class SharedAllocationRecord<Kokkos::Experimental::HIPHostPinnedSpace, void> protected: ~SharedAllocationRecord(); - SharedAllocationRecord() : RecordBase(), m_space() {} + SharedAllocationRecord() = default; SharedAllocationRecord( const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, - const RecordBase::function_type arg_dealloc = &deallocate); - - public: - std::string 
get_label() const; - - static SharedAllocationRecord* allocate( - const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size); - /**\brief Allocate tracked memory in the space */ - static void* allocate_tracked( - const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size); - - /**\brief Reallocate tracked memory in the space */ - static void* reallocate_tracked(void* const arg_alloc_ptr, - const size_t arg_alloc_size); - - /**\brief Deallocate tracked memory in the space */ - static void deallocate_tracked(void* const arg_alloc_ptr); - - static SharedAllocationRecord* get_record(void* arg_alloc_ptr); - - static void print_records(std::ostream&, - const Kokkos::Experimental::HIPHostPinnedSpace&, - bool detail = false); + const RecordBase::function_type arg_dealloc = &base_t::deallocate); }; } // namespace Impl } // namespace Kokkos @@ -687,13 +538,6 @@ class HIP { HIP(); HIP(hipStream_t stream); - KOKKOS_FUNCTION HIP(HIP&& other) noexcept; - KOKKOS_FUNCTION HIP(HIP const& other); - KOKKOS_FUNCTION HIP& operator=(HIP&&) noexcept; - KOKKOS_FUNCTION HIP& operator=(HIP const&); - - KOKKOS_FUNCTION ~HIP() noexcept; - //@} //------------------------------------ //! \name Functions that all Kokkos devices must implement. 
@@ -749,14 +593,13 @@ class HIP { static const char* name(); inline Impl::HIPInternal* impl_internal_space_instance() const { - return m_space_instance; + return m_space_instance.get(); } uint32_t impl_instance_id() const noexcept { return 0; } private: - Impl::HIPInternal* m_space_instance; - int* m_counter; + Kokkos::Impl::HostSharedPtr<Impl::HIPInternal> m_space_instance; }; } // namespace Experimental namespace Tools { @@ -794,27 +637,6 @@ struct MemorySpaceAccess<Kokkos::Experimental::HIPSpace, enum : bool { deepcopy = false }; }; -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::Experimental::HIP::memory_space, - Kokkos::Experimental::HIP::scratch_memory_space> { - enum : bool { value = true }; - KOKKOS_INLINE_FUNCTION static void verify(void) {} - KOKKOS_INLINE_FUNCTION static void verify(const void*) {} -}; - -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::HostSpace, Kokkos::Experimental::HIP::scratch_memory_space> { - enum : bool { value = false }; - inline static void verify(void) { - Kokkos::Experimental::HIPSpace::access_error(); - } - inline static void verify(const void* p) { - Kokkos::Experimental::HIPSpace::access_error(p); - } -}; - } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_HPX.hpp b/packages/kokkos/core/src/Kokkos_HPX.hpp index 279744b77986203e65245fda8960faed227aa9c1..2100b49c116cfaecd35205aa60708ed1535578ca 100644 --- a/packages/kokkos/core/src/Kokkos_HPX.hpp +++ b/packages/kokkos/core/src/Kokkos_HPX.hpp @@ -523,14 +523,6 @@ struct MemorySpaceAccess<Kokkos::Experimental::HPX::memory_space, enum : bool { deepcopy = false }; }; -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::Experimental::HPX::memory_space, - Kokkos::Experimental::HPX::scratch_memory_space> { - enum : bool { value = true }; - inline static void verify(void) {} - inline static void verify(const void *) {} -}; } // namespace Impl } // namespace Kokkos @@ -1172,6 +1164,15 @@ class 
ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, : m_functor(arg_functor), m_mdr_policy(arg_policy), m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy &, const Functor &) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. + */ + return 1024; + } }; } // namespace Impl } // namespace Kokkos @@ -1715,6 +1716,15 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, m_reducer(reducer), m_result_ptr(reducer.view().data()), m_force_synchronous(!reducer.view().impl_track().has_record()) {} + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy &, const Functor &) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. 
+ */ + return 1024; + } }; } // namespace Impl } // namespace Kokkos @@ -2438,13 +2448,14 @@ KOKKOS_INLINE_FUNCTION thread, count); } -template <typename iType> -KOKKOS_INLINE_FUNCTION - Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HPXTeamMember> - ThreadVectorRange(const Impl::HPXTeamMember &thread, const iType &i_begin, - const iType &i_end) { +template <typename iType1, typename iType2> +KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, Impl::HPXTeamMember> +ThreadVectorRange(const Impl::HPXTeamMember &thread, const iType1 &i_begin, + const iType2 &i_end) { + using iType = typename std::common_type<iType1, iType2>::type; return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::HPXTeamMember>( - thread, i_begin, i_end); + thread, iType(i_begin), iType(i_end)); } KOKKOS_INLINE_FUNCTION @@ -2615,6 +2626,27 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( } } +/** \brief Intra-thread vector parallel scan with reducer + * + */ +template <typename iType, class FunctorType, typename ReducerType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type + parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::HPXTeamMember> &loop_boundaries, + const FunctorType &lambda, const ReducerType &reducer) { + typename ReducerType::value_type scan_val; + reducer.init(scan_val); + +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, true); + } +} + template <class FunctorType> KOKKOS_INLINE_FUNCTION void single( const Impl::VectorSingleStruct<Impl::HPXTeamMember> &, diff --git a/packages/kokkos/core/src/Kokkos_HostSpace.hpp b/packages/kokkos/core/src/Kokkos_HostSpace.hpp index 1d34eaf007a5df248835bd4b9cd4d977022a9091..ba69fbad393ee391eff2b59c34d4ae526fa7af29 100644 --- a/packages/kokkos/core/src/Kokkos_HostSpace.hpp +++ 
b/packages/kokkos/core/src/Kokkos_HostSpace.hpp @@ -242,17 +242,17 @@ namespace Impl { template <> class SharedAllocationRecord<Kokkos::HostSpace, void> - : public SharedAllocationRecord<void, void> { + : public SharedAllocationRecordCommon<Kokkos::HostSpace> { private: friend Kokkos::HostSpace; + friend class SharedAllocationRecordCommon<Kokkos::HostSpace>; + using base_t = SharedAllocationRecordCommon<Kokkos::HostSpace>; using RecordBase = SharedAllocationRecord<void, void>; SharedAllocationRecord(const SharedAllocationRecord&) = delete; SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - static void deallocate(RecordBase*); - #ifdef KOKKOS_ENABLE_DEBUG /**\brief Root record for tracked allocations from this HostSpace instance */ static RecordBase s_root_record; @@ -275,10 +275,6 @@ class SharedAllocationRecord<Kokkos::HostSpace, void> const RecordBase::function_type arg_dealloc = &deallocate); public: - inline std::string get_label() const { - return std::string(RecordBase::head()->m_label); - } - KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate( const Kokkos::HostSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size) { @@ -291,23 +287,6 @@ class SharedAllocationRecord<Kokkos::HostSpace, void> return (SharedAllocationRecord*)0; #endif } - - /**\brief Allocate tracked memory in the space */ - static void* allocate_tracked(const Kokkos::HostSpace& arg_space, - const std::string& arg_label, - const size_t arg_alloc_size); - - /**\brief Reallocate tracked memory in the space */ - static void* reallocate_tracked(void* const arg_alloc_ptr, - const size_t arg_alloc_size); - - /**\brief Deallocate tracked memory in the space */ - static void deallocate_tracked(void* const arg_alloc_ptr); - - static SharedAllocationRecord* get_record(void* arg_alloc_ptr); - - static void print_records(std::ostream&, const Kokkos::HostSpace&, - bool detail = false); }; } // namespace Impl diff --git 
a/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp b/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp index 979e54da4e475cf9e7bfd7b5e3c0b3c0a3fe7c81..caa41b79b096dd2e7f2697f164d2cc3819834fc2 100644 --- a/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp +++ b/packages/kokkos/core/src/Kokkos_LogicalSpaces.hpp @@ -264,10 +264,10 @@ class SharedAllocationRecord<Kokkos::Experimental::LogicalMemorySpace< static_cast<SharedAllocationRecord<void, void>*>(this); strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); + SharedAllocationHeader::maximum_label_length - 1); // Set last element zero, in case c_str is too long RecordBase::m_alloc_ptr - ->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; + ->m_label[SharedAllocationHeader::maximum_label_length - 1] = '\0'; } public: diff --git a/packages/kokkos/core/src/Kokkos_Macros.hpp b/packages/kokkos/core/src/Kokkos_Macros.hpp index 874b0dcc59f6f0d8b26a107d2b60f0f8912c29e2..0d0185346540bf929b4305d6ad496b2f02e39c69 100644 --- a/packages/kokkos/core/src/Kokkos_Macros.hpp +++ b/packages/kokkos/core/src/Kokkos_Macros.hpp @@ -382,6 +382,12 @@ #define KOKKOS_IMPL_DEVICE_FUNCTION #endif +// Temporary solution for SYCL not supporting printf in kernels. +// Might disappear at any point once we have found another solution. +#if !defined(KOKKOS_IMPL_DO_NOT_USE_PRINTF) +#define KOKKOS_IMPL_DO_NOT_USE_PRINTF(...) printf(__VA_ARGS__) +#endif + //---------------------------------------------------------------------------- // Define final version of functions. 
This is so that clang tidy can find these // macros more easily diff --git a/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp b/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp new file mode 100644 index 0000000000000000000000000000000000000000..50223651e7d189e07cd94f9bf48eb6c5dcaa62d2 --- /dev/null +++ b/packages/kokkos/core/src/Kokkos_MathematicalFunctions.hpp @@ -0,0 +1,233 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_HPP +#define KOKKOS_MATHEMATICAL_FUNCTIONS_HPP + +#include <Kokkos_Macros.hpp> +#include <cmath> +#include <algorithm> +#include <type_traits> + +#ifdef KOKKOS_ENABLE_SYCL +#include <CL/sycl.hpp> +#endif + +namespace Kokkos { +namespace Experimental { + +#if defined(KOKKOS_ENABLE_SYCL) +#define NAMESPACE_MATH_FUNCTIONS sycl +#else +#define NAMESPACE_MATH_FUNCTIONS std +#endif + +#define KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, RETURNTYPE, ARGTYPE) \ + KOKKOS_INLINE_FUNCTION RETURNTYPE FUNC(ARGTYPE x) { \ + using NAMESPACE_MATH_FUNCTIONS::FUNC; \ + return FUNC(x); \ + } + +#define KOKKOS_IMPL_UNARY_FUNCTION_INTEGRAL(FUNC, RETURNTYPE) \ + template <typename Integer, \ + typename = std::enable_if_t<std::is_integral<Integer>::value>> \ + KOKKOS_INLINE_FUNCTION RETURNTYPE FUNC(Integer x) { \ + return Kokkos::Experimental::FUNC(static_cast<double>(x)); \ + } + +#define KOKKOS_IMPL_BINARY_FUNCTION_FLOATING_POINT(FUNC, TYPE) \ + KOKKOS_INLINE_FUNCTION TYPE FUNC(TYPE x, TYPE y) { \ + using NAMESPACE_MATH_FUNCTIONS::FUNC; \ + return FUNC(x, y); \ + } + +// NOTE long double overloads are not available on the device +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) || 
defined(KOKKOS_ENABLE_OPENMPTARGET) + +#define KOKKOS_IMPL_BINARY_FUNCTION_ARITHMETIC(FUNC) \ + template <typename Arithmetic1, typename Arithmetic2, \ + typename = std::enable_if_t< \ + std::is_arithmetic<Arithmetic1>::value && \ + std::is_arithmetic<Arithmetic2>::value && \ + !std::is_same<Arithmetic1, long double>::value && \ + !std::is_same<Arithmetic2, long double>::value>> \ + KOKKOS_INLINE_FUNCTION double FUNC(Arithmetic1 x, Arithmetic2 y) { \ + return Kokkos::Experimental::FUNC( \ + static_cast<std::conditional_t<std::is_integral<Arithmetic1>::value, \ + double, Arithmetic1>>(x), \ + static_cast<std::conditional_t<std::is_integral<Arithmetic2>::value, \ + double, Arithmetic2>>(y)); \ + } + +#define KOKKOS_IMPL_MATH_UNARY_FUNCTION(FUNC) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, float, float) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, double, double) \ + KOKKOS_IMPL_UNARY_FUNCTION_INTEGRAL(FUNC, double) + +#define KOKKOS_IMPL_MATH_UNARY_PREDICATE(FUNC) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, bool, float) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, bool, double) \ + KOKKOS_IMPL_UNARY_FUNCTION_INTEGRAL(FUNC, bool) + +#define KOKKOS_IMPL_MATH_BINARY_FUNCTION(FUNC) \ + KOKKOS_IMPL_BINARY_FUNCTION_FLOATING_POINT(FUNC, float) \ + KOKKOS_IMPL_BINARY_FUNCTION_FLOATING_POINT(FUNC, double) \ + KOKKOS_IMPL_BINARY_FUNCTION_ARITHMETIC(FUNC) + +#define KOKKOS_IMPL_MATH_NAN() \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(nanf, float, char const*) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(nan, double, char const*) + +#else // long double overloads are available + +#define KOKKOS_IMPL_BINARY_FUNCTION_ARITHMETIC(FUNC) \ + template <typename Arithmetic1, typename Arithmetic2, \ + typename = \ + std::enable_if_t<std::is_arithmetic<Arithmetic1>::value && \ + std::is_arithmetic<Arithmetic2>::value>, \ + typename Promoted = std::conditional_t< \ + std::is_same<Arithmetic1, long double>::value || \ + std::is_same<Arithmetic2, long 
double>::value, \ + long double, double>> \ + KOKKOS_INLINE_FUNCTION Promoted FUNC(Arithmetic1 x, Arithmetic2 y) { \ + return Kokkos::Experimental::FUNC( \ + static_cast<std::conditional_t<std::is_integral<Arithmetic1>::value, \ + double, Arithmetic1>>(x), \ + static_cast<std::conditional_t<std::is_integral<Arithmetic2>::value, \ + double, Arithmetic2>>(y)); \ + } + +#define KOKKOS_IMPL_MATH_UNARY_FUNCTION(FUNC) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, float, float) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, double, double) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, long double, long double) \ + KOKKOS_IMPL_UNARY_FUNCTION_INTEGRAL(FUNC, double) + +#define KOKKOS_IMPL_MATH_UNARY_PREDICATE(FUNC) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, bool, float) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, bool, double) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(FUNC, bool, long double) \ + KOKKOS_IMPL_UNARY_FUNCTION_INTEGRAL(FUNC, bool) + +#define KOKKOS_IMPL_MATH_BINARY_FUNCTION(FUNC) \ + KOKKOS_IMPL_BINARY_FUNCTION_FLOATING_POINT(FUNC, float) \ + KOKKOS_IMPL_BINARY_FUNCTION_FLOATING_POINT(FUNC, double) \ + KOKKOS_IMPL_BINARY_FUNCTION_FLOATING_POINT(FUNC, long double) \ + KOKKOS_IMPL_BINARY_FUNCTION_ARITHMETIC(FUNC) + +#define KOKKOS_IMPL_MATH_NAN() \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(nanf, float, char const*) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(nan, double, char const*) \ + KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT(nanl, long double, char const*) + +#endif + +// Basic operations +KOKKOS_IMPL_MATH_UNARY_FUNCTION(fabs) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(fmod) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(remainder) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(fmin) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(fmax) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(fdim) +#ifndef KOKKOS_ENABLE_SYCL +KOKKOS_IMPL_MATH_NAN() +#endif +// Power functions +KOKKOS_IMPL_MATH_BINARY_FUNCTION(pow) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(sqrt) 
+KOKKOS_IMPL_MATH_UNARY_FUNCTION(cbrt) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(hypot) +// Exponential functions +KOKKOS_IMPL_MATH_UNARY_FUNCTION(exp) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(exp2) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(expm1) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(log) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(log10) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(log2) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(log1p) +// Trigonometric functions +KOKKOS_IMPL_MATH_UNARY_FUNCTION(sin) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(cos) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(tan) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(asin) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(acos) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(atan) +KOKKOS_IMPL_MATH_BINARY_FUNCTION(atan2) +// Hyperbolic functions +KOKKOS_IMPL_MATH_UNARY_FUNCTION(sinh) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(cosh) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(tanh) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(asinh) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(acosh) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(atanh) +// Error and gamma functions +KOKKOS_IMPL_MATH_UNARY_FUNCTION(erf) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(erfc) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(tgamma) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(lgamma) +// Nearest integer floating point operations +KOKKOS_IMPL_MATH_UNARY_FUNCTION(ceil) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(floor) +KOKKOS_IMPL_MATH_UNARY_FUNCTION(trunc) +#ifndef KOKKOS_ENABLE_SYCL +KOKKOS_IMPL_MATH_UNARY_FUNCTION(nearbyint) +#endif +// Classification and comparison +KOKKOS_IMPL_MATH_UNARY_PREDICATE(isfinite) +KOKKOS_IMPL_MATH_UNARY_PREDICATE(isinf) +KOKKOS_IMPL_MATH_UNARY_PREDICATE(isnan) + +#undef KOKKOS_IMPL_UNARY_FUNCTION_FLOATING_POINT +#undef KOKKOS_IMPL_UNARY_FUNCTION_INTEGRAL +#undef KOKKOS_IMPL_BINARY_FUNCTION_FLOATING_POINT +#undef KOKKOS_IMPL_BINARY_FUNCTION_ARITHMETIC +#undef KOKKOS_IMPL_MATH_UNARY_FUNCTION +#undef KOKKOS_IMPL_MATH_UNARY_PREDICATE +#undef KOKKOS_IMPL_MATH_BINARY_FUNCTION +#undef KOKKOS_IMPL_MATH_NAN +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git 
a/packages/kokkos/core/src/Kokkos_MemoryPool.hpp b/packages/kokkos/core/src/Kokkos_MemoryPool.hpp index 042ad6d9023494f7650f8c17ab24c0c0f424d929..2cafac1aea462ec29fe1d1cb853cb374ea7e8109 100644 --- a/packages/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/packages/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -408,7 +408,7 @@ class MemoryPool { const size_t alloc_size = header_size + (size_t(m_sb_count) << m_sb_size_lg2); - Record *rec = Record::allocate(memspace, "MemoryPool", alloc_size); + Record *rec = Record::allocate(memspace, "Kokkos::MemoryPool", alloc_size); m_tracker.assign_allocated_record_to_uninitialized(rec); @@ -524,7 +524,9 @@ class MemoryPool { // Fast query clock register 'tic' to pseudo-randomize // the guess for which block within a superblock should // be claimed. If not available then a search occurs. - +#if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GEN) + const uint32_t block_id_hint = alloc_size; +#else const uint32_t block_id_hint = (uint32_t)(Kokkos::Impl::clock_tic() #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) @@ -533,6 +535,7 @@ class MemoryPool { + (threadIdx.x + blockDim.x * threadIdx.y) #endif ); +#endif // expected state of superblock for allocation uint32_t sb_state = block_state; diff --git a/packages/kokkos/core/src/Kokkos_NumericTraits.hpp b/packages/kokkos/core/src/Kokkos_NumericTraits.hpp index 7d55a96523c218c74e6f159905aa627783b1da06..b9380cbe02b42a04c5b21b6cb8408016049d15f8 100644 --- a/packages/kokkos/core/src/Kokkos_NumericTraits.hpp +++ b/packages/kokkos/core/src/Kokkos_NumericTraits.hpp @@ -42,14 +42,199 @@ //@HEADER */ -#ifndef KOKKOS_NUMERICTRAITS_HPP -#define KOKKOS_NUMERICTRAITS_HPP +#ifndef KOKKOS_NUMERIC_TRAITS_HPP +#define KOKKOS_NUMERIC_TRAITS_HPP #include <Kokkos_Macros.hpp> -#include <climits> #include <cfloat> +#include <climits> +#include <cmath> +#include <cstdint> +#include <type_traits> namespace Kokkos { +namespace Experimental { +namespace Impl { +// clang-format off +template <class> 
struct infinity_helper; +template <> struct infinity_helper<float> { static constexpr float value = HUGE_VALF; }; +template <> struct infinity_helper<double> { static constexpr double value = HUGE_VAL; }; +template <> struct infinity_helper<long double> { static constexpr long double value = HUGE_VALL; }; +template <class> struct finite_min_helper; +template <> struct finite_min_helper<bool> { static constexpr bool value = false; }; +template <> struct finite_min_helper<char> { static constexpr char value = CHAR_MIN; }; +template <> struct finite_min_helper<signed char> { static constexpr signed char value = SCHAR_MIN; }; +template <> struct finite_min_helper<unsigned char> { static constexpr unsigned char value = 0; }; +template <> struct finite_min_helper<short> { static constexpr short value = SHRT_MIN; }; +template <> struct finite_min_helper<unsigned short> { static constexpr unsigned short value = 0; }; +template <> struct finite_min_helper<int> { static constexpr int value = INT_MIN; }; +template <> struct finite_min_helper<unsigned int> { static constexpr unsigned int value = 0; }; +template <> struct finite_min_helper<long int> { static constexpr long int value = LONG_MIN; }; +template <> struct finite_min_helper<unsigned long int> { static constexpr unsigned long int value = 0; }; +template <> struct finite_min_helper<long long int> { static constexpr long long int value = LLONG_MIN; }; +template <> struct finite_min_helper<unsigned long long int> { static constexpr unsigned long long int value = 0; }; +template <> struct finite_min_helper<float> { static constexpr float value = -FLT_MAX; }; +template <> struct finite_min_helper<double> { static constexpr double value = -DBL_MAX; }; +template <> struct finite_min_helper<long double> { static constexpr long double value = -LDBL_MAX; }; +template <class> struct finite_max_helper; +template <> struct finite_max_helper<bool> { static constexpr bool value = true; }; +template <> struct finite_max_helper<char> 
{ static constexpr char value = CHAR_MAX; }; +template <> struct finite_max_helper<signed char> { static constexpr signed char value = SCHAR_MAX; }; +template <> struct finite_max_helper<unsigned char> { static constexpr unsigned char value = UCHAR_MAX; }; +template <> struct finite_max_helper<short> { static constexpr short value = SHRT_MAX; }; +template <> struct finite_max_helper<unsigned short> { static constexpr unsigned short value = USHRT_MAX; }; +template <> struct finite_max_helper<int> { static constexpr int value = INT_MAX; }; +template <> struct finite_max_helper<unsigned int> { static constexpr unsigned int value = UINT_MAX; }; +template <> struct finite_max_helper<long int> { static constexpr long int value = LONG_MAX; }; +template <> struct finite_max_helper<unsigned long int> { static constexpr unsigned long int value = ULONG_MAX; }; +template <> struct finite_max_helper<long long int> { static constexpr long long int value = LLONG_MAX; }; +template <> struct finite_max_helper<unsigned long long int> { static constexpr unsigned long long int value = ULLONG_MAX; }; +template <> struct finite_max_helper<float> { static constexpr float value = FLT_MAX; }; +template <> struct finite_max_helper<double> { static constexpr double value = DBL_MAX; }; +template <> struct finite_max_helper<long double> { static constexpr long double value = LDBL_MAX; }; +template <class> struct epsilon_helper; +namespace{ + // FIXME workaround for LDBL_EPSILON with XL: halve epsilon until 1 + epsilon is no longer > 1 and return the value before that last halving + template<typename T> + constexpr T machineeps() { + T epsilon = 1, prev = 1, expression = 1; + do { + prev = epsilon; + epsilon /= 2; + expression = 1 + epsilon; + } while (expression > 1); + return prev; + } +} +template <> struct epsilon_helper<float> { static constexpr float value = FLT_EPSILON; }; +template <> struct epsilon_helper<double> { static constexpr double value = DBL_EPSILON; }; +template <> struct epsilon_helper<long double> { +#ifdef KOKKOS_COMPILER_IBM + static constexpr long double value = 
machineeps<long double>(); +#else + static constexpr long double value = LDBL_EPSILON; +#endif +}; +template <class> struct round_error_helper; +template <> struct round_error_helper<float> { static constexpr float value = 0.5F; }; +template <> struct round_error_helper<double> { static constexpr double value = 0.5; }; +template <> struct round_error_helper<long double> { static constexpr long double value = 0.5L; }; +template <class> struct norm_min_helper; +template <> struct norm_min_helper<float> { static constexpr float value = FLT_MIN; }; +template <> struct norm_min_helper<double> { static constexpr double value = DBL_MIN; }; +template <> struct norm_min_helper<long double> { static constexpr long double value = LDBL_MIN; }; +template <class> struct digits_helper; +template <> struct digits_helper<bool> { static constexpr int value = 1; }; +template <> struct digits_helper<char> { static constexpr int value = CHAR_BIT - std::is_signed<char>::value; }; +template <> struct digits_helper<signed char> { static constexpr int value = CHAR_BIT - 1; }; +template <> struct digits_helper<unsigned char> { static constexpr int value = CHAR_BIT; }; +template <> struct digits_helper<short> { static constexpr int value = CHAR_BIT*sizeof(short)-1; }; +template <> struct digits_helper<unsigned short> { static constexpr int value = CHAR_BIT*sizeof(short); }; +template <> struct digits_helper<int> { static constexpr int value = CHAR_BIT*sizeof(int)-1; }; +template <> struct digits_helper<unsigned int> { static constexpr int value = CHAR_BIT*sizeof(int); }; +template <> struct digits_helper<long int> { static constexpr int value = CHAR_BIT*sizeof(long int)-1; }; +template <> struct digits_helper<unsigned long int> { static constexpr int value = CHAR_BIT*sizeof(long int); }; +template <> struct digits_helper<long long int> { static constexpr int value = CHAR_BIT*sizeof(long long int)-1; }; +template <> struct digits_helper<unsigned long long int> { static constexpr int value = 
CHAR_BIT*sizeof(long long int); }; +template <> struct digits_helper<float> { static constexpr int value = FLT_MANT_DIG; }; +template <> struct digits_helper<double> { static constexpr int value = DBL_MANT_DIG; }; +template <> struct digits_helper<long double> { static constexpr int value = LDBL_MANT_DIG; }; +template <class> struct digits10_helper; +template <> struct digits10_helper<bool> { static constexpr int value = 0; }; +constexpr double log10_2 = 0.30102999566398120; +#define DIGITS10_HELPER_INTEGRAL(TYPE) \ +template <> struct digits10_helper<TYPE> { static constexpr int value = digits_helper<TYPE>::value * log10_2; }; +DIGITS10_HELPER_INTEGRAL(char) +DIGITS10_HELPER_INTEGRAL(signed char) +DIGITS10_HELPER_INTEGRAL(unsigned char) +DIGITS10_HELPER_INTEGRAL(short) +DIGITS10_HELPER_INTEGRAL(unsigned short) +DIGITS10_HELPER_INTEGRAL(int) +DIGITS10_HELPER_INTEGRAL(unsigned int) +DIGITS10_HELPER_INTEGRAL(long int) +DIGITS10_HELPER_INTEGRAL(unsigned long int) +DIGITS10_HELPER_INTEGRAL(long long int) +DIGITS10_HELPER_INTEGRAL(unsigned long long int) +#undef DIGITS10_HELPER_INTEGRAL +template <> struct digits10_helper<float> { static constexpr int value = FLT_DIG; }; +template <> struct digits10_helper<double> { static constexpr int value = DBL_DIG; }; +template <> struct digits10_helper<long double> { static constexpr int value = LDBL_DIG; }; +template <class> struct max_digits10_helper; +// FIXME the *_DECIMAL_DIG macros are only guaranteed in <cfloat> since C++17, so they are not used here +//template <> struct max_digits10_helper<float> { static constexpr int value = FLT_DECIMAL_DIG; }; +//template <> struct max_digits10_helper<double> { static constexpr int value = DBL_DECIMAL_DIG; }; +//template <> struct max_digits10_helper<long double> { static constexpr int value = LDBL_DECIMAL_DIG; }; +template <> struct max_digits10_helper<float> { static constexpr int value = 9; }; +template <> struct max_digits10_helper<double> { static constexpr int value = 17; }; +template <> struct max_digits10_helper<long double> { static 
constexpr int value = 2 + LDBL_MANT_DIG * 643L / 2136; }; // ceil(1 + LDBL_MANT_DIG * log10(2)); 643/2136 approximates log10(2), yields 21 for a 64-bit significand +template <class> struct radix_helper; +template <> struct radix_helper<bool> { static constexpr int value = 2; }; +template <> struct radix_helper<char> { static constexpr int value = 2; }; +template <> struct radix_helper<signed char> { static constexpr int value = 2; }; +template <> struct radix_helper<unsigned char> { static constexpr int value = 2; }; +template <> struct radix_helper<short> { static constexpr int value = 2; }; +template <> struct radix_helper<unsigned short> { static constexpr int value = 2; }; +template <> struct radix_helper<int> { static constexpr int value = 2; }; +template <> struct radix_helper<unsigned int> { static constexpr int value = 2; }; +template <> struct radix_helper<long int> { static constexpr int value = 2; }; +template <> struct radix_helper<unsigned long int> { static constexpr int value = 2; }; +template <> struct radix_helper<long long int> { static constexpr int value = 2; }; +template <> struct radix_helper<unsigned long long int> { static constexpr int value = 2; }; +template <> struct radix_helper<float> { static constexpr int value = FLT_RADIX; }; +template <> struct radix_helper<double> { static constexpr int value = FLT_RADIX; }; +template <> struct radix_helper<long double> { static constexpr int value = FLT_RADIX; }; +template <class> struct min_exponent_helper; +template <> struct min_exponent_helper<float> { static constexpr int value = FLT_MIN_EXP; }; +template <> struct min_exponent_helper<double> { static constexpr int value = DBL_MIN_EXP; }; +template <> struct min_exponent_helper<long double> { static constexpr int value = LDBL_MIN_EXP; }; +template <class> struct min_exponent10_helper; +template <> struct min_exponent10_helper<float> { static constexpr int value = FLT_MIN_10_EXP; }; +template <> struct min_exponent10_helper<double> { static constexpr int value = DBL_MIN_10_EXP; }; +template <> struct min_exponent10_helper<long double> { static constexpr int value = 
LDBL_MIN_10_EXP; }; +template <class> struct max_exponent_helper; +template <> struct max_exponent_helper<float> { static constexpr int value = FLT_MAX_EXP; }; +template <> struct max_exponent_helper<double> { static constexpr int value = DBL_MAX_EXP; }; +template <> struct max_exponent_helper<long double> { static constexpr int value = LDBL_MAX_EXP; }; +template <class> struct max_exponent10_helper; +template <> struct max_exponent10_helper<float> { static constexpr int value = FLT_MAX_10_EXP; }; +template <> struct max_exponent10_helper<double> { static constexpr int value = DBL_MAX_10_EXP; }; +template <> struct max_exponent10_helper<long double> { static constexpr int value = LDBL_MAX_10_EXP; }; +// clang-format on +} // namespace Impl + +#if defined(KOKKOS_ENABLE_CXX17) +#define KOKKOS_IMPL_DEFINE_TRAIT(TRAIT) \ + template <class T> \ + struct TRAIT : Impl::TRAIT##_helper<T> {}; \ + template <class T> \ + inline constexpr auto TRAIT##_v = TRAIT<T>::value; +#else +#define KOKKOS_IMPL_DEFINE_TRAIT(TRAIT) \ + template <class T> \ + struct TRAIT : Impl::TRAIT##_helper<T> {}; +#endif + +// Numeric distinguished value traits +KOKKOS_IMPL_DEFINE_TRAIT(infinity) +KOKKOS_IMPL_DEFINE_TRAIT(finite_min) +KOKKOS_IMPL_DEFINE_TRAIT(finite_max) +KOKKOS_IMPL_DEFINE_TRAIT(epsilon) +KOKKOS_IMPL_DEFINE_TRAIT(round_error) +KOKKOS_IMPL_DEFINE_TRAIT(norm_min) + +// Numeric characteristics traits +KOKKOS_IMPL_DEFINE_TRAIT(digits) +KOKKOS_IMPL_DEFINE_TRAIT(digits10) +KOKKOS_IMPL_DEFINE_TRAIT(max_digits10) +KOKKOS_IMPL_DEFINE_TRAIT(radix) +KOKKOS_IMPL_DEFINE_TRAIT(min_exponent) +KOKKOS_IMPL_DEFINE_TRAIT(min_exponent10) +KOKKOS_IMPL_DEFINE_TRAIT(max_exponent) +KOKKOS_IMPL_DEFINE_TRAIT(max_exponent10) + +#undef KOKKOS_IMPL_DEFINE_TRAIT + +} // namespace Experimental template <class T> struct reduction_identity; /*{ diff --git a/packages/kokkos/core/src/Kokkos_OpenMP.hpp b/packages/kokkos/core/src/Kokkos_OpenMP.hpp index 
1856c00a65a7ff7cfd40a91b62c9b64119a4434c..eedba38a8456117ac03d8c21e657729673017984 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMP.hpp @@ -208,14 +208,6 @@ struct MemorySpaceAccess<Kokkos::OpenMP::memory_space, enum : bool { deepcopy = false }; }; -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::OpenMP::memory_space, Kokkos::OpenMP::scratch_memory_space> { - enum : bool { value = true }; - inline static void verify(void) {} - inline static void verify(const void*) {} -}; - } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp index 4cbeef2d7d6fb2ec3a505270b190ece741cdde6d..2a57a43e63b77b7f60e4cc40bb20272e0332944a 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMPTarget.hpp @@ -150,24 +150,6 @@ class OpenMPTargetSpaceInitializer : public ExecSpaceInitializerBase { /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ -namespace Kokkos { -namespace Impl { - -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::Experimental::OpenMPTarget::memory_space, - Kokkos::Experimental::OpenMPTarget::scratch_memory_space> { - enum : bool { value = true }; - inline static void verify(void) {} - inline static void verify(const void*) {} -}; - -} // namespace Impl -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - #include <OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp> #include <OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp> #include <OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp> diff --git a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp 
index f7c043073c51f677516748454d3657d159caf46f..dc5e0194ab0a8bb85a29727c664a33b6c23e2c6c 100644 --- a/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp @@ -54,8 +54,10 @@ #ifdef KOKKOS_ENABLE_OPENMPTARGET +#include <OpenMPTarget/Kokkos_OpenMPTarget_Error.hpp> #include <Kokkos_HostSpace.hpp> #include <omp.h> + /*--------------------------------------------------------------------------*/ namespace Kokkos { @@ -145,17 +147,22 @@ namespace Impl { template <> class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void> - : public SharedAllocationRecord<void, void> { + : public HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::OpenMPTargetSpace> { private: + friend class HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::OpenMPTargetSpace>; + friend class SharedAllocationRecordCommon< + Kokkos::Experimental::OpenMPTargetSpace>; friend Kokkos::Experimental::OpenMPTargetSpace; + using base_t = HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::OpenMPTargetSpace>; using RecordBase = SharedAllocationRecord<void, void>; SharedAllocationRecord(const SharedAllocationRecord&) = delete; SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - static void deallocate(RecordBase*); - /**\brief Root record for tracked allocations from this OpenMPTargetSpace * instance */ static RecordBase s_root_record; @@ -184,23 +191,9 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void> #endif } - /**\brief Allocate tracked memory in the space */ - static void* allocate_tracked( - const Kokkos::Experimental::OpenMPTargetSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size); - /**\brief Reallocate tracked memory in the space */ static void* reallocate_tracked(void* const arg_alloc_ptr, const size_t arg_alloc_size); - - /**\brief Deallocate tracked memory in the space */ - static void 
deallocate_tracked(void* const arg_alloc_ptr); - - static SharedAllocationRecord* get_record(void* arg_alloc_ptr); - - static void print_records(std::ostream&, - const Kokkos::Experimental::OpenMPTargetSpace&, - bool detail = false); }; } // namespace Impl @@ -217,13 +210,20 @@ template <class ExecutionSpace> struct DeepCopy<Kokkos::Experimental::OpenMPTargetSpace, Kokkos::Experimental::OpenMPTargetSpace, ExecutionSpace> { DeepCopy(void* dst, const void* src, size_t n) { - omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, - omp_get_default_device(), omp_get_default_device()); + // In the Release and RelWithDebInfo builds, the size of the memcpy should + // be greater than zero to avoid error. omp_target_memcpy returns zero on + // success. + if (n > 0) + OMPT_SAFE_CALL(omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, + omp_get_default_device(), + omp_get_default_device())); } DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, size_t n) { exec.fence(); - omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, - omp_get_default_device(), omp_get_default_device()); + if (n > 0) + OMPT_SAFE_CALL(omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, + omp_get_default_device(), + omp_get_default_device())); } }; @@ -231,13 +231,17 @@ template <class ExecutionSpace> struct DeepCopy<Kokkos::Experimental::OpenMPTargetSpace, HostSpace, ExecutionSpace> { DeepCopy(void* dst, const void* src, size_t n) { - omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, - omp_get_default_device(), omp_get_initial_device()); + if (n > 0) + OMPT_SAFE_CALL(omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, + omp_get_default_device(), + omp_get_initial_device())); } DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, size_t n) { exec.fence(); - omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, - omp_get_default_device(), omp_get_initial_device()); + if (n > 0) + OMPT_SAFE_CALL(omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, + 
omp_get_default_device(), + omp_get_initial_device())); } }; @@ -245,24 +249,20 @@ template <class ExecutionSpace> struct DeepCopy<HostSpace, Kokkos::Experimental::OpenMPTargetSpace, ExecutionSpace> { DeepCopy(void* dst, const void* src, size_t n) { - omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, - omp_get_initial_device(), omp_get_default_device()); + if (n > 0) + OMPT_SAFE_CALL(omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, + omp_get_initial_device(), + omp_get_default_device())); } DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, size_t n) { exec.fence(); - omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, - omp_get_initial_device(), omp_get_default_device()); + if (n > 0) + OMPT_SAFE_CALL(omp_target_memcpy(dst, const_cast<void*>(src), n, 0, 0, + omp_get_initial_device(), + omp_get_default_device())); } }; -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::HostSpace, Kokkos::Experimental::OpenMPTargetSpace> { - enum : bool { value = false }; - inline static void verify(void) {} - inline static void verify(const void*) {} -}; - } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_Parallel.hpp b/packages/kokkos/core/src/Kokkos_Parallel.hpp index a00da4472ba1f57cc658ded9f148b4e2a735b96f..85d1dad454ba64aa1311cf19437206768018571b 100644 --- a/packages/kokkos/core/src/Kokkos_Parallel.hpp +++ b/packages/kokkos/core/src/Kokkos_Parallel.hpp @@ -54,6 +54,7 @@ #include <Kokkos_ExecPolicy.hpp> #include <impl/Kokkos_Tools.hpp> +#include <type_traits> #include <typeinfo> #include <impl/Kokkos_Tags.hpp> @@ -71,6 +72,35 @@ namespace Kokkos { namespace Impl { +template <class T, class = void> +struct is_detected_execution_space : std::false_type { + using type = not_a_type; +}; + +template <class T> +struct is_detected_execution_space<T, void_t<typename T::execution_space>> + : std::true_type { + using type = typename T::execution_space; +}; + +template <class T> +using 
detected_execution_space_t = + typename is_detected_execution_space<T>::type; + +template <class T, class = void> +struct is_detected_device_type : std::false_type { + using type = not_a_type; +}; + +template <class T> +struct is_detected_device_type<T, void_t<typename T::device_type>> + : std::true_type { + using type = typename T::device_type; +}; + +template <class T> +using detected_device_type_t = typename is_detected_device_type<T>::type; + //---------------------------------------------------------------------------- /** \brief Given a Functor and Execution Policy query an execution space. * @@ -79,48 +109,19 @@ namespace Impl { * else if the Functor has a device_type use that for backward compatibility * else use the default */ -template <class Functor, class Policy, class EnableFunctor, class EnablePolicy> -struct FunctorPolicyExecutionSpace { - using execution_space = Kokkos::DefaultExecutionSpace; -}; - -template <class Functor, class Policy> -struct FunctorPolicyExecutionSpace< - Functor, Policy, - typename enable_if_type<typename Functor::device_type>::type, - typename enable_if_type<typename Policy ::execution_space>::type> { - using execution_space = typename Policy::execution_space; -}; template <class Functor, class Policy> -struct FunctorPolicyExecutionSpace< - Functor, Policy, - typename enable_if_type<typename Functor::execution_space>::type, - typename enable_if_type<typename Policy ::execution_space>::type> { - using execution_space = typename Policy::execution_space; -}; - -template <class Functor, class Policy, class EnableFunctor> -struct FunctorPolicyExecutionSpace< - Functor, Policy, EnableFunctor, - typename enable_if_type<typename Policy::execution_space>::type> { - using execution_space = typename Policy::execution_space; -}; - -template <class Functor, class Policy, class EnablePolicy> -struct FunctorPolicyExecutionSpace< - Functor, Policy, - typename enable_if_type<typename Functor::device_type>::type, - EnablePolicy> { - using 
execution_space = typename Functor::device_type::execution_space; -}; - -template <class Functor, class Policy, class EnablePolicy> -struct FunctorPolicyExecutionSpace< - Functor, Policy, - typename enable_if_type<typename Functor::execution_space>::type, - EnablePolicy> { - using execution_space = typename Functor::execution_space; +struct FunctorPolicyExecutionSpace { + using execution_space = std::conditional_t< + is_detected_execution_space<Policy>::value, + detected_execution_space_t<Policy>, + std::conditional_t< + is_detected_execution_space<Functor>::value, + detected_execution_space_t<Functor>, + std::conditional_t< + is_detected_device_type<Functor>::value, + detected_execution_space_t<detected_device_type_t<Functor>>, + Kokkos::DefaultExecutionSpace>>>; }; } // namespace Impl diff --git a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index e2e894da6e294411bcdcdb505ab9d281bad8fe3f..96242f99b0ca678e1ede6f148ae5d90a16127afe 100644 --- a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -744,8 +744,8 @@ struct ParallelReduceReturnValue< using value_type_scalar = typename return_type::value_type; using value_type_array = typename return_type::value_type* const; - using value_type = typename if_c<return_type::rank == 0, value_type_scalar, - value_type_array>::type; + using value_type = std::conditional_t<return_type::rank == 0, + value_type_scalar, value_type_array>; static return_type& return_value(ReturnType& return_val, const FunctorType&) { return return_val; @@ -1109,10 +1109,9 @@ inline void parallel_reduce( Kokkos::Impl::is_execution_policy<PolicyType>::value>::type* = nullptr) { using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = - typename Kokkos::Impl::if_c<(ValueTraits::StaticValueSize != 0), - typename ValueTraits::value_type, - typename ValueTraits::pointer_type>::type; + using 
value_type = std::conditional_t<(ValueTraits::StaticValueSize != 0), + typename ValueTraits::value_type, + typename ValueTraits::pointer_type>; static_assert( Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType, @@ -1135,10 +1134,9 @@ inline void parallel_reduce( Kokkos::Impl::is_execution_policy<PolicyType>::value>::type* = nullptr) { using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = - typename Kokkos::Impl::if_c<(ValueTraits::StaticValueSize != 0), - typename ValueTraits::value_type, - typename ValueTraits::pointer_type>::type; + using value_type = std::conditional_t<(ValueTraits::StaticValueSize != 0), + typename ValueTraits::value_type, + typename ValueTraits::pointer_type>; static_assert( Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, PolicyType, @@ -1160,10 +1158,9 @@ inline void parallel_reduce(const size_t& policy, const FunctorType& functor) { typename Impl::ParallelReducePolicyType<void, size_t, FunctorType>::policy_type; using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = - typename Kokkos::Impl::if_c<(ValueTraits::StaticValueSize != 0), - typename ValueTraits::value_type, - typename ValueTraits::pointer_type>::type; + using value_type = std::conditional_t<(ValueTraits::StaticValueSize != 0), + typename ValueTraits::value_type, + typename ValueTraits::pointer_type>; static_assert( Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, @@ -1188,10 +1185,9 @@ inline void parallel_reduce(const std::string& label, const size_t& policy, typename Impl::ParallelReducePolicyType<void, size_t, FunctorType>::policy_type; using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, void>; - using value_type = - typename Kokkos::Impl::if_c<(ValueTraits::StaticValueSize != 0), - typename ValueTraits::value_type, - typename ValueTraits::pointer_type>::type; + using value_type = std::conditional_t<(ValueTraits::StaticValueSize != 0), + 
typename ValueTraits::value_type, + typename ValueTraits::pointer_type>; static_assert( Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE, diff --git a/packages/kokkos/core/src/Kokkos_SYCL.hpp b/packages/kokkos/core/src/Kokkos_SYCL.hpp index b8e0c74be41d98e960dc897e7526b92bfe951d84..aa720371df73cb1ad7bba8191e5c6d83c6c317c5 100644 --- a/packages/kokkos/core/src/Kokkos_SYCL.hpp +++ b/packages/kokkos/core/src/Kokkos_SYCL.hpp @@ -54,6 +54,7 @@ #include <Kokkos_ScratchSpace.hpp> #include <impl/Kokkos_ExecSpaceInitializer.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> +#include <impl/Kokkos_HostSharedPtr.hpp> namespace Kokkos { namespace Experimental { @@ -79,23 +80,22 @@ class SYCL { using scratch_memory_space = ScratchMemorySpace<SYCL>; - ~SYCL() = default; SYCL(); - - SYCL(SYCL&&) = default; - SYCL(const SYCL&) = default; - SYCL& operator=(SYCL&&) = default; - SYCL& operator=(const SYCL&) = default; + explicit SYCL(const sycl::queue&); uint32_t impl_instance_id() const noexcept { return 0; } + sycl::context sycl_context() const noexcept { + return m_space_instance->m_queue->get_context(); + }; + //@} //------------------------------------ //! \name Functions that all Kokkos devices must implement. 
//@{ KOKKOS_INLINE_FUNCTION static int in_parallel() { -#if defined(__SYCL_ARCH__) +#if defined(__SYCL_DEVICE_ONLY__) return true; #else return false; @@ -123,25 +123,21 @@ class SYCL { */ struct SYCLDevice { - SYCLDevice(); - explicit SYCLDevice(cl::sycl::device d); - explicit SYCLDevice(const cl::sycl::device_selector& selector); + SYCLDevice() : SYCLDevice(sycl::default_selector()) {} + explicit SYCLDevice(sycl::device d); + explicit SYCLDevice(const sycl::device_selector& selector); explicit SYCLDevice(size_t id); - explicit SYCLDevice(const std::function<bool(const sycl::device&)>& pred); - cl::sycl::device get_device() const; + sycl::device get_device() const; friend std::ostream& operator<<(std::ostream& os, const SYCLDevice& that) { return that.info(os); } - static std::ostream& list_devices(std::ostream& os); - static void list_devices(); - private: std::ostream& info(std::ostream& os) const; - cl::sycl::device m_device; + sycl::device m_device; }; static void impl_initialize(SYCLDevice = SYCLDevice()); @@ -154,11 +150,11 @@ class SYCL { static const char* name(); inline Impl::SYCLInternal* impl_internal_space_instance() const { - return m_space_instance; + return m_space_instance.get(); } private: - Impl::SYCLInternal* m_space_instance; + Kokkos::Impl::HostSharedPtr<Impl::SYCLInternal> m_space_instance; }; namespace Impl { diff --git a/packages/kokkos/core/src/Kokkos_SYCL_Space.hpp b/packages/kokkos/core/src/Kokkos_SYCL_Space.hpp index f9ee6ec44ee06774978af25719bd3ee60829ab66..392ab0e59a7d01f42342318bb44aa172bcb4f705 100644 --- a/packages/kokkos/core/src/Kokkos_SYCL_Space.hpp +++ b/packages/kokkos/core/src/Kokkos_SYCL_Space.hpp @@ -49,6 +49,7 @@ #ifdef KOKKOS_ENABLE_SYCL #include <Kokkos_Concepts.hpp> +#include <Kokkos_ScratchSpace.hpp> #include <SYCL/Kokkos_SYCL_Instance.hpp> #include <impl/Kokkos_SharedAlloc.hpp> #include <impl/Kokkos_Tools.hpp> @@ -64,6 +65,7 @@ class SYCLDeviceUSMSpace { using size_type = Impl::SYCLInternal::size_type; 
SYCLDeviceUSMSpace(); + explicit SYCLDeviceUSMSpace(sycl::queue queue); void* allocate(const std::size_t arg_alloc_size) const; void* allocate(const char* arg_label, const size_t arg_alloc_size, @@ -78,21 +80,43 @@ class SYCLDeviceUSMSpace { private: template <class, class, class, class> friend class LogicalMemorySpace; - void* impl_allocate(const char* arg_label, const size_t arg_alloc_size, - const size_t arg_logical_size = 0, - const Kokkos::Tools::SpaceHandle = - Kokkos::Tools::make_space_handle(name())) const; - void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr, - const size_t arg_alloc_size, - const size_t arg_logical_size = 0, - const Kokkos::Tools::SpaceHandle = - Kokkos::Tools::make_space_handle(name())) const; public: static constexpr const char* name() { return "SYCLDeviceUSM"; }; private: - int m_device; + sycl::queue m_queue; +}; + +class SYCLSharedUSMSpace { + public: + using execution_space = SYCL; + using memory_space = SYCLSharedUSMSpace; + using device_type = Kokkos::Device<execution_space, memory_space>; + using size_type = Impl::SYCLInternal::size_type; + + SYCLSharedUSMSpace(); + explicit SYCLSharedUSMSpace(sycl::queue queue); + + void* allocate(const std::size_t arg_alloc_size) const; + void* allocate(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; + + void deallocate(void* const arg_alloc_ptr, + const std::size_t arg_alloc_size) const; + void deallocate(const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; + + private: + template <class, class, class, class> + friend class LogicalMemorySpace; + + public: + static constexpr const char* name() { return "SYCLSharedUSM"; }; + + private: + sycl::queue m_queue; }; } // namespace Experimental @@ -102,6 +126,11 @@ static_assert(Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::SYCLDeviceUSMSpace>::assignable, ""); 
+static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::SYCLSharedUSMSpace, + Kokkos::Experimental::SYCLSharedUSMSpace>::assignable, + ""); + template <> struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::Experimental::SYCLDeviceUSMSpace> { @@ -110,6 +139,15 @@ struct MemorySpaceAccess<Kokkos::HostSpace, enum : bool { deepcopy = true }; }; +template <> +struct MemorySpaceAccess<Kokkos::HostSpace, + Kokkos::Experimental::SYCLSharedUSMSpace> { + // HostSpace::execution_space != SYCLSharedUSMSpace::execution_space + enum : bool { assignable = false }; + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; + template <> struct MemorySpaceAccess<Kokkos::Experimental::SYCLDeviceUSMSpace, Kokkos::HostSpace> { @@ -118,20 +156,79 @@ struct MemorySpaceAccess<Kokkos::Experimental::SYCLDeviceUSMSpace, enum : bool { deepcopy = true }; }; +template <> +struct MemorySpaceAccess<Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCLSharedUSMSpace> { + // SYCLDeviceUSMSpace::execution_space == SYCLSharedUSMSpace::execution_space + enum : bool { assignable = true }; + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; + +//---------------------------------------- +// SYCLSharedUSMSpace::execution_space == SYCL +// SYCLSharedUSMSpace accessible to both SYCL and Host + +template <> +struct MemorySpaceAccess<Kokkos::Experimental::SYCLSharedUSMSpace, + Kokkos::HostSpace> { + enum : bool { assignable = false }; + enum : bool { accessible = false }; // SYCL cannot access HostSpace + enum : bool { deepcopy = true }; +}; + +template <> +struct MemorySpaceAccess<Kokkos::Experimental::SYCLSharedUSMSpace, + Kokkos::Experimental::SYCLDeviceUSMSpace> { + // SYCLSharedUSMSpace::execution_space == SYCLDeviceUSMSpace::execution_space + // Can access SYCLSharedUSMSpace from Host but cannot access + // SYCLDeviceUSMSpace from Host + enum : bool { assignable = false }; + + // SYCLSharedUSMSpace::execution_space can access 
SYCLDeviceUSMSpace + enum : bool { accessible = true }; + enum : bool { deepcopy = true }; +}; + +template <> +struct MemorySpaceAccess< + Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::ScratchMemorySpace<Kokkos::Experimental::SYCL>> { + enum : bool { assignable = false }; + enum : bool { accessible = true }; + enum : bool { deepcopy = false }; +}; + +template <> +struct MemorySpaceAccess< + Kokkos::Experimental::SYCLSharedUSMSpace, + Kokkos::ScratchMemorySpace<Kokkos::Experimental::SYCL>> { + enum : bool { assignable = false }; + enum : bool { accessible = true }; + enum : bool { deepcopy = false }; +}; + } // namespace Impl namespace Impl { template <> class SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void> - : public SharedAllocationRecord<void, void> { + : public HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::SYCLDeviceUSMSpace> { private: + friend class SharedAllocationRecordCommon< + Kokkos::Experimental::SYCLDeviceUSMSpace>; + friend class HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::SYCLDeviceUSMSpace>; + using base_t = HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::SYCLDeviceUSMSpace>; using RecordBase = SharedAllocationRecord<void, void>; SharedAllocationRecord(const SharedAllocationRecord&) = delete; + SharedAllocationRecord(SharedAllocationRecord&&) = delete; SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - - static void deallocate(RecordBase*); + SharedAllocationRecord& operator=(SharedAllocationRecord&&) = delete; #ifdef KOKKOS_ENABLE_DEBUG static RecordBase s_root_record; @@ -145,32 +242,38 @@ class SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void> SharedAllocationRecord( const Kokkos::Experimental::SYCLDeviceUSMSpace& arg_space, const std::string& arg_label, const size_t arg_alloc_size, - const RecordBase::function_type arg_dealloc = &deallocate); + const RecordBase::function_type arg_dealloc = 
&base_t::deallocate); +}; - public: - std::string get_label() const; +template <> +class SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, void> + : public SharedAllocationRecordCommon< + Kokkos::Experimental::SYCLSharedUSMSpace> { + private: + friend class SharedAllocationRecordCommon< + Kokkos::Experimental::SYCLSharedUSMSpace>; + using base_t = + SharedAllocationRecordCommon<Kokkos::Experimental::SYCLSharedUSMSpace>; + using RecordBase = SharedAllocationRecord<void, void>; - static SharedAllocationRecord* allocate( - const Kokkos::Experimental::SYCLDeviceUSMSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size); + SharedAllocationRecord(const SharedAllocationRecord&) = delete; + SharedAllocationRecord(SharedAllocationRecord&&) = delete; + SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; + SharedAllocationRecord& operator=(SharedAllocationRecord&&) = delete; - /**\brief Allocate tracked memory in the space */ - static void* allocate_tracked( - const Kokkos::Experimental::SYCLDeviceUSMSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size); + static RecordBase s_root_record; - /**\brief Reallocate tracked memory in the space */ - static void* reallocate_tracked(void* const arg_alloc_ptr, - const size_t arg_alloc_size); + const Kokkos::Experimental::SYCLSharedUSMSpace m_space; - /**\brief Deallocate tracked memory in the space */ - static void deallocate_tracked(void* const arg_alloc_ptr); + protected: + ~SharedAllocationRecord(); - static SharedAllocationRecord* get_record(void* arg_alloc_ptr); + SharedAllocationRecord() = default; - static void print_records(std::ostream&, - const Kokkos::Experimental::SYCLDeviceUSMSpace&, - bool detail = false); + SharedAllocationRecord( + const Kokkos::Experimental::SYCLSharedUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &base_t::deallocate); }; } // 
namespace Impl diff --git a/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp b/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp index f6b0a5fbeb04378d1c4d62b492b5c0e8632cc495..2eebf5365e71d2c5cf42c356951ccec9d041fe14 100644 --- a/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp @@ -70,8 +70,8 @@ class ScratchMemorySpace { private: mutable char* m_iter_L0 = nullptr; - char* m_end_L0 = nullptr; mutable char* m_iter_L1 = nullptr; + char* m_end_L0 = nullptr; char* m_end_L1 = nullptr; mutable int m_multiplier = 0; @@ -100,89 +100,46 @@ class ScratchMemorySpace { template <typename IntType> KOKKOS_INLINE_FUNCTION void* get_shmem(const IntType& size, int level = -1) const { - if (level == -1) level = m_default_level; - if (level == 0) { - void* tmp = m_iter_L0 + m_offset * align(size); - if (m_end_L0 < (m_iter_L0 += align(size) * m_multiplier)) { - m_iter_L0 -= align(size) * m_multiplier; // put it back like it was -#ifdef KOKKOS_ENABLE_DEBUG - // mfh 23 Jun 2015: printf call consumes 25 registers - // in a CUDA build, so only print in debug mode. The - // function still returns nullptr if not enough memory. - printf( - "ScratchMemorySpace<...>::get_shmem: Failed to allocate " - "%ld byte(s); remaining capacity is %ld byte(s)\n", - long(size), long(m_end_L0 - m_iter_L0)); -#endif // KOKKOS_ENABLE_DEBUG - tmp = nullptr; - } - return tmp; - } else { - void* tmp = m_iter_L1 + m_offset * align(size); - if (m_end_L1 < (m_iter_L1 += align(size) * m_multiplier)) { - m_iter_L1 -= align(size) * m_multiplier; // put it back like it was -#ifdef KOKKOS_ENABLE_DEBUG - // mfh 23 Jun 2015: printf call consumes 25 registers - // in a CUDA build, so only print in debug mode. The - // function still returns nullptr if not enough memory. 
- printf( - "ScratchMemorySpace<...>::get_shmem: Failed to allocate " - "%ld byte(s); remaining capacity is %ld byte(s)\n", - long(size), long(m_end_L1 - m_iter_L1)); -#endif // KOKKOS_ENABLE_DEBUG - tmp = nullptr; - } - return tmp; - } + return get_shmem_common</*aligned*/ false>(size, 1, level); } - KOKKOS_INLINE_FUNCTION - void* get_shmem_aligned(const ptrdiff_t size, const ptrdiff_t alignment, - int level = -1) const { + template <typename IntType> + KOKKOS_INLINE_FUNCTION void* get_shmem_aligned(const IntType& size, + const ptrdiff_t alignment, + int level = -1) const { + return get_shmem_common</*aligned*/ true>(size, alignment, level); + } + + private: + template <bool aligned, typename IntType> + KOKKOS_INLINE_FUNCTION void* get_shmem_common(const IntType& size, + const ptrdiff_t alignment, + int level = -1) const { if (level == -1) level = m_default_level; - if (level == 0) { - char* previous = m_iter_L0; - const ptrdiff_t missalign = size_t(m_iter_L0) % alignment; - if (missalign) m_iter_L0 += alignment - missalign; - - void* tmp = m_iter_L0 + m_offset * size; - if (m_end_L0 < (m_iter_L0 += size * m_multiplier)) { - m_iter_L0 = previous; // put it back like it was -#ifdef KOKKOS_ENABLE_DEBUG - // mfh 23 Jun 2015: printf call consumes 25 registers - // in a CUDA build, so only print in debug mode. The - // function still returns nullptr if not enough memory. - printf( - "ScratchMemorySpace<...>::get_shmem: Failed to allocate " - "%ld byte(s); remaining capacity is %ld byte(s)\n", - long(size), long(m_end_L0 - m_iter_L0)); -#endif // KOKKOS_ENABLE_DEBUG - tmp = nullptr; - } - return tmp; - } else { - char* previous = m_iter_L1; - const ptrdiff_t missalign = size_t(m_iter_L1) % alignment; - if (missalign) m_iter_L1 += alignment - missalign; - - void* tmp = m_iter_L1 + m_offset * size; - if (m_end_L1 < (m_iter_L1 += size * m_multiplier)) { - m_iter_L1 = previous; // put it back like it was + auto& m_iter = (level == 0) ? 
m_iter_L0 : m_iter_L1; + auto& m_end = (level == 0) ? m_end_L0 : m_end_L1; + char* previous = m_iter; + const ptrdiff_t missalign = size_t(m_iter) % alignment; + if (missalign) m_iter += alignment - missalign; + + void* tmp = m_iter + m_offset * (aligned ? size : align(size)); + if (m_end < (m_iter += (aligned ? size : align(size)) * m_multiplier)) { + m_iter = previous; // put it back like it was #ifdef KOKKOS_ENABLE_DEBUG - // mfh 23 Jun 2015: printf call consumes 25 registers - // in a CUDA build, so only print in debug mode. The - // function still returns nullptr if not enough memory. - printf( - "ScratchMemorySpace<...>::get_shmem: Failed to allocate " - "%ld byte(s); remaining capacity is %ld byte(s)\n", - long(size), long(m_end_L1 - m_iter_L1)); + // mfh 23 Jun 2015: printf call consumes 25 registers + // in a CUDA build, so only print in debug mode. The + // function still returns nullptr if not enough memory. + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "ScratchMemorySpace<...>::get_shmem: Failed to allocate " + "%ld byte(s); remaining capacity is %ld byte(s)\n", + long(size), long(m_end - m_iter)); #endif // KOKKOS_ENABLE_DEBUG - tmp = nullptr; - } - return tmp; + tmp = nullptr; } + return tmp; } + public: KOKKOS_DEFAULTED_FUNCTION ScratchMemorySpace() = default; @@ -192,9 +149,9 @@ class ScratchMemorySpace { void* ptr_L1 = nullptr, const IntType& size_L1 = 0) : m_iter_L0((char*)ptr_L0), - m_end_L0(m_iter_L0 + size_L0), m_iter_L1((char*)ptr_L1), - m_end_L1(m_iter_L1 + size_L1), + m_end_L0((char*)ptr_L0 + size_L0), + m_end_L1((char*)ptr_L1 + size_L1), m_multiplier(1), m_offset(0), m_default_level(0) {} diff --git a/packages/kokkos/core/src/Kokkos_Serial.hpp b/packages/kokkos/core/src/Kokkos_Serial.hpp index a1fccd37558c84c45a5e6a223664b66464556396..4d5bb2410bfaabf6f752acf55795c9d7ef82016d 100644 --- a/packages/kokkos/core/src/Kokkos_Serial.hpp +++ b/packages/kokkos/core/src/Kokkos_Serial.hpp @@ -197,14 +197,6 @@ struct 
MemorySpaceAccess<Kokkos::Serial::memory_space, enum : bool { deepcopy = false }; }; -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::Serial::memory_space, Kokkos::Serial::scratch_memory_space> { - enum : bool { value = true }; - inline static void verify(void) {} - inline static void verify(const void*) {} -}; - } // namespace Impl } // namespace Kokkos @@ -474,8 +466,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, using ReducerTypeFwd = typename ReducerConditional::type; using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; @@ -729,7 +721,15 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, public: inline void execute() const { this->exec(); } - + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy&, const Functor&) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. 
+ */ + return 1024; + } inline ParallelFor(const FunctorType& arg_functor, const MDRangePolicy& arg_policy) : m_functor(arg_functor), @@ -751,8 +751,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE, MDRangePolicy, FunctorType>; @@ -781,6 +781,15 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, } public: + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy&, const Functor&) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. 
+ */ + return 1024; + } inline void execute() const { const size_t pool_reduce_size = Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); @@ -923,8 +932,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; diff --git a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp index b2b2cb4473a8378e0ac3d5ee952a7bd2088ea5dd..743273670c9b5fa77f6d590596eb27fc7204396a 100644 --- a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -236,8 +236,8 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { Kokkos::Impl::SharedAllocationRecord<memory_space, typename queue_type::Destroy>; - record_type* record = - record_type::allocate(memory_space(), "TaskQueue", sizeof(queue_type)); + record_type* record = record_type::allocate( + memory_space(), "Kokkos::TaskQueue", sizeof(queue_type)); m_queue = new (record->data()) queue_type(arg_memory_pool); diff --git a/packages/kokkos/core/src/Kokkos_Threads.hpp b/packages/kokkos/core/src/Kokkos_Threads.hpp index 1374ee7106f8e3cbbe2e2986f1b4da03d2cfb816..e827c2a2a1abd46999360c1eef57eb85428436aa 100644 --- a/packages/kokkos/core/src/Kokkos_Threads.hpp +++ b/packages/kokkos/core/src/Kokkos_Threads.hpp @@ -211,14 +211,6 @@ struct MemorySpaceAccess<Kokkos::Threads::memory_space, enum : bool { deepcopy = false }; }; -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::Threads::memory_space, Kokkos::Threads::scratch_memory_space> { - enum : bool { value = true }; - inline static void verify(void) {} - inline static 
void verify(const void*) {} -}; - } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_Tuners.hpp b/packages/kokkos/core/src/Kokkos_Tuners.hpp index 83492babcf927ed37137359ba1231acd30ce70ee..f7cc34cc114d29cbe5612bf4350fe01a498282c3 100644 --- a/packages/kokkos/core/src/Kokkos_Tuners.hpp +++ b/packages/kokkos/core/src/Kokkos_Tuners.hpp @@ -48,6 +48,7 @@ #include <Kokkos_Macros.hpp> #include <Kokkos_Core_fwd.hpp> #include <Kokkos_ExecPolicy.hpp> +#include <KokkosExp_MDRangePolicy.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> #include <array> @@ -182,14 +183,28 @@ struct get_space_dimensionality; // The dimensionality of a vector is 1 template <class T> struct get_space_dimensionality<std::vector<T>> { - static constexpr const int value = 1; + static constexpr int value = 1; }; // The dimensionality of a map is 1 (the map) plus the dimensionality // of the map's value type template <class K, class V> struct get_space_dimensionality<std::map<K, V>> { - static constexpr const int value = 1 + get_space_dimensionality<V>::value; + static constexpr int value = 1 + get_space_dimensionality<V>::value; +}; + +template <class T, int N> +struct n_dimensional_sparse_structure; + +template <class T> +struct n_dimensional_sparse_structure<T, 1> { + using type = std::vector<T>; +}; + +template <class T, int N> +struct n_dimensional_sparse_structure { + using type = + std::map<T, typename n_dimensional_sparse_structure<T, N - 1>::type>; }; /** @@ -286,13 +301,12 @@ template <template <class...> class Container, size_t MaxDimensionSize = 100, class MultidimensionalSparseTuningProblem { public: using ProblemSpaceInput = Container<TemplateArguments...>; - static constexpr const int space_dimensionality = + static constexpr int space_dimensionality = Impl::get_space_dimensionality<ProblemSpaceInput>::value; - static constexpr const size_t max_space_dimension_size = MaxDimensionSize; - static constexpr const double tuning_min = 0.0; - static 
constexpr const double tuning_max = 0.999; - static constexpr const double tuning_step = - tuning_max / max_space_dimension_size; + static constexpr size_t max_space_dimension_size = MaxDimensionSize; + static constexpr double tuning_min = 0.0; + static constexpr double tuning_max = 0.999; + static constexpr double tuning_step = tuning_max / max_space_dimension_size; using StoredProblemSpace = typename Impl::MapTypeConverter<ProblemSpaceInput>::type; @@ -470,6 +484,72 @@ class TeamSizeTuner { private: }; +namespace Impl { + +template <typename T> +void fill_tile(std::vector<T>& cont, int tile_size) { + for (int x = 1; x < tile_size; x *= 2) { + cont.push_back(x); + } +} +template <typename T, typename Mapped> +void fill_tile(std::map<T, Mapped>& cont, int tile_size) { + for (int x = 1; x < tile_size; x *= 2) { + fill_tile(cont[x], tile_size / x); + } +} +} // namespace Impl + +template <int MDRangeRank> +struct MDRangeTuner { + private: + static constexpr int rank = MDRangeRank; + static constexpr int max_slices = 15; + using SpaceDescription = + typename Impl::n_dimensional_sparse_structure<int, rank>::type; + using TunerType = + decltype(make_multidimensional_sparse_tuning_problem<max_slices>( + std::declval<SpaceDescription>(), + std::declval<std::vector<std::string>>())); + TunerType tuner; + + public: + MDRangeTuner() = default; + template <typename Functor, typename TagType, typename Calculator, + typename... 
Properties> + MDRangeTuner(const std::string& name, + const Kokkos::MDRangePolicy<Properties...>& policy, + const Functor& functor, const TagType& tag, Calculator calc) { + SpaceDescription desc; + int max_tile_size = + calc.get_mdrange_max_tile_size_product(policy, functor, tag); + Impl::fill_tile(desc, max_tile_size); + std::vector<std::string> feature_names; + for (int x = 0; x < rank; ++x) { + feature_names.push_back(name + "_tile_size_" + std::to_string(x)); + } + tuner = make_multidimensional_sparse_tuning_problem<max_slices>( + desc, feature_names); + } + template <typename Policy, typename Tuple, size_t... Indices> + void set_policy_tile(Policy& policy, const Tuple& tuple, + const std::index_sequence<Indices...>&) { + policy.impl_change_tile_size({std::get<Indices>(tuple)...}); + } + template <typename... Properties> + void tune(Kokkos::MDRangePolicy<Properties...>& policy) { + if (Kokkos::Tools::Experimental::have_tuning_tool()) { + auto configuration = tuner.begin(); + set_policy_tile(policy, configuration, std::make_index_sequence<rank>{}); + } + } + void end() { + if (Kokkos::Tools::Experimental::have_tuning_tool()) { + tuner.end(); + } + } +}; + } // namespace Experimental } // namespace Tools } // namespace Kokkos diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp index 8fffe20f5580964c565ba6bd6a6f71bb484e265e..1abe0a48df5eab32f01ef703e6d39921eb9c70c3 100644 --- a/packages/kokkos/core/src/Kokkos_View.hpp +++ b/packages/kokkos/core/src/Kokkos_View.hpp @@ -789,36 +789,22 @@ class View : public ViewTraits<DataType, Properties...> { std::is_same<typename traits::specialize, void>::value && (is_layout_left || is_layout_right || is_layout_stride); - template <class Space, bool = Kokkos::Impl::MemorySpaceAccess< - Space, typename traits::memory_space>::accessible> - struct verify_space { - KOKKOS_FORCEINLINE_FUNCTION static void check() {} - }; - - template <class Space> - struct verify_space<Space, false> { - 
KOKKOS_FORCEINLINE_FUNCTION static void check() { - Kokkos::abort( - "Kokkos::View ERROR: attempt to access inaccessible memory space"); - }; - }; - #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) #define KOKKOS_IMPL_SINK(ARG) ARG -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ - View::template verify_space< \ - Kokkos::Impl::ActiveExecutionMemorySpace>::check(); \ +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ + Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \ + typename traits::memory_space>::check(); \ Kokkos::Impl::view_verify_operator_bounds<typename traits::memory_space> ARG; #else #define KOKKOS_IMPL_SINK(ARG) -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ - View::template verify_space< \ - Kokkos::Impl::ActiveExecutionMemorySpace>::check(); +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ + Kokkos::Impl::verify_space<Kokkos::Impl::ActiveExecutionMemorySpace, \ + typename traits::memory_space>::check(); #endif @@ -1618,7 +1604,17 @@ class View : public ViewTraits<DataType, Properties...> { : View(arg_prop, typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7)) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#ifdef KOKKOS_ENABLE_OPENMPTARGET + KOKKOS_IMPL_IF_ON_HOST + Impl::runtime_check_rank_host( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label()); + else Impl::runtime_check_rank_device( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7); +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) Impl::runtime_check_rank_host( traits::rank_dynamic, std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, @@ -1648,7 +1644,17 @@ class View : public ViewTraits<DataType, Properties...> { : View(arg_prop, typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, 
arg_N6, arg_N7)) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#ifdef KOKKOS_ENABLE_OPENMPTARGET + KOKKOS_IMPL_IF_ON_HOST + Impl::runtime_check_rank_host( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label()); + else Impl::runtime_check_rank_device( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7); +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) Impl::runtime_check_rank_host( traits::rank_dynamic, std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, @@ -1692,7 +1698,17 @@ class View : public ViewTraits<DataType, Properties...> { "Layout is not extent constructible. A layout object should " "be passed too.\n"); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#ifdef KOKKOS_ENABLE_OPENMPTARGET + KOKKOS_IMPL_IF_ON_HOST + Impl::runtime_check_rank_host( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label()); + else Impl::runtime_check_rank_device( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7); +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) Impl::runtime_check_rank_host( traits::rank_dynamic, std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, @@ -1758,7 +1774,17 @@ class View : public ViewTraits<DataType, Properties...> { : View(Impl::ViewCtorProp<pointer_type>(arg_ptr), typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7)) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#ifdef KOKKOS_ENABLE_OPENMPTARGET + KOKKOS_IMPL_IF_ON_HOST + Impl::runtime_check_rank_host( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + 
arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label()); + else Impl::runtime_check_rank_device( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7); +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) Impl::runtime_check_rank_host( traits::rank_dynamic, std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, @@ -1838,7 +1864,17 @@ class View : public ViewTraits<DataType, Properties...> { sizeof(typename traits::value_type)))), typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7)) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#ifdef KOKKOS_ENABLE_OPENMPTARGET + KOKKOS_IMPL_IF_ON_HOST + Impl::runtime_check_rank_host( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label()); + else Impl::runtime_check_rank_device( + traits::rank_dynamic, + std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, + arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7); +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) Impl::runtime_check_rank_host( traits::rank_dynamic, std::is_same<typename traits::specialize, void>::value, arg_N0, arg_N1, diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp index eb15005cfb2d46a490c8f74d7b66f8961b7aff27..82f049ed136119c28b4add24f1460831fec55b16 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp @@ -49,7 +49,7 @@ #if defined(KOKKOS_ENABLE_OPENMP) #if !defined(_OPENMP) && !defined(__CUDA_ARCH__) && \ - !defined(__HIP_DEVICE_COMPILE__) + !defined(__HIP_DEVICE_COMPILE__) && !defined(__SYCL_DEVICE_ONLY__) #error \ "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!" 
#endif diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index 176f452f5c33403fa802556ea8684a8db94248c9..2fc522780a495971a1d6455e19260bad0b422207 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -228,6 +228,15 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, m_functor(arg_functor), m_mdr_policy(arg_policy), m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy&, const Functor&) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. + */ + return 1024; + } }; } // namespace Impl @@ -257,8 +266,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; // Static Assert WorkTag void if ReducerType not InvalidType @@ -430,8 +439,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; @@ -567,6 +576,15 @@ class ParallelReduce<FunctorType, 
Kokkos::MDRangePolicy<Traits...>, ReducerType, , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ } + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy&, const Functor&) { + /** + * 1024 here is just our guess for a reasonable max tile size, + * it isn't a hardware constraint. If people see a use for larger + * tile size products, we're happy to change this. + */ + return 1024; + } }; } // namespace Impl @@ -963,8 +981,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, using ReducerTypeFwd = typename ReducerConditional::type; using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; using ValueInit = Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; using ValueJoin = Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp index efa06194782a4bc2e144dbab9175fff3980bf0cf..6fbb4245b8fb8b1e354452727ce9862c85a147c8 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp @@ -42,9 +42,10 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> + #include <algorithm> #include <omp.h> -#include <Kokkos_Macros.hpp> /*--------------------------------------------------------------------------*/ @@ -56,6 +57,7 @@ #include <sstream> #include <cstring> +#include <Kokkos_OpenMPTarget.hpp> #include <Kokkos_OpenMPTargetSpace.hpp> #include <impl/Kokkos_Error.hpp> #include <Kokkos_Atomic.hpp> @@ -111,12 +113,6 @@ std::string SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, return std::string("OpenMPTargetAllocation"); } -void SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, 
- void>::deallocate(SharedAllocationRecord<void, void> - *arg_rec) { - delete static_cast<SharedAllocationRecord *>(arg_rec); -} - SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: SharedAllocationRecord( const Kokkos::Experimental::OpenMPTargetSpace &arg_space, @@ -124,7 +120,7 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: const SharedAllocationRecord<void, void>::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through deallocation function - : SharedAllocationRecord<void, void>( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG &SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::s_root_record, @@ -135,12 +131,8 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: m_space(arg_space) { SharedAllocationHeader header; - header.m_record = static_cast<SharedAllocationRecord<void, void> *>(this); + this->base_t::_fill_host_accessible_header_info(header, arg_label); - strncpy(header.m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); - // Set last element zero, in case c_str is too long - header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; // TODO DeepCopy // DeepCopy Kokkos::Impl::DeepCopy<Experimental::OpenMPTargetSpace, HostSpace>( @@ -149,30 +141,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: //---------------------------------------------------------------------------- -void *SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: - allocate_tracked(const Kokkos::Experimental::OpenMPTargetSpace &arg_space, - const std::string &arg_alloc_label, - const size_t arg_alloc_size) { - if (!arg_alloc_size) return nullptr; - - SharedAllocationRecord *const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - - RecordBase::increment(r); - - return r->data(); -} - -void SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, - 
void>::deallocate_tracked(void *const - arg_alloc_ptr) { - if (arg_alloc_ptr != nullptr) { - SharedAllocationRecord *const r = get_record(arg_alloc_ptr); - - RecordBase::decrement(r); - } -} - void *SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: reallocate_tracked(void *const arg_alloc_ptr, const size_t arg_alloc_size) { SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr); @@ -190,48 +158,6 @@ void *SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: return r_new->data(); } -SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void> - *SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, - void>::get_record(void *alloc_ptr) { - using Header = SharedAllocationHeader; - using RecordHost = - SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>; - - if (alloc_ptr) { - Header head; - const Header *const head_ompt = Header::get_header(alloc_ptr); - - Kokkos::Impl::DeepCopy<HostSpace, Experimental::OpenMPTargetSpace>( - &head, head_ompt, sizeof(SharedAllocationHeader)); - - RecordHost *record = static_cast<RecordHost *>(head.m_record); - if (record->m_alloc_ptr == head_ompt) { - return record; - } - } - Kokkos::Impl::throw_runtime_exception(std::string( - "Kokkos::Experimental::Impl::SharedAllocationRecord< " - "Kokkos::Experimental::OpenMPTargetSpace , void >::get_record ERROR")); - return nullptr; -} - -// Iterate records to print orphaned memory ... 
-void SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>:: - print_records(std::ostream &s, - const Kokkos::Experimental::OpenMPTargetSpace &, - bool detail) { -#ifdef KOKKOS_ENABLE_DEBUG - SharedAllocationRecord<void, void>::print_host_accessible_records( - s, "OpenMPTargetSpace", &s_root_record, detail); -#else - (void)s; - (void)detail; - throw_runtime_exception( - "SharedAllocationRecord<OpenMPTargetSpace>::print_records" - " only works with KOKKOS_ENABLE_DEBUG enabled"); -#endif -} - } // namespace Impl } // namespace Kokkos @@ -303,3 +229,25 @@ HOST_SPACE_ATOMIC_XOR_MASK] , 0); } }*/ + +//============================================================================== +// <editor-fold desc="Explicit instantiations of CRTP Base classes"> {{{1 + +#include <impl/Kokkos_SharedAlloc_timpl.hpp> + +namespace Kokkos { +namespace Impl { + +// To avoid additional compilation cost for something that's (mostly?) not +// performance sensitive, we explicitly instantiate these CRTP base classes here, +// where we have access to the associated *_timpl.hpp header files. 
+template class HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::OpenMPTargetSpace>; +template class SharedAllocationRecordCommon< + Kokkos::Experimental::OpenMPTargetSpace>; + +} // end namespace Impl +} // end namespace Kokkos + +// </editor-fold> end Explicit instantiations of CRTP Base classes }}}1 +//============================================================================== diff --git a/packages/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Error.hpp similarity index 73% rename from packages/kokkos/algorithms/unit_tests/TestOpenMP.cpp rename to packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Error.hpp index 5ded3ce39065dc5fb7510973de40577703429f8d..1ca30631af920badd089559874a7d24a7cfb63f7 100644 --- a/packages/kokkos/algorithms/unit_tests/TestOpenMP.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Error.hpp @@ -42,22 +42,32 @@ //@HEADER */ -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_ENABLE_OPENMP +#ifndef KOKKOS_OPENMPTARGET_ERROR_HPP +#define KOKKOS_OPENMPTARGET_ERROR_HPP -#include <gtest/gtest.h> -#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Error.hpp> +#include <sstream> -//---------------------------------------------------------------------------- -#include <TestRandom.hpp> -#include <TestSort.hpp> -#include <iomanip> +namespace Kokkos { +namespace Impl { -namespace Test { +inline void ompt_internal_safe_call(int e, const char* name, + const char* file = nullptr, + const int line = 0) { + if (e != 0) { + std::ostringstream out; + out << name << " return value of " << e << " indicates failure"; + if (file) { + out << " " << file << ":" << line; + } + throw_runtime_exception(out.str()); + } +} -TEST(openmp, SortIssue1160) { Impl::test_issue_1160_sort<Kokkos::OpenMP>(); } +#define OMPT_SAFE_CALL(call) \ + Kokkos::Impl::ompt_internal_safe_call(call, #call, __FILE__, __LINE__) + +} // namespace Impl +} // namespace Kokkos -} // namespace Test 
-#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} #endif diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp index 74be6a37d3d0072e38c2f1148894d4bfdde6133a..f13875b440b63b729a64615a20da0f597a85cf6e 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp @@ -92,8 +92,11 @@ void OpenMPTargetExec::verify_initialized(const char* const label) { } } -void* OpenMPTargetExec::m_scratch_ptr = nullptr; -int64_t OpenMPTargetExec::m_scratch_size = 0; +void* OpenMPTargetExec::m_scratch_ptr = nullptr; +int64_t OpenMPTargetExec::m_scratch_size = 0; +int* OpenMPTargetExec::m_lock_array = nullptr; +int64_t OpenMPTargetExec::m_lock_size = 0; +uint32_t* OpenMPTargetExec::m_uniquetoken_ptr = nullptr; void OpenMPTargetExec::clear_scratch() { Kokkos::Experimental::OpenMPTargetSpace space; @@ -102,18 +105,28 @@ void OpenMPTargetExec::clear_scratch() { m_scratch_size = 0; } +void OpenMPTargetExec::clear_lock_array() { + if (m_lock_array != nullptr) { + Kokkos::Experimental::OpenMPTargetSpace space; + space.deallocate(m_lock_array, m_lock_size); + m_lock_array = nullptr; + m_lock_size = 0; + } +} + void* OpenMPTargetExec::get_scratch_ptr() { return m_scratch_ptr; } -void OpenMPTargetExec::resize_scratch(int64_t reduce_bytes, - int64_t team_reduce_bytes, - int64_t team_shared_bytes, - int64_t thread_local_bytes) { +void OpenMPTargetExec::resize_scratch(int64_t team_size, int64_t shmem_size_L0, + int64_t shmem_size_L1) { Kokkos::Experimental::OpenMPTargetSpace space; + const int64_t shmem_size = + shmem_size_L0 + shmem_size_L1; // L0 + L1 scratch memory per team. + const int64_t padding = shmem_size * 10 / 100; // Padding per team. + // Total amount of scratch memory allocated is dependent + // on the maximum number of in-flight teams possible. 
int64_t total_size = - MAX_ACTIVE_TEAMS * reduce_bytes + // Inter Team Reduction - MAX_ACTIVE_TEAMS * team_reduce_bytes + // Intra Team Reduction - MAX_ACTIVE_TEAMS * team_shared_bytes + // Team Local Scratch - MAX_ACTIVE_THREADS * thread_local_bytes; // Thread Private Scratch + (shmem_size + OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE + padding) * + (MAX_ACTIVE_THREADS / team_size); if (total_size > m_scratch_size) { space.deallocate(m_scratch_ptr, m_scratch_size); @@ -121,6 +134,35 @@ void OpenMPTargetExec::resize_scratch(int64_t reduce_bytes, m_scratch_ptr = space.allocate(total_size); } } + +int* OpenMPTargetExec::get_lock_array(int num_teams) { + Kokkos::Experimental::OpenMPTargetSpace space; + int max_active_league_size = MAX_ACTIVE_THREADS / 32; + int lock_array_elem = + (num_teams > max_active_league_size) ? num_teams : max_active_league_size; + if (m_lock_size < (lock_array_elem * sizeof(int))) { + space.deallocate(m_lock_array, m_lock_size); + m_lock_size = lock_array_elem * sizeof(int); + m_lock_array = static_cast<int*>(space.allocate(m_lock_size)); + + // FIXME_OPENMPTARGET - Creating a target region here to initialize the + // lock_array with 0's fails. Hence creating an equivalent host array to + // achieve the same. Value of host array are then copied to the lock_array. 
+ int* h_lock_array = static_cast<int*>( + omp_target_alloc(m_lock_size, omp_get_initial_device())); + + for (int i = 0; i < lock_array_elem; ++i) h_lock_array[i] = 0; + + OMPT_SAFE_CALL(omp_target_memcpy(m_lock_array, h_lock_array, m_lock_size, 0, + 0, omp_get_default_device(), + omp_get_initial_device())); + + omp_target_free(h_lock_array, omp_get_initial_device()); + } + + return m_lock_array; +} + } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp index 5bf1cdd9e119f2a76d02df7f11ddba81f0194a84..0b65e0d4a4b2270fdf577b4fffc1a10835467a47 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp @@ -51,6 +51,11 @@ #include <Kokkos_Atomic.hpp> #include "Kokkos_OpenMPTarget_Abort.hpp" +// FIXME_OPENMPTARGET - Using this macro to implement a workaround for +// hierarchical reducers. It avoids hitting the code path which we wanted to +// write but doesn't work. undef'ed at the end. +#define KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -495,7 +500,11 @@ namespace Impl { class OpenMPTargetExec { public: - enum { MAX_ACTIVE_THREADS = 256 * 8 * 56 * 4 }; + // FIXME_OPENMPTARGET - Currently the maximum number of + // teams possible is calculated based on NVIDIA's Volta GPU. In + // future this value should be based on the chosen architecture for the + // OpenMPTarget backend. 
+ enum { MAX_ACTIVE_THREADS = 2080 * 80 }; enum { MAX_ACTIVE_TEAMS = MAX_ACTIVE_THREADS / 32 }; private: @@ -505,14 +514,19 @@ class OpenMPTargetExec { static void verify_is_process(const char* const); static void verify_initialized(const char* const); + static int* get_lock_array(int num_teams); static void* get_scratch_ptr(); static void clear_scratch(); - static void resize_scratch(int64_t reduce_bytes, int64_t team_reduce_bytes, + static void clear_lock_array(); + static void resize_scratch(int64_t team_reduce_bytes, int64_t team_shared_bytes, int64_t thread_local_bytes); static void* m_scratch_ptr; static int64_t m_scratch_size; + static int* m_lock_array; + static int64_t m_lock_size; + static uint32_t* m_uniquetoken_ptr; }; } // namespace Impl @@ -542,6 +556,7 @@ class OpenMPTargetExecTeamMember { int m_league_size; int m_vector_length; int m_vector_lane; + int m_shmem_block_index; void* m_glb_scratch; void* m_reduce_scratch; @@ -583,13 +598,14 @@ class OpenMPTargetExecTeamMember { } KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space& team_scratch(int) const { - return m_team_shared.set_team_thread_mode(0, 1, 0); + const execution_space::scratch_memory_space& team_scratch(int level) const { + return m_team_shared.set_team_thread_mode(level, 1, + m_team_scratch_size[level]); } KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space& thread_scratch(int) const { - return m_team_shared.set_team_thread_mode(0, team_size(), team_rank()); + const execution_space::scratch_memory_space& thread_scratch(int level) const { + return m_team_shared.set_team_thread_mode(level, team_size(), team_rank()); } KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank; } @@ -605,23 +621,25 @@ class OpenMPTargetExecTeamMember { } template <class ValueType> - KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& /*value*/, - const int& /*thread_id*/) const { - // FIXME_OPENMPTARGET - /*#if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { } - #else - // Make sure there is enough scratch space: - using type = typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE - , ValueType , void >::type; - - type * const local_value = ((type*) m_exec.scratch_thread()); - if(team_rank() == thread_id) - *local_value = value; - memory_fence(); - team_barrier(); - value = *local_value; - #endif*/ + KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& value, + int thread_id) const { + // Make sure there is enough scratch space: + using type = + typename std::conditional<(sizeof(ValueType) < TEAM_REDUCE_SIZE), + ValueType, void>::type; + type* team_scratch = reinterpret_cast<type*>( + ((char*)(m_glb_scratch) + TEAM_REDUCE_SIZE * omp_get_team_num())); +#pragma omp barrier + if (team_rank() == thread_id) *team_scratch = value; +#pragma omp barrier + value = *team_scratch; + } + + template <class Closure, class ValueType> + KOKKOS_INLINE_FUNCTION void team_broadcast(const Closure& f, ValueType& value, + const int& thread_id) const { + f(value); + team_broadcast(value, thread_id); } template <class ValueType, class JoinOp> @@ -633,8 +651,8 @@ class OpenMPTargetExecTeamMember { const JoinLambdaAdapter<value_type, JoinOp> op(op_in); // Make sure there is enough scratch space: - using type = typename if_c<sizeof(value_type) < TEAM_REDUCE_SIZE, - value_type, void>::type; + using type = std::conditional_t<(sizeof(value_type) < TEAM_REDUCE_SIZE), + value_type, void>; const int n_values = TEAM_REDUCE_SIZE / sizeof(value_type); type* team_scratch = @@ -674,7 +692,7 @@ class OpenMPTargetExecTeamMember { // FIXME_OPENMPTARGET /* // Make sure there is enough scratch space: using type = - typename if_c<sizeof(ArgType) < TEAM_REDUCE_SIZE, ArgType, void>::type; + std::conditional_t<(sizeof(ArgType) < TEAM_REDUCE_SIZE), ArgType, void>; volatile type * const work_value = ((type*) m_exec.scratch_thread()); @@ -733,26 +751,46 @@ class OpenMPTargetExecTeamMember { using space = 
execution_space::scratch_memory_space; public: + // FIXME_OPENMPTARGET - 512(16*32) bytes at the beginning of the scratch space + // for each league is saved for reduction. It should actually be based on the + // ValueType of the reduction variable. inline OpenMPTargetExecTeamMember( const int league_rank, const int league_size, const int team_size, const int vector_length // const TeamPolicyInternal< OpenMPTarget, // Properties ...> & team , - void* const glb_scratch, const int shmem_size_L1, const int shmem_size_L2) - : m_team_shared(nullptr, 0), - m_team_scratch_size{shmem_size_L1, shmem_size_L2}, + void* const glb_scratch, const int shmem_block_index, + const int shmem_size_L0, const int shmem_size_L1) + : m_team_scratch_size{shmem_size_L0, shmem_size_L1}, m_team_rank(0), m_team_size(team_size), m_league_rank(league_rank), m_league_size(league_size), m_vector_length(vector_length), + m_shmem_block_index(shmem_block_index), m_glb_scratch(glb_scratch) { - const int omp_tid = omp_get_thread_num(); - const int omp_team_num = omp_get_team_num(); - m_reduce_scratch = (char*)glb_scratch + omp_team_num * TEAM_REDUCE_SIZE; - m_league_rank = league_rank; - m_team_rank = omp_tid; - m_vector_lane = 0; + const int omp_tid = omp_get_thread_num(); + m_team_shared = scratch_memory_space( + ((char*)glb_scratch + + m_shmem_block_index * + (shmem_size_L0 + shmem_size_L1 + + ((shmem_size_L0 + shmem_size_L1) * 10 / 100) + TEAM_REDUCE_SIZE)), + shmem_size_L0, + ((char*)glb_scratch + + m_shmem_block_index * (shmem_size_L0 + shmem_size_L1 + + ((shmem_size_L0 + shmem_size_L1) * 10 / 100) + + TEAM_REDUCE_SIZE)) + + shmem_size_L0 + ((shmem_size_L0 + shmem_size_L1) * 10 / 100) + + TEAM_REDUCE_SIZE, + shmem_size_L1); + m_reduce_scratch = + (char*)glb_scratch + + shmem_block_index * + (shmem_size_L0 + shmem_size_L1 + + ((shmem_size_L0 + shmem_size_L1) * 10 / 100) + TEAM_REDUCE_SIZE); + m_league_rank = league_rank; + m_team_rank = omp_tid; + m_vector_lane = 0; } static inline int 
team_reduce_size() { return TEAM_REDUCE_SIZE; } @@ -1047,13 +1085,16 @@ TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, iType, Impl::OpenMPTargetExecTeamMember>(thread, count); } -template <typename iType> +template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> + typename std::common_type<iType1, iType2>::type, + Impl::OpenMPTargetExecTeamMember> TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, - const iType& begin, const iType& end) { + const iType1& begin, const iType2& end) { + using iType = typename std::common_type<iType1, iType2>::type; return Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, begin, end); + iType, Impl::OpenMPTargetExecTeamMember>(thread, iType(begin), + iType(end)); } template <typename iType> @@ -1065,13 +1106,16 @@ ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType, Impl::OpenMPTargetExecTeamMember>(thread, count); } -template <typename iType> +template <typename iType1, typename iType2> KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> + typename std::common_type<iType1, iType2>::type, + Impl::OpenMPTargetExecTeamMember> ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, - const iType& arg_begin, const iType& arg_end) { + const iType1& arg_begin, const iType2& arg_end) { + using iType = typename std::common_type<iType1, iType2>::type; return Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, arg_begin, arg_end); + iType, Impl::OpenMPTargetExecTeamMember>(thread, iType(arg_begin), + iType(arg_end)); } template <typename iType> @@ -1083,13 +1127,16 @@ TeamVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType, Impl::OpenMPTargetExecTeamMember>(thread, count); } -template <typename iType> +template <typename iType1, typename iType2> 
KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> + typename std::common_type<iType1, iType2>::type, + Impl::OpenMPTargetExecTeamMember> TeamVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, - const iType& arg_begin, const iType& arg_end) { + const iType1& arg_begin, const iType2& arg_end) { + using iType = typename std::common_type<iType1, iType2>::type; return Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, arg_begin, arg_end); + iType, Impl::OpenMPTargetExecTeamMember>(thread, iType(arg_begin), + iType(arg_end)); } KOKKOS_INLINE_FUNCTION @@ -1127,26 +1174,143 @@ KOKKOS_INLINE_FUNCTION void parallel_for( * The range i=0..N-1 is mapped to all threads of the the calling thread team * and a summation of val is performed and put into result. */ + template <typename iType, class Lambda, typename ValueType> -KOKKOS_INLINE_FUNCTION void parallel_reduce( - const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, - const Lambda& lambda, ValueType& result) { - ValueType* tmp_scratch = +KOKKOS_INLINE_FUNCTION + std::enable_if_t<!Kokkos::is_reducer_type<ValueType>::value> + parallel_reduce( + const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ValueType& result) { + // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of + // elements in the array <= 32. For reduction we allocate, 16 bytes per + // element in the scratch space, hence, 16*32 = 512. 
+ static_assert(sizeof(ValueType) <= + Impl::OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE); + + ValueType* TeamThread_scratch = static_cast<ValueType*>(loop_boundaries.team.impl_reduce_scratch()); + #pragma omp barrier - tmp_scratch[0] = ValueType(); + TeamThread_scratch[0] = ValueType(); #pragma omp barrier -#pragma omp for reduction(+ : tmp_scratch[:1]) schedule(static, 1) + if constexpr (std::is_arithmetic<ValueType>::value) { +#pragma omp for reduction(+ : TeamThread_scratch[:1]) + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + ValueType tmp = ValueType(); + lambda(i, tmp); + TeamThread_scratch[0] += tmp; + } + } else { +#pragma omp declare reduction(custom:ValueType : omp_out += omp_in) + +#pragma omp for reduction(custom : TeamThread_scratch[:1]) + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + ValueType tmp = ValueType(); + lambda(i, tmp); + TeamThread_scratch[0] += tmp; + } + } + + result = TeamThread_scratch[0]; +} + +#if !defined(KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND) +// For some reason the actual version we wanted to write doesn't work +// and crashes. We should try this with every new compiler +// This is the variant we actually wanted to write +template <typename iType, class Lambda, typename ReducerType> +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> + parallel_reduce( + const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType result) { + using ValueType = typename ReducerType::value_type; + +#pragma omp declare reduction( \ + custominner:ValueType \ + : Impl::OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \ + initializer( \ + Impl::OpenMPTargetReducerWrapper <ReducerType>::init(omp_priv)) + + // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of + // elements in the array <= 32. 
For reduction we allocate, 16 bytes per + // element in the scratch space, hence, 16*32 = 512. + static_assert(sizeof(ValueType) <= + Impl::OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE); + + ValueType* TeamThread_scratch = + static_cast<ValueType*>(loop_boundaries.team.impl_reduce_scratch()); + +#pragma omp barrier + // These three lines all cause crash + Impl::OpenMPTargetReducerWrapper<ReducerType>::init(TeamThread_scratch[0]); +// result.init(TeamThread_scratch[0]); +// Impl::OpenMPTargetReducerWrapper<ReducerType> red; +// red.init(TeamThread_scratch[0]); +#pragma omp barrier + +#pragma omp for reduction(custominner : TeamThread_scratch[:1]) for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { - ValueType tmp = ValueType(); + ValueType tmp; + result.init(tmp); lambda(i, tmp); - tmp_scratch[0] += tmp; + // This line causes a crash + Impl::OpenMPTargetReducerWrapper<ReducerType>::join(TeamThread_scratch[0], + tmp); + } + result.reference() = TeamThread_scratch[0]; +} +#else +template <typename iType, class Lambda, typename ReducerType> +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> + parallel_reduce( + const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType result) { + using ValueType = typename ReducerType::value_type; + + // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of + // elements in the array <= 32. For reduction we allocate, 16 bytes per + // element in the scratch space, hence, 16*32 = 512. 
+ static_assert(sizeof(ValueType) <= + Impl::OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE); + + ValueType* TeamThread_scratch = + static_cast<ValueType*>(loop_boundaries.team.impl_reduce_scratch()); + +#pragma omp declare reduction( \ + omp_red_teamthread_reducer:ValueType \ + : Impl::OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \ + initializer( \ + Impl::OpenMPTargetReducerWrapper <ReducerType>::init(omp_priv)) + +#pragma omp barrier + ValueType tmp; + result.init(tmp); + TeamThread_scratch[0] = tmp; +#pragma omp barrier + + iType team_size = iType(omp_get_num_threads()); +#pragma omp for reduction(omp_red_teamthread_reducer \ + : TeamThread_scratch[:1]) schedule(static, 1) + for (iType t = 0; t < team_size; t++) { + ValueType tmp2; + result.init(tmp2); + + for (iType i = loop_boundaries.start + t; i < loop_boundaries.end; + i += team_size) { + lambda(i, tmp2); + } + TeamThread_scratch[0] = tmp2; } - result = tmp_scratch[0]; + result.reference() = TeamThread_scratch[0]; } +#endif // KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND /** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, * ValueType & val) for each i=0..N-1. @@ -1163,16 +1327,38 @@ KOKKOS_INLINE_FUNCTION void parallel_reduce( const Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, const Lambda& lambda, const JoinType& join, ValueType& init_result) { - ValueType result = init_result; + ValueType* TeamThread_scratch = + static_cast<ValueType*>(loop_boundaries.team.impl_reduce_scratch()); - for (iType i = loop_boundaries.start; i < loop_boundaries.end; - i += loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i, tmp); - join(result, tmp); + // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of + // elements in the array <= 32. For reduction we allocate, 16 bytes per + // element in the scratch space, hence, 16*32 = 512. 
+ static_assert(sizeof(ValueType) <= + Impl::OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE); + +#pragma omp barrier + TeamThread_scratch[0] = init_result; +#pragma omp barrier + + if constexpr (std::is_arithmetic<ValueType>::value) { +#pragma omp for reduction(+ : TeamThread_scratch[:1]) + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + ValueType tmp = ValueType(); + lambda(i, tmp); + TeamThread_scratch[0] += tmp; + } + } else { +#pragma omp declare reduction(custom:ValueType : omp_out += omp_in) + +#pragma omp for reduction(custom : TeamThread_scratch[:1]) + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + ValueType tmp = ValueType(); + lambda(i, tmp); + join(TeamThread_scratch[0], tmp); + } } - // init_result = loop_boundaries.thread.team_reduce(result,join); + init_result = TeamThread_scratch[0]; } // This is largely the same code as in HIP and CUDA except for the member name @@ -1216,6 +1402,7 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( } } // namespace Kokkos +#undef KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND namespace Kokkos { /** \brief Intra-thread vector parallel_for. 
Executes lambda(iType i) for each @@ -1244,15 +1431,52 @@ KOKKOS_INLINE_FUNCTION void parallel_reduce( iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, const Lambda& lambda, ValueType& result) { ValueType vector_reduce = ValueType(); + + if constexpr (std::is_arithmetic<ValueType>::value) { #pragma omp simd reduction(+ : vector_reduce) - for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { - ValueType tmp = ValueType(); - lambda(i, tmp); - vector_reduce += tmp; + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + ValueType tmp = ValueType(); + lambda(i, tmp); + vector_reduce += tmp; + } + } else { +#pragma omp declare reduction(custom:ValueType : omp_out += omp_in) + +#pragma omp simd reduction(custom : vector_reduce) + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + lambda(i, vector_reduce); + } } + result = vector_reduce; } +template <typename iType, class Lambda, typename ReducerType> +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> + parallel_reduce( + const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType const& result) { + using ValueType = typename ReducerType::value_type; + +#pragma omp declare reduction( \ + custom:ValueType \ + : Impl::OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \ + initializer( \ + Impl::OpenMPTargetReducerWrapper <ReducerType>::init(omp_priv)) + + ValueType vector_reduce; + Impl::OpenMPTargetReducerWrapper<ReducerType>::init(vector_reduce); + +#pragma omp simd reduction(custom : vector_reduce) + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + lambda(i, vector_reduce); + } + + result.reference() = vector_reduce; +} + /** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, * ValueType & val) for each i=0..N-1. 
* @@ -1269,14 +1493,15 @@ KOKKOS_INLINE_FUNCTION void parallel_reduce( iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, const Lambda& lambda, const JoinType& join, ValueType& init_result) { ValueType result = init_result; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif + + // FIXME_OPENMPTARGET think about omp simd + // join does not work with omp reduction clause for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { ValueType tmp = ValueType(); lambda(i, tmp); join(result, tmp); } + init_result = result; } @@ -1324,7 +1549,7 @@ KOKKOS_INLINE_FUNCTION void parallel_for( const Impl::TeamVectorRangeBoundariesStruct< iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, const Lambda& lambda) { -#pragma omp for simd +#pragma omp for simd nowait schedule(static, 1) for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) lambda(i); } @@ -1339,22 +1564,130 @@ KOKKOS_INLINE_FUNCTION void parallel_reduce( const Impl::TeamVectorRangeBoundariesStruct< iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, const Lambda& lambda, ValueType& result) { - ValueType* tmp_scratch = + // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of + // elements in the array <= 32. For reduction we allocate, 16 bytes per + // element in the scratch space, hence, 16*32 = 512. 
+ static_assert(sizeof(ValueType) <= + Impl::OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE); + + ValueType* TeamVector_scratch = static_cast<ValueType*>(loop_boundaries.team.impl_reduce_scratch()); + #pragma omp barrier - tmp_scratch[0] = ValueType(); + TeamVector_scratch[0] = ValueType(); #pragma omp barrier -#pragma omp for simd reduction(+ : tmp_scratch[:1]) + if constexpr (std::is_arithmetic<ValueType>::value) { +#pragma omp for simd reduction(+ : TeamVector_scratch[:1]) + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + ValueType tmp = ValueType(); + lambda(i, tmp); + TeamVector_scratch[0] += tmp; + } + } else { +#pragma omp declare reduction(custom:ValueType : omp_out += omp_in) + +#pragma omp for simd reduction(custom : TeamVector_scratch[:1]) + for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { + ValueType tmp = ValueType(); + lambda(i, tmp); + TeamVector_scratch[0] += tmp; + } + } + + result = TeamVector_scratch[0]; +} + +#if !defined(KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND) +template <typename iType, class Lambda, typename ReducerType> +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> + parallel_reduce( + const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType const& result) { + using ValueType = typename ReducerType::value_type; + + // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of + // elements in the array <= 32. For reduction we allocate, 16 bytes per + // element in the scratch space, hence, 16*32 = 512. 
+ static_assert(sizeof(ValueType) <= + Impl::OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE); + +#pragma omp declare reduction( \ + custom:ValueType \ + : Impl::OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \ + initializer( \ + Impl::OpenMPTargetReducerWrapper <ReducerType>::init(omp_priv)) + + ValueType* TeamVector_scratch = + static_cast<ValueType*>(loop_boundaries.team.impl_reduce_scratch()); + +#pragma omp barrier + Impl::OpenMPTargetReducerWrapper<ReducerType>::init(TeamVector_scratch[0]); +#pragma omp barrier + +#pragma omp for simd reduction(custom : TeamVector_scratch[:1]) for (iType i = loop_boundaries.start; i < loop_boundaries.end; i++) { ValueType tmp = ValueType(); lambda(i, tmp); - tmp_scratch[0] += tmp; + TeamVector_scratch[0] += tmp; + } + + result.reference() = TeamVector_scratch[0]; +} +#else +template <typename iType, class Lambda, typename ReducerType> +KOKKOS_INLINE_FUNCTION + std::enable_if_t<Kokkos::is_reducer_type<ReducerType>::value> + parallel_reduce( + const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::OpenMPTargetExecTeamMember>& loop_boundaries, + const Lambda& lambda, ReducerType const& result) { + using ValueType = typename ReducerType::value_type; + + // FIXME_OPENMPTARGET - Make sure that if its an array reduction, number of + // elements in the array <= 32. For reduction we allocate, 16 bytes per + // element in the scratch space, hence, 16*32 = 512. 
+ static_assert(sizeof(ValueType) <= + Impl::OpenMPTargetExecTeamMember::TEAM_REDUCE_SIZE); + + ValueType* TeamVector_scratch = + static_cast<ValueType*>(loop_boundaries.team.impl_reduce_scratch()); + +#pragma omp declare reduction( \ + omp_red_teamthread_reducer:ValueType \ + : Impl::OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \ + initializer( \ + Impl::OpenMPTargetReducerWrapper <ReducerType>::init(omp_priv)) + +#pragma omp barrier + ValueType tmp; + result.init(tmp); + TeamVector_scratch[0] = tmp; +#pragma omp barrier + + iType team_size = iType(omp_get_num_threads()); +#pragma omp for simd reduction(omp_red_teamthread_reducer \ + : TeamVector_scratch[:1]) schedule(static, 1) + for (iType t = 0; t < team_size; t++) { + ValueType tmp2; + result.init(tmp2); + + for (iType i = loop_boundaries.start + t; i < loop_boundaries.end; + i += team_size) { + lambda(i, tmp2); + } + TeamVector_scratch[0] = tmp2; } - result = tmp_scratch[0]; + + result.reference() = TeamVector_scratch[0]; } +#endif // KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND } // namespace Kokkos +#undef KOKKOS_IMPL_HIERARCHICAL_REDUCERS_WORKAROUND + namespace Kokkos { template <class FunctorType> diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp index 2d68a34c9fe2f5fb500c133ee07740d502dff8b6..4a79b72732dafb9bd93613723551ec7a9b01ddd1 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp @@ -46,7 +46,12 @@ #if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(_OPENMP) +// FIXME_OPENMPTARGET - macro for workaround implementation in UniqueToken +// constructor. 
undef'ed at the end +#define KOKKOS_IMPL_OPENMPTARGET_WORKAROUND + #include <Kokkos_OpenMPTarget.hpp> +#include <OpenMPTarget/Kokkos_OpenMPTarget_UniqueToken.hpp> #include <OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp> #include <sstream> @@ -63,7 +68,15 @@ void OpenMPTargetInternal::print_configuration(std::ostream& /*stream*/, printf("Using OpenMPTarget\n"); } -void OpenMPTargetInternal::impl_finalize() { m_is_initialized = false; } +void OpenMPTargetInternal::impl_finalize() { + m_is_initialized = false; + Kokkos::Impl::OpenMPTargetExec space; + if (space.m_lock_array != nullptr) space.clear_lock_array(); + + if (space.m_uniquetoken_ptr != nullptr) + Kokkos::kokkos_free<Kokkos::Experimental::OpenMPTargetSpace>( + space.m_uniquetoken_ptr); +} void OpenMPTargetInternal::impl_initialize() { m_is_initialized = true; } int OpenMPTargetInternal::impl_is_initialized() { return m_is_initialized ? 1 : 0; @@ -149,4 +162,48 @@ void OpenMPTargetSpaceInitializer::print_configuration(std::ostream& msg, } // namespace Impl } // Namespace Kokkos +namespace Kokkos { +namespace Experimental { + +UniqueToken<Kokkos::Experimental::OpenMPTarget, + Kokkos::Experimental::UniqueTokenScope::Global>:: + UniqueToken(Kokkos::Experimental::OpenMPTarget const&) { +#ifdef KOKKOS_IMPL_OPENMPTARGET_WORKAROUND + uint32_t* ptr = Kokkos::Impl::OpenMPTargetExec::m_uniquetoken_ptr; + int count = Kokkos::Experimental::OpenMPTarget().concurrency(); + if (ptr == nullptr) { + int size = count * sizeof(uint32_t); + ptr = static_cast<uint32_t*>( + Kokkos::kokkos_malloc<Kokkos::Experimental::OpenMPTargetSpace>( + "Kokkos::OpenMPTarget::m_uniquetoken_ptr", size)); + std::vector<uint32_t> h_buf(count, 0); + OMPT_SAFE_CALL(omp_target_memcpy(ptr, h_buf.data(), size, 0, 0, + omp_get_default_device(), + omp_get_initial_device())); + + Kokkos::Impl::OpenMPTargetExec::m_uniquetoken_ptr = ptr; + } +#else +// FIXME_OPENMPTARGET - 2 versions of non-working implementations to fill `ptr` +// with 0's +// Version 1 - 
Creating a target region and filling the +// pointer Error - CUDA error: named symbol not found +#pragma omp target teams distribute parallel for is_device_ptr(ptr) \ + map(to \ + : size) + for (int i = 0; i < count; ++i) ptr[i] = 0; + + // Version 2 : Allocating a view on the device and filling it with a scalar + // value of 0. + Kokkos::View<uint32_t*, Kokkos::Experimental::OpenMPTargetSpace> ptr_view( + ptr, count); + Kokkos::deep_copy(ptr_view, 0); +#endif + m_buffer = ptr; + m_count = count; +} +} // namespace Experimental +} // namespace Kokkos + +#undef KOKKOS_IMPL_OPENMPTARGET_WORKAROUND #endif // defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(_OPENMP) diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp index 862a63672dc4fba8ec40c77a24659cf0a3b64da1..a4092c3a37a7e9a1493576c5efe783334982a391 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp @@ -51,6 +51,8 @@ #include <OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> +#define KOKKOS_IMPL_LOCK_FREE_HIERARCHICAL + namespace Kokkos { namespace Impl { @@ -84,8 +86,7 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, } */ template <class TagType> - inline typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_impl() const { + inline void execute_impl() const { OpenMPTargetExec::verify_is_process( "Kokkos::Experimental::OpenMPTarget parallel_for"); OpenMPTargetExec::verify_initialized( @@ -97,27 +98,13 @@ class ParallelFor<FunctorType, Kokkos::RangePolicy<Traits...>, FunctorType a_functor(m_functor); + if constexpr (std::is_same<TagType, void>::value) { #pragma omp target teams distribute parallel for map(to : a_functor) - for (auto i = begin; i < end; i++) a_functor(i); - } - - template <class TagType> - inline typename 
std::enable_if<!std::is_same<TagType, void>::value>::type - execute_impl() const { - OpenMPTargetExec::verify_is_process( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - OpenMPTargetExec::verify_initialized( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - const auto begin = m_policy.begin(); - const auto end = m_policy.end(); - - if (end <= begin) return; - - FunctorType a_functor(m_functor); -#pragma omp target teams distribute parallel for num_threads(128) \ - map(to \ - : a_functor) - for (auto i = begin; i < end; i++) a_functor(TagType(), i); + for (auto i = begin; i < end; i++) a_functor(i); + } else { +#pragma omp target teams distribute parallel for map(to : a_functor) + for (auto i = begin; i < end; i++) a_functor(TagType(), i); + } } inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) @@ -134,8 +121,8 @@ namespace Kokkos { namespace Impl { template <class FunctorType, class PolicyType, class ReducerType, - class PointerType, class ValueType, int FunctorHasJoin, - int UseReducerType> + class PointerType, class ValueType, bool FunctorHasJoin, + bool UseReducerType> struct ParallelReduceSpecialize { static inline void execute(const FunctorType& /*f*/, const PolicyType& /*p*/, PointerType /*result_ptr*/) { @@ -150,13 +137,12 @@ struct ParallelReduceSpecialize { template <class FunctorType, class ReducerType, class PointerType, class ValueType, class... 
PolicyArgs> struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, - ReducerType, PointerType, ValueType, 0, 0> { + ReducerType, PointerType, ValueType, false, + false> { using PolicyType = Kokkos::RangePolicy<PolicyArgs...>; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_impl(const FunctorType& f, const PolicyType& p, - PointerType result_ptr) { + inline static void execute_impl(const FunctorType& f, const PolicyType& p, + PointerType result_ptr) { OpenMPTargetExec::verify_is_process( "Kokkos::Experimental::OpenMPTarget parallel_for"); OpenMPTargetExec::verify_initialized( @@ -167,32 +153,15 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, if (end <= begin) return; ValueType result = ValueType(); + if constexpr (std::is_same<TagType, void>::value) { #pragma omp target teams distribute parallel for num_teams(512) \ map(to:f) map(tofrom:result) reduction(+: result) - for (auto i = begin; i < end; i++) f(i, result); - - *result_ptr = result; - } - - template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_impl(const FunctorType& f, const PolicyType& p, - PointerType result_ptr) { - OpenMPTargetExec::verify_is_process( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - OpenMPTargetExec::verify_initialized( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - const auto begin = p.begin(); - const auto end = p.end(); - - if (end <= begin) return; - - ValueType result = ValueType(); -#pragma omp target teams distribute parallel for \ - num_teams(512) map(to:f) map(tofrom: result) \ - reduction(+: result) - for (auto i = begin; i < end; i++) f(TagType(), i, result); + for (auto i = begin; i < end; i++) f(i, result); + } else { +#pragma omp target teams distribute parallel for num_teams(512) \ + map(to:f) map(tofrom:result) reduction(+: result) + for (auto i = begin; i < 
end; i++) f(TagType(), i, result); + } *result_ptr = result; } @@ -206,17 +175,15 @@ struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, template <class FunctorType, class PolicyType, class ReducerType, class PointerType, class ValueType> struct ParallelReduceSpecialize<FunctorType, PolicyType, ReducerType, - PointerType, ValueType, 0, 1> { + PointerType, ValueType, false, true> { #pragma omp declare reduction( \ custom:ValueType \ : OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \ initializer(OpenMPTargetReducerWrapper <ReducerType>::init(omp_priv)) template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_impl(const FunctorType& f, const PolicyType& p, - PointerType result_ptr) { + inline static void execute_impl(const FunctorType& f, const PolicyType& p, + PointerType result_ptr) { OpenMPTargetExec::verify_is_process( "Kokkos::Experimental::OpenMPTarget parallel_for"); OpenMPTargetExec::verify_initialized( @@ -229,38 +196,21 @@ struct ParallelReduceSpecialize<FunctorType, PolicyType, ReducerType, ValueType result = ValueType(); OpenMPTargetReducerWrapper<ReducerType>::init(result); -// clang-format off -#pragma omp target teams distribute parallel for num_teams(512) map(to: f) \ - map(tofrom: result) reduction(custom: result) - for (auto i = begin; i < end; i++) f(i, result); - // clang-format on - *result_ptr = result; - } - - template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_impl(const FunctorType& f, const PolicyType& p, - PointerType result_ptr) { - OpenMPTargetExec::verify_is_process( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - OpenMPTargetExec::verify_initialized( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - const typename PolicyType::member_type begin = p.begin(); - const typename PolicyType::member_type end = p.end(); - - if (end <= begin) return; - - 
ValueType result = ValueType(); - OpenMPTargetReducerWrapper<ReducerType>::init(result); - -// clang-format off -#pragma omp target teams distribute parallel for num_teams(512) map(to: f) \ - map(tofrom: result) reduction(custom: result) - for (auto i = begin; i < end; i++) f(TagType(), i, result); - // clang-format on - - *result_ptr = result; + if constexpr (std::is_same<TagType, void>::value) { +#pragma omp target teams distribute parallel for num_teams(512) map(to \ + : f) \ + reduction(custom \ + : result) + for (auto i = begin; i < end; i++) f(i, result); + *result_ptr = result; + } else { +#pragma omp target teams distribute parallel for num_teams(512) map(to \ + : f) \ + reduction(custom \ + : result) + for (auto i = begin; i < end; i++) f(TagType(), i, result); + *result_ptr = result; + } } inline static void execute(const FunctorType& f, const PolicyType& p, @@ -284,8 +234,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; // Static Assert WorkTag void if ReducerType not InvalidType @@ -347,114 +297,143 @@ namespace Impl { template <class FunctorType, class... 
Traits> class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, Kokkos::Experimental::OpenMPTarget> { - private: + protected: using Policy = Kokkos::RangePolicy<Traits...>; using WorkTag = typename Policy::work_tag; using WorkRange = typename Policy::WorkRange; using Member = typename Policy::member_type; + using idx_type = typename Policy::index_type; using ValueTraits = Kokkos::Impl::FunctorValueTraits<FunctorType, WorkTag>; using ValueInit = Kokkos::Impl::FunctorValueInit<FunctorType, WorkTag>; using ValueJoin = Kokkos::Impl::FunctorValueJoin<FunctorType, WorkTag>; using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; + using value_type = typename ValueTraits::value_type; using pointer_type = typename ValueTraits::pointer_type; using reference_type = typename ValueTraits::reference_type; const FunctorType m_functor; const Policy m_policy; - /* - template< class TagType > - inline static - typename std::enable_if< std::is_same< TagType , void >::value >::type - exec_range( const FunctorType & functor - , const Member ibeg , const Member iend - , reference_type update , const bool final ) + + template <class TagType> + inline typename std::enable_if<std::is_same<TagType, void>::value>::type + call_with_tag(const FunctorType& f, const idx_type& idx, value_type& val, + const bool& is_final) const { + f(idx, val, is_final); + } + template <class TagType> + inline typename std::enable_if<!std::is_same<TagType, void>::value>::type + call_with_tag(const FunctorType& f, const idx_type& idx, value_type& val, + const bool& is_final) const { + f(WorkTag(), idx, val, is_final); + } + + public: + inline void impl_execute( + Kokkos::View<value_type**, Kokkos::LayoutRight, + Kokkos::Experimental::OpenMPTargetSpace> + element_values, + Kokkos::View<value_type*, Kokkos::Experimental::OpenMPTargetSpace> + chunk_values, + Kokkos::View<int64_t, Kokkos::Experimental::OpenMPTargetSpace> count) + const { + const idx_type N = m_policy.end() - m_policy.begin(); + 
const idx_type chunk_size = 128; + const idx_type n_chunks = (N + chunk_size - 1) / chunk_size; + idx_type nteams = n_chunks > 512 ? 512 : n_chunks; + idx_type team_size = 128; + + FunctorType a_functor(m_functor); +#pragma omp target teams distribute map(to \ + : a_functor) num_teams(nteams) \ + thread_limit(team_size) + for (idx_type team_id = 0; team_id < n_chunks; team_id++) { +#pragma omp parallel num_threads(team_size) { - #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - #endif - for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { - functor( iwork , update , final ); + const idx_type local_offset = team_id * chunk_size; + +#pragma omp for + for (idx_type i = 0; i < chunk_size; i++) { + const idx_type idx = local_offset + i; + value_type val; + ValueInit::init(a_functor, &val); + if (idx < N) call_with_tag<WorkTag>(a_functor, idx, val, false); + element_values(team_id, i) = val; + } +#pragma omp barrier + if (omp_get_thread_num() == 0) { + value_type sum; + ValueInit::init(a_functor, &sum); + for (idx_type i = 0; i < chunk_size; i++) { + ValueJoin::join(a_functor, &sum, &element_values(team_id, i)); + element_values(team_id, i) = sum; + } + chunk_values(team_id) = sum; + } +#pragma omp barrier + if (omp_get_thread_num() == 0) { + if (Kokkos::atomic_fetch_add(&count(), 1) == n_chunks - 1) { + value_type sum; + ValueInit::init(a_functor, &sum); + for (idx_type i = 0; i < n_chunks; i++) { + ValueJoin::join(a_functor, &sum, &chunk_values(i)); + chunk_values(i) = sum; + } + } } } + } - template< class TagType > - inline static - typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type - exec_range( const FunctorType & functor - , const Member ibeg , const Member iend - , reference_type update , const bool final ) +#pragma omp target teams distribute map(to \ + : a_functor) num_teams(nteams) \ + thread_limit(team_size) + for (idx_type team_id = 0; team_id < n_chunks; team_id++) { +#pragma omp parallel num_threads(team_size) { - const TagType t{} ; - #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - #endif - for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { - functor( t , iwork , update , final ); + const idx_type local_offset = team_id * chunk_size; + value_type offset_value; + if (team_id > 0) + offset_value = chunk_values(team_id - 1); + else + ValueInit::init(a_functor, &offset_value); + +#pragma omp for + for (idx_type i = 0; i < chunk_size; i++) { + const idx_type idx = local_offset + i; + value_type local_offset_value; + if (i > 0) { + local_offset_value = element_values(team_id, i - 1); + ValueJoin::join(a_functor, &local_offset_value, &offset_value); + } else + local_offset_value = offset_value; + if (idx < N) + call_with_tag<WorkTag>(a_functor, idx, local_offset_value, true); } } - */ - public: - inline void execute() const { - /* OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget - parallel_scan"); - OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget - parallel_scan"); - - OpenMPTargetExec::resize_scratch( 2 * ValueTraits::value_size( - m_functor ) , 0 ); - - #pragma omp parallel - { - OpenMPTargetExec & exec = * OpenMPTargetExec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() - ); const pointer_type ptr = pointer_type( exec.scratch_reduce() ) + - ValueTraits::value_count( m_functor ); - ParallelScan::template exec_range< WorkTag > - ( m_functor , range.begin() , range.end() - , ValueInit::init( m_functor , ptr ) , false ); - } - - { - const 
unsigned thread_count = OpenMPTargetExec::pool_size(); - const unsigned value_count = ValueTraits::value_count( m_functor ); - - pointer_type ptr_prev = 0 ; - - for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) { - - pointer_type ptr = pointer_type( - OpenMPTargetExec::pool_rev(rank_rev)->scratch_reduce() ); - - if ( ptr_prev ) { - for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = - ptr_prev[ i + value_count ] ; } ValueJoin::join( m_functor , ptr + - value_count , ptr ); - } - else { - ValueInit::init( m_functor , ptr ); - } - - ptr_prev = ptr ; - } - } + } + } - #pragma omp parallel - { - OpenMPTargetExec & exec = * OpenMPTargetExec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() - ); const pointer_type ptr = pointer_type( exec.scratch_reduce() ); - ParallelScan::template exec_range< WorkTag > - ( m_functor , range.begin() , range.end() - , ValueOps::reference( ptr ) , true ); - } - */ + inline void execute() const { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + const idx_type N = m_policy.end() - m_policy.begin(); + const idx_type chunk_size = 128; + const idx_type n_chunks = (N + chunk_size - 1) / chunk_size; + + // This could be scratch memory per team + Kokkos::View<value_type**, Kokkos::LayoutRight, + Kokkos::Experimental::OpenMPTargetSpace> + element_values("element_values", n_chunks, chunk_size); + Kokkos::View<value_type*, Kokkos::Experimental::OpenMPTargetSpace> + chunk_values("chunk_values", n_chunks); + Kokkos::View<int64_t, Kokkos::Experimental::OpenMPTargetSpace> count( + "Count"); + + impl_execute(element_values, chunk_values, count); } //---------------------------------------- @@ -465,6 +444,51 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, //---------------------------------------- }; +template <class FunctorType, class ReturnType, 
class... Traits> +class ParallelScanWithTotal<FunctorType, Kokkos::RangePolicy<Traits...>, + ReturnType, Kokkos::Experimental::OpenMPTarget> + : public ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, + Kokkos::Experimental::OpenMPTarget> { + using base_t = ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>, + Kokkos::Experimental::OpenMPTarget>; + using value_type = typename base_t::value_type; + value_type& m_returnvalue; + + public: + inline void execute() const { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + const int64_t N = base_t::m_policy.end() - base_t::m_policy.begin(); + const int chunk_size = 128; + const int64_t n_chunks = (N + chunk_size - 1) / chunk_size; + + if (N > 0) { + // This could be scratch memory per team + Kokkos::View<value_type**, Kokkos::LayoutRight, + Kokkos::Experimental::OpenMPTargetSpace> + element_values("element_values", n_chunks, chunk_size); + Kokkos::View<value_type*, Kokkos::Experimental::OpenMPTargetSpace> + chunk_values("chunk_values", n_chunks); + Kokkos::View<int64_t, Kokkos::Experimental::OpenMPTargetSpace> count( + "Count"); + + base_t::impl_execute(element_values, chunk_values, count); + + const int size = base_t::ValueTraits::value_size(base_t::m_functor); + DeepCopy<HostSpace, Kokkos::Experimental::OpenMPTargetSpace>( + &m_returnvalue, chunk_values.data() + (n_chunks - 1), size); + } else { + m_returnvalue = 0; + } + } + + ParallelScanWithTotal(const FunctorType& arg_functor, + const typename base_t::Policy& arg_policy, + ReturnType& arg_returnvalue) + : base_t(arg_functor, arg_policy), m_returnvalue(arg_returnvalue) {} +}; } // namespace Impl } // namespace Kokkos @@ -499,8 +523,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, private: template <class TagType> - inline typename std::enable_if<std::is_same<TagType, void>::value>::type - 
execute_impl() const { + inline void execute_impl() const { OpenMPTargetExec::verify_is_process( "Kokkos::Experimental::OpenMPTarget parallel_for"); OpenMPTargetExec::verify_initialized( @@ -508,59 +531,94 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, const auto league_size = m_policy.league_size(); const auto team_size = m_policy.team_size(); const auto vector_length = m_policy.impl_vector_length(); - const auto nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS < league_size - ? OpenMPTargetExec::MAX_ACTIVE_TEAMS - : league_size; - OpenMPTargetExec::resize_scratch(0, Policy::member_type::TEAM_REDUCE_SIZE, - 0, 0); - void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); + const size_t shmem_size_L0 = m_policy.scratch_size(0, team_size); + const size_t shmem_size_L1 = m_policy.scratch_size(1, team_size); + OpenMPTargetExec::resize_scratch(team_size, shmem_size_L0, shmem_size_L1); + void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); FunctorType a_functor(m_functor); -#pragma omp target teams distribute map(to \ - : a_functor) \ - is_device_ptr(scratch_ptr) num_teams(nteams) thread_limit(team_size) - for (int i = 0; i < league_size; i++) { -#pragma omp parallel num_threads(team_size) - { - typename Policy::member_type team(i, league_size, team_size, - vector_length, scratch_ptr, 0, 0); - m_functor(team); - } - } - } - - template <class TagType> - inline typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_impl() const { - OpenMPTargetExec::verify_is_process( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - OpenMPTargetExec::verify_initialized( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - const auto league_size = m_policy.league_size(); - const auto team_size = m_policy.team_size(); - const auto vector_length = m_policy.impl_vector_length(); - const auto nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS < league_size - ? 
OpenMPTargetExec::MAX_ACTIVE_TEAMS - : league_size; - FunctorType a_functor(m_functor); + // FIXME_OPENMPTARGET - If the team_size is not a multiple of 32, the + // scratch implementation does not work in the Release or RelWithDebugInfo + // mode but works in the Debug mode. + + // Maximum active teams possible. + int max_active_teams = OpenMPTargetExec::MAX_ACTIVE_THREADS / team_size; + // nteams should not exceed the maximum in-flight teams possible. + const auto nteams = + league_size < max_active_teams ? league_size : max_active_teams; + +#ifdef KOKKOS_IMPL_LOCK_FREE_HIERARCHICAL +// Performing our own scheduling of teams to avoid separation of code between +// teams-distribute and parallel. Gave a 2x performance boost in test cases with +// the clang compiler. atomic_compare_exchange can be avoided since the standard +// guarantees that the number of teams specified in the `num_teams` clause is +// always less than or equal to the maximum concurrently running teams. +#pragma omp target teams num_teams(nteams) thread_limit(team_size) \ + map(to \ + : a_functor) is_device_ptr(scratch_ptr) +#pragma omp parallel + { + const int blockIdx = omp_get_team_num(); + const int gridDim = omp_get_num_teams(); + + // Iterate through the number of teams until league_size and assign the + // league_id accordingly + // Guarantee that the compilers respect the `num_teams` clause + if (gridDim <= nteams) { + for (int league_id = blockIdx; league_id < league_size; + league_id += gridDim) { + typename Policy::member_type team( + league_id, league_size, team_size, vector_length, scratch_ptr, + blockIdx, shmem_size_L0, shmem_size_L1); + if constexpr (std::is_same<TagType, void>::value) + m_functor(team); + else + m_functor(TagType(), team); + } + } else + Kokkos::abort("`num_teams` clause was not respected.\n"); + } - OpenMPTargetExec::resize_scratch(0, Policy::member_type::TEAM_REDUCE_SIZE, - 0, 0); - void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); -#pragma omp target 
teams distribute map(to \ - : a_functor) \ - is_device_ptr(scratch_ptr) num_teams(nteams) thread_limit(team_size) +#else +// Saving the older implementation that uses `atomic_compare_exchange` to +// calculate the shared memory block index and `distribute` clause to distribute +// teams. +#pragma omp target teams distribute map(to \ + : a_functor) \ + is_device_ptr(scratch_ptr, lock_array) num_teams(nteams) \ + thread_limit(team_size) for (int i = 0; i < league_size; i++) { + int shmem_block_index = -1, lock_team = 99999, iter = -1; + iter = (omp_get_team_num() % max_active_teams); + + // Loop as long as a shmem_block_index is not found. + while (shmem_block_index == -1) { + // Try and acquire a lock on the index. + lock_team = atomic_compare_exchange(&lock_array[iter], 0, 1); + + // If lock is acquired assign it to the block index. + // lock_team = 0, implies atomic_compare_exchange is successfull. + if (lock_team == 0) + shmem_block_index = iter; + else + iter = ++iter % max_active_teams; + } + #pragma omp parallel num_threads(team_size) { - typename Policy::member_type team(i / (team_size * vector_length), - league_size, team_size, vector_length, - scratch_ptr, 0, 0); - m_functor(TagType(), team); + typename Policy::member_type team( + i, league_size, team_size, vector_length, scratch_ptr, + shmem_block_index, shmem_size_L0, shmem_size_L1); + m_functor(team); } + + // Free the locked block and increment the number of available free + // blocks. + lock_team = atomic_compare_exchange(&lock_array[shmem_block_index], 1, 0); } +#endif } public: @@ -575,14 +633,13 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, template <class FunctorType, class ReducerType, class PointerType, class ValueType, class... 
PolicyArgs> struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, - ReducerType, PointerType, ValueType, 0, 0> { + ReducerType, PointerType, ValueType, false, + false> { using PolicyType = TeamPolicyInternal<PolicyArgs...>; template <class TagType> - inline static - typename std::enable_if<std::is_same<TagType, void>::value>::type - execute_impl(const FunctorType& f, const PolicyType& p, - PointerType result_ptr) { + inline static void execute_impl(const FunctorType& f, const PolicyType& p, + PointerType result_ptr) { OpenMPTargetExec::verify_is_process( "Kokkos::Experimental::OpenMPTarget parallel_for"); OpenMPTargetExec::verify_initialized( @@ -591,68 +648,150 @@ struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, const int league_size = p.league_size(); const int team_size = p.team_size(); const int vector_length = p.impl_vector_length(); - const int nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS < league_size - ? OpenMPTargetExec::MAX_ACTIVE_TEAMS - : league_size; - OpenMPTargetExec::resize_scratch( - 0, PolicyType::member_type::TEAM_REDUCE_SIZE, 0, 0); + const size_t shmem_size_L0 = p.scratch_size(0, team_size); + const size_t shmem_size_L1 = p.scratch_size(1, team_size); + OpenMPTargetExec::resize_scratch(PolicyType::member_type::TEAM_REDUCE_SIZE, + shmem_size_L0, shmem_size_L1); void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); ValueType result = ValueType(); + // Maximum active teams possible. + int max_active_teams = OpenMPTargetExec::MAX_ACTIVE_THREADS / team_size; + const auto nteams = + league_size < max_active_teams ? 
league_size : max_active_teams; + +#ifdef KOKKOS_IMPL_LOCK_FREE_HIERARCHICAL +#pragma omp target teams num_teams(nteams) thread_limit(team_size) map(to \ + : f) \ + is_device_ptr(scratch_ptr) reduction(+: result) +#pragma omp parallel reduction(+ : result) + { + const int blockIdx = omp_get_team_num(); + const int gridDim = omp_get_num_teams(); + + // Guarantee that the compilers respect the `num_teams` clause + if (gridDim <= nteams) { + for (int league_id = blockIdx; league_id < league_size; + league_id += gridDim) { + typename PolicyType::member_type team( + league_id, league_size, team_size, vector_length, scratch_ptr, + blockIdx, shmem_size_L0, shmem_size_L1); + if constexpr (std::is_same<TagType, void>::value) + f(team, result); + else + f(TagType(), team, result); + } + } else + Kokkos::abort("`num_teams` clause was not respected.\n"); + } + + *result_ptr = result; +#else +// Saving the older implementation that uses `atomic_compare_exchange` to +// calculate the shared memory block index and `distribute` clause to distribute +// teams. #pragma omp target teams distribute num_teams(nteams) thread_limit(team_size) \ map(to:f) map(tofrom:result) reduction(+: result) \ - is_device_ptr(scratch_ptr) + is_device_ptr(scratch_ptr, lock_array) for (int i = 0; i < league_size; i++) { ValueType inner_result = ValueType(); + int shmem_block_index = -1, lock_team = 99999, iter = -1; + iter = (omp_get_team_num() % max_active_teams); + + // Loop as long as a shmem_block_index is not found. + while (shmem_block_index == -1) { + // Try and acquire a lock on the index. + lock_team = atomic_compare_exchange(&lock_array[iter], 0, 1); + + // If lock is acquired assign it to the block index. + // lock_team = 0, implies atomic_compare_exchange is successfull. 
+ if (lock_team == 0) + shmem_block_index = iter; + else + iter = ++iter % max_active_teams; + } #pragma omp parallel num_threads(team_size) reduction(+ : inner_result) { - typename PolicyType::member_type team(i, league_size, team_size, - vector_length, scratch_ptr, 0, 0); + typename PolicyType::member_type team( + i, league_size, team_size, vector_length, scratch_ptr, + shmem_block_index, shmem_size_L0, shmem_size_L1); f(team, inner_result); } result = inner_result; + + // Free the locked block and increment the number of available free + // blocks. + lock_team = atomic_compare_exchange(&lock_array[shmem_block_index], 1, 0); } *result_ptr = result; +#endif } + inline static void execute(const FunctorType& f, const PolicyType& p, + PointerType ptr) { + execute_impl<typename PolicyType::work_tag>(f, p, ptr); + } +}; + +template <class FunctorType, class ReducerType, class PointerType, + class ValueType, class... PolicyArgs> +struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, + ReducerType, PointerType, ValueType, false, + true> { + using PolicyType = TeamPolicyInternal<PolicyArgs...>; template <class TagType> - inline static - typename std::enable_if<!std::is_same<TagType, void>::value>::type - execute_impl(const FunctorType& f, const PolicyType& p, - PointerType result_ptr) { + inline static void execute_impl(const FunctorType& f, const PolicyType& p, + PointerType result_ptr) { OpenMPTargetExec::verify_is_process( "Kokkos::Experimental::OpenMPTarget parallel_for"); OpenMPTargetExec::verify_initialized( "Kokkos::Experimental::OpenMPTarget parallel_for"); - const int league_size = p.league_size(); - const int team_size = p.team_size(); - const int vector_length = p.impl_vector_length(); - const int nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS < league_size - ? 
OpenMPTargetExec::MAX_ACTIVE_TEAMS - : league_size; - - OpenMPTargetExec::resize_scratch( - 0, PolicyType::member_type::TEAM_REDUCE_SIZE, 0, 0); +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper <ReducerType>::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper <ReducerType>::init(omp_priv)) + const int league_size = p.league_size(); + const int team_size = p.team_size(); + const int vector_length = p.impl_vector_length(); + const size_t shmem_size_L0 = p.scratch_size(0, team_size); + const size_t shmem_size_L1 = p.scratch_size(1, team_size); + OpenMPTargetExec::resize_scratch(team_size, shmem_size_L0, shmem_size_L1); void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); ValueType result = ValueType(); -#pragma omp target teams distribute num_teams(nteams) thread_limit(team_size) \ - map(to:f) map(tofrom:result) reduction(+: result) \ - is_device_ptr(scratch_ptr) - for (int i = 0; i < league_size; i++) { - ValueType inner_result = ValueType(); -#pragma omp parallel num_threads(team_size) reduction(+ : inner_result) - { - typename PolicyType::member_type team(i, league_size, team_size, - vector_length, scratch_ptr, 0, 0); - f(TagType(), team, result); - } - result = inner_result; + // Maximum active teams possible. + int max_active_teams = OpenMPTargetExec::MAX_ACTIVE_THREADS / team_size; + const auto nteams = + league_size < max_active_teams ? 
league_size : max_active_teams; + +#pragma omp target teams num_teams(nteams) thread_limit(team_size) map(to \ + : f) \ + is_device_ptr(scratch_ptr) reduction(custom \ + : result) +#pragma omp parallel reduction(custom : result) + { + const int blockIdx = omp_get_team_num(); + const int gridDim = omp_get_num_teams(); + + // Guarantee that the compilers respect the `num_teams` clause + if (gridDim <= nteams) { + for (int league_id = blockIdx; league_id < league_size; + league_id += gridDim) { + typename PolicyType::member_type team( + league_id, league_size, team_size, vector_length, scratch_ptr, + blockIdx, shmem_size_L0, shmem_size_L1); + if constexpr (std::is_same<TagType, void>::value) + f(team, result); + else + f(TagType(), team, result); + } + } else + Kokkos::abort("`num_teams` clause was not respected.\n"); } *result_ptr = result; @@ -680,8 +819,8 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, FunctorType, ReducerType>; using ReducerTypeFwd = typename ReducerConditional::type; using WorkTagFwd = - typename Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - WorkTag, void>::type; + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, + void>; using ValueTraits = Kokkos::Impl::FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>; @@ -790,61 +929,11 @@ struct TeamVectorRangeBoundariesStruct<iType, OpenMPTargetExecTeamMember> { : start(begin_), end(end_), team(thread_) {} }; -template <typename iType> -KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> -TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, iType count) { - return Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, count); -} - -template <typename iType> -KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> -TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, iType begin, - 
iType end) { - return Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, begin, end); -} - -template <typename iType> -KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> -ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType count) { - return Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, count); -} - -template <typename iType> -KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> -ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType begin, - iType end) { - return Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, begin, end); -} - -template <typename iType> -KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> -ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType count) { - return Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, count); -} - -template <typename iType> -KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember> -ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType begin, - iType end) { - return Impl::TeamVectorRangeBoundariesStruct< - iType, Impl::OpenMPTargetExecTeamMember>(thread, begin, end); -} - } // namespace Impl } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#undef KOKKOS_IMPL_LOCK_FREE_HIERARCHICAL #endif /* KOKKOS_OPENMPTARGET_PARALLEL_HPP */ diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp index 
2bd8a5d3a39ca54b178c465b29c639d89c691419..3dfad2bb856e0bb65a48dfd70b3458cee4c9beb5 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp @@ -156,7 +156,10 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, #pragma omp target teams distribute parallel for collapse(2) map(to : functor) for (auto i0 = begin_0; i0 < end_0; i0++) { for (auto i1 = begin_1; i1 < end_1; i1++) { - functor(i0, i1); + if constexpr (std::is_same<typename Policy::work_tag, void>::value) + functor(i0, i1); + else + functor(typename Policy::work_tag(), i0, i1); } } #else @@ -170,7 +173,12 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, #pragma omp for collapse(2) for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) - for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) functor(i0, i1); + for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) { + if constexpr (std::is_same<typename Policy::work_tag, void>::value) + functor(i0, i1); + else + functor(typename Policy::work_tag(), i0, i1); + } #endif } @@ -192,7 +200,10 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (auto i0 = begin_0; i0 < end_0; i0++) { for (auto i1 = begin_1; i1 < end_1; i1++) { for (auto i2 = begin_2; i2 < end_2; i2++) { - functor(i0, i1, i2); + if constexpr (std::is_same<typename Policy::work_tag, void>::value) + functor(i0, i1, i2); + else + functor(typename Policy::work_tag(), i0, i1, i2); } } } @@ -212,7 +223,12 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, #pragma omp for collapse(3) for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) - for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) functor(i0, i1, i2); + for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) { + if constexpr (std::is_same<typename Policy::work_tag, void>::value) + functor(i0, i1, i2); + else + functor(typename Policy::work_tag(), i0, i1, 
i2); + } #endif } @@ -237,7 +253,10 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (auto i1 = begin_1; i1 < end_1; i1++) { for (auto i2 = begin_2; i2 < end_2; i2++) { for (auto i3 = begin_3; i3 < end_3; i3++) { - functor(i0, i1, i2, i3); + if constexpr (std::is_same<typename Policy::work_tag, void>::value) + functor(i0, i1, i2, i3); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3); } } } @@ -263,8 +282,12 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) - for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) - functor(i0, i1, i2, i3); + for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) { + if constexpr (std::is_same<typename Policy::work_tag, void>::value) + functor(i0, i1, i2, i3); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3); + } #endif } @@ -292,7 +315,11 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (auto i2 = begin_2; i2 < end_2; i2++) { for (auto i3 = begin_3; i3 < end_3; i3++) { for (auto i4 = begin_4; i4 < end_4; i4++) { - functor(i0, i1, i2, i3, i4); + if constexpr (std::is_same<typename Policy::work_tag, + void>::value) + functor(i0, i1, i2, i3, i4); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); } } } @@ -324,8 +351,13 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) - for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) - functor(i0, i1, i2, i3, i4); + for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) { + if constexpr (std::is_same<typename Policy::work_tag, + void>::value) + functor(i0, i1, i2, i3, i4); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); + } #endif } @@ -356,7 +388,14 @@ class ParallelFor<FunctorType, 
Kokkos::MDRangePolicy<Traits...>, for (auto i3 = begin_3; i3 < end_3; i3++) { for (auto i4 = begin_4; i4 < end_4; i4++) { for (auto i5 = begin_5; i5 < end_5; i5++) { - functor(i0, i1, i2, i3, i4, i5); + { + if constexpr (std::is_same<typename Policy::work_tag, + void>::value) + functor(i0, i1, i2, i3, i4, i5); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + i5); + } } } } @@ -394,8 +433,13 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) - for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) - functor(i0, i1, i2, i3, i4, i5); + for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) { + if constexpr (std::is_same<typename Policy::work_tag, + void>::value) + functor(i0, i1, i2, i3, i4, i5); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5); + } #endif } @@ -429,7 +473,12 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) { for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) { for (ptrdiff_t i6 = begin_6; i6 < end_6; i6++) { - functor(i0, i1, i2, i3, i4, i5, i6); + if constexpr (std::is_same<typename Policy::work_tag, + void>::value) + functor(i0, i1, i2, i3, i4, i5, i6); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5, + i6); } } } @@ -473,8 +522,14 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) - for (ptrdiff_t i6 = begin_6; i6 < end_6; i6++) - functor(i0, i1, i2, i3, i4, i5, i6); + for (ptrdiff_t i6 = begin_6; i6 < end_6; i6++) { + if constexpr (std::is_same<typename Policy::work_tag, + void>::value) + functor(i0, i1, i2, i3, i4, i5, i6); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5, + i6); + } #endif } @@ -511,7 +566,12 
@@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) { for (ptrdiff_t i6 = begin_6; i6 < end_6; i6++) { for (ptrdiff_t i7 = begin_7; i7 < end_7; i7++) { - functor(i0, i1, i2, i3, i4, i5, i6, i7); + if constexpr (std::is_same<typename Policy::work_tag, + void>::value) + functor(i0, i1, i2, i3, i4, i5, i6, i7); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + i5, i6, i7); } } } @@ -561,13 +621,26 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) for (ptrdiff_t i6 = begin_6; i6 < end_6; i6++) - for (ptrdiff_t i7 = begin_7; i7 < end_7; i7++) - functor(i0, i1, i2, i3, i4, i5, i6, i7); + for (ptrdiff_t i7 = begin_7; i7 < end_7; i7++) { + if constexpr (std::is_same<typename Policy::work_tag, + void>::value) + functor(i0, i1, i2, i3, i4, i5, i6, i7); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + i5, i6, i7); + } #endif } inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) : m_functor(arg_functor), m_policy(arg_policy) {} + // TODO DZP: based on a conversation with Christian, we're using 256 as a + // heuristic here. We need something better once we can query these kinds of + // properties + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy&, const Functor&) { + return 256; + } }; } // namespace Impl @@ -758,6 +831,13 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, // , "Reduction result on Kokkos::Experimental::OpenMPTarget must be a // Kokkos::View in HostSpace" ); } + // TODO DZP: based on a conversation with Christian, we're using 256 as a +heuristic + // here. 
We need something better once we can query these kinds of properties + template<typename Policy, typename Functor> +static int max_tile_size_product(const Policy&, const Functor&) { + return 256; + } };*/ } // namespace Impl diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_UniqueToken.hpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_UniqueToken.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fa348611b953aa62704cb760521a275a04729985 --- /dev/null +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_UniqueToken.hpp @@ -0,0 +1,135 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMPTARGET_UNIQUE_TOKEN_HPP +#define KOKKOS_OPENMPTARGET_UNIQUE_TOKEN_HPP + +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_OPENMPTARGET + +#include <Kokkos_OpenMPTargetSpace.hpp> +#include <Kokkos_UniqueToken.hpp> +#include <impl/Kokkos_SharedAlloc.hpp> +#include <impl/Kokkos_ConcurrentBitset.hpp> + +namespace Kokkos { +namespace Experimental { + +// both global and instance Unique Tokens are implemented in the same way +template <> +class UniqueToken<OpenMPTarget, UniqueTokenScope::Global> { + protected: + uint32_t volatile* m_buffer; + uint32_t m_count; + + public: + using execution_space = OpenMPTarget; + using size_type = int32_t; + + explicit UniqueToken(execution_space const& = execution_space()); + + KOKKOS_DEFAULTED_FUNCTION + UniqueToken(const UniqueToken&) = default; + + KOKKOS_DEFAULTED_FUNCTION + UniqueToken(UniqueToken&&) = default; + + KOKKOS_DEFAULTED_FUNCTION + UniqueToken& operator=(const UniqueToken&) = default; + + KOKKOS_DEFAULTED_FUNCTION + UniqueToken& operator=(UniqueToken&&) = default; + + /// \brief upper bound for acquired values, i.e. 
0 <= value < size() + KOKKOS_INLINE_FUNCTION + size_type size() const noexcept { return m_count; } + + /// \brief acquire value such that 0 <= value < size() + KOKKOS_INLINE_FUNCTION + size_type acquire() const { + const Kokkos::pair<int, int> result = + Kokkos::Impl::concurrent_bitset::acquire_bounded( + m_buffer, m_count, Kokkos::Impl::clock_tic() % m_count); + + if (result.first < 0) { + Kokkos::abort( + "UniqueToken<OpenMPTarget> failure to acquire tokens, no tokens " + "available"); + } + + return result.first; + } + + /// \brief release an acquired value + KOKKOS_INLINE_FUNCTION + void release(size_type i) const noexcept { + Kokkos::Impl::concurrent_bitset::release(m_buffer, i); + } +}; + +template <> +class UniqueToken<OpenMPTarget, UniqueTokenScope::Instance> + : public UniqueToken<OpenMPTarget, UniqueTokenScope::Global> { + private: + Kokkos::View<uint32_t*, ::Kokkos::Experimental::OpenMPTargetSpace> + m_buffer_view; + + public: + explicit UniqueToken(execution_space const& arg = execution_space()) + : UniqueToken<OpenMPTarget, UniqueTokenScope::Global>(arg) {} + + UniqueToken(size_type max_size, execution_space const& = execution_space()) + : m_buffer_view( + "Kokkos::UniqueToken::m_buffer_view", + ::Kokkos::Impl::concurrent_bitset::buffer_bound(max_size)) { + m_buffer = m_buffer_view.data(); + m_count = max_size; + } +}; + +} // namespace Experimental +} // namespace Kokkos + +#endif // KOKKOS_ENABLE_OPENMPTARGET +#endif // KOKKOS_OPENMPTARGET_UNIQUE_TOKEN_HPP diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp index 567145161c7a1a9a251dd4e056c666863e6f15dc..9c29eb190d17b64c0340751a3459785c070d7c47 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL.cpp @@ -76,42 +76,75 @@ int get_gpu(const InitArguments& args); } // namespace Impl namespace Experimental { -SYCL::SYCL() : m_space_instance(&Impl::SYCLInternal::singleton()) { +SYCL::SYCL() + : 
m_space_instance(&Impl::SYCLInternal::singleton(), + [](Impl::SYCLInternal*) {}) { Impl::SYCLInternal::singleton().verify_is_initialized( "SYCL instance constructor"); } +SYCL::SYCL(const sycl::queue& stream) + : m_space_instance(new Impl::SYCLInternal, [](Impl::SYCLInternal* ptr) { + ptr->finalize(); + delete ptr; + }) { + Impl::SYCLInternal::singleton().verify_is_initialized( + "SYCL instance constructor"); + m_space_instance->initialize(stream); +} + int SYCL::concurrency() { - // FIXME_SYCL We need a value larger than 1 here for some tests to pass, - // clearly this is true but not the roght value - return 2; + return Impl::SYCLInternal::singleton().m_maxConcurrency; } +const char* SYCL::name() { return "SYCL"; } + bool SYCL::impl_is_initialized() { return Impl::SYCLInternal::singleton().is_initialized(); } void SYCL::impl_finalize() { Impl::SYCLInternal::singleton().finalize(); } -void SYCL::fence() const { m_space_instance->m_queue->wait(); } +void SYCL::fence() const { + Impl::SYCLInternal::fence(*m_space_instance->m_queue); +} + +void SYCL::impl_static_fence() { + // guard accessing all_queues + std::lock_guard<std::mutex> lock(Impl::SYCLInternal::mutex); + for (auto& queue : Impl::SYCLInternal::all_queues) + Impl::SYCLInternal::fence(**queue); +} int SYCL::sycl_device() const { return impl_internal_space_instance()->m_syclDev; } -SYCL::SYCLDevice::SYCLDevice(cl::sycl::device d) : m_device(std::move(d)) {} +SYCL::SYCLDevice::SYCLDevice(sycl::device d) : m_device(std::move(d)) {} -SYCL::SYCLDevice::SYCLDevice(const cl::sycl::device_selector& selector) +SYCL::SYCLDevice::SYCLDevice(const sycl::device_selector& selector) : m_device(selector.select_device()) {} -cl::sycl::device SYCL::SYCLDevice::get_device() const { return m_device; } +SYCL::SYCLDevice::SYCLDevice(size_t id) { + std::vector<sycl::device> gpu_devices = + sycl::device::get_devices(sycl::info::device_type::gpu); + if (id >= gpu_devices.size()) { + std::stringstream error_message; + error_message 
<< "Requested GPU with id " << id << " but only " + << gpu_devices.size() << " GPU(s) available!\n"; + Kokkos::Impl::throw_runtime_exception(error_message.str()); + } + m_device = gpu_devices[id]; +} + +sycl::device SYCL::SYCLDevice::get_device() const { return m_device; } void SYCL::impl_initialize(SYCL::SYCLDevice d) { Impl::SYCLInternal::singleton().initialize(d.get_device()); } std::ostream& SYCL::SYCLDevice::info(std::ostream& os) const { - using namespace cl::sycl::info; + using namespace sycl::info; return os << "Name: " << m_device.get_info<device::name>() << "\nDriver Version: " << m_device.get_info<device::driver_version>() @@ -227,7 +260,7 @@ std::ostream& SYCL::SYCLDevice::info(std::ostream& os) const { namespace Impl { -int g_hip_space_factory_initialized = +int g_sycl_space_factory_initialized = Kokkos::Impl::initialize_space_factory<SYCLSpaceInitializer>("170_SYCL"); void SYCLSpaceInitializer::initialize(const InitArguments& args) { @@ -236,9 +269,13 @@ void SYCLSpaceInitializer::initialize(const InitArguments& args) { if (std::is_same<Kokkos::Experimental::SYCL, Kokkos::DefaultExecutionSpace>::value || 0 < use_gpu) { - // FIXME_SYCL choose a specific device - Kokkos::Experimental::SYCL::impl_initialize( - Kokkos::Experimental::SYCL::SYCLDevice(cl::sycl::default_selector())); + if (use_gpu > -1) { + Kokkos::Experimental::SYCL::impl_initialize( + Kokkos::Experimental::SYCL::SYCLDevice(use_gpu)); + } else { + Kokkos::Experimental::SYCL::impl_initialize( + Kokkos::Experimental::SYCL::SYCLDevice(sycl::default_selector())); + } } } @@ -252,9 +289,7 @@ void SYCLSpaceInitializer::finalize(const bool all_spaces) { } void SYCLSpaceInitializer::fence() { - // FIXME_SYCL should be - // Kokkos::Experimental::SYCL::impl_static_fence(); - Kokkos::Experimental::SYCL().fence(); + Kokkos::Experimental::SYCL::impl_static_fence(); } void SYCLSpaceInitializer::print_configuration(std::ostream& msg, diff --git 
a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_Category.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Abort.hpp similarity index 86% rename from packages/kokkos/core/unit_test/cuda/TestCudaUVM_Category.hpp rename to packages/kokkos/core/src/SYCL/Kokkos_SYCL_Abort.hpp index ff53e5a719a7274e8d38b93259286e14bc44d27a..13d6dc1a4a705421a05ce3f86e28f376de0ac41b 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_Category.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Abort.hpp @@ -42,13 +42,21 @@ //@HEADER */ -#ifndef KOKKOS_TEST_CUDAUVM_HPP -#define KOKKOS_TEST_CUDAUVM_HPP +#ifndef KOKKOS_SYCL_ABORT_HPP +#define KOKKOS_SYCL_ABORT_HPP -#include <gtest/gtest.h> +#include <Kokkos_Macros.hpp> +#if defined(KOKKOS_ENABLE_SYCL) -#define TEST_CATEGORY cuda_uvm -#define TEST_CATEGORY_DEATH cuda_uvm_DeathTest -#define TEST_EXECSPACE Kokkos::CudaUVMSpace +namespace Kokkos { +namespace Impl { +inline void sycl_abort(char const *msg) { + KOKKOS_IMPL_DO_NOT_USE_PRINTF("Aborting with message %s.\n", msg); +} + +} // namespace Impl +} // namespace Kokkos + +#endif #endif diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp index 95906f8e7e91830e72a7e752bb761cc236f0e06f..aef65ee7ecbbf3c39432b42a42b595dbfe00b239 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp @@ -131,6 +131,100 @@ struct DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, Kokkos::HostSpace, } }; +template <> +struct DeepCopy<Experimental::SYCLSharedUSMSpace, + Experimental::SYCLSharedUSMSpace, Kokkos::Experimental::SYCL> + : public DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL> { + using DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL>::DeepCopy; +}; + +template <> +struct DeepCopy<Experimental::SYCLSharedUSMSpace, HostSpace, + 
Kokkos::Experimental::SYCL> + : public DeepCopy<Experimental::SYCLDeviceUSMSpace, HostSpace, + Kokkos::Experimental::SYCL> { + using DeepCopy<Experimental::SYCLDeviceUSMSpace, HostSpace, + Kokkos::Experimental::SYCL>::DeepCopy; +}; + +template <> +struct DeepCopy<HostSpace, Experimental::SYCLSharedUSMSpace, + Kokkos::Experimental::SYCL> + : public DeepCopy<HostSpace, Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL> { + using DeepCopy<HostSpace, Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL>::DeepCopy; +}; + +template <> +struct DeepCopy<Experimental::SYCLSharedUSMSpace, + Experimental::SYCLDeviceUSMSpace, Kokkos::Experimental::SYCL> + : public DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL> { + using DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL>::DeepCopy; +}; + +template <> +struct DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLSharedUSMSpace, Kokkos::Experimental::SYCL> + : public DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL> { + using DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCL>::DeepCopy; +}; + +template <class ExecutionSpace> +struct DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLSharedUSMSpace, ExecutionSpace> + : public DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, ExecutionSpace> { + using DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, ExecutionSpace>::DeepCopy; +}; + +template <class ExecutionSpace> +struct DeepCopy<Experimental::SYCLSharedUSMSpace, + Experimental::SYCLDeviceUSMSpace, ExecutionSpace> + : public DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, ExecutionSpace> { + using DeepCopy<Experimental::SYCLDeviceUSMSpace, + 
Experimental::SYCLDeviceUSMSpace, ExecutionSpace>::DeepCopy; +}; + +template <class ExecutionSpace> +struct DeepCopy<Experimental::SYCLSharedUSMSpace, + Experimental::SYCLSharedUSMSpace, ExecutionSpace> + : public DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, ExecutionSpace> { + using DeepCopy<Experimental::SYCLDeviceUSMSpace, + Experimental::SYCLDeviceUSMSpace, ExecutionSpace>::DeepCopy; +}; + +template <class ExecutionSpace> +struct DeepCopy<Experimental::SYCLSharedUSMSpace, HostSpace, ExecutionSpace> + : public DeepCopy<Experimental::SYCLDeviceUSMSpace, HostSpace, + ExecutionSpace> { + using DeepCopy<Experimental::SYCLDeviceUSMSpace, HostSpace, + ExecutionSpace>::DeepCopy; +}; + +template <class ExecutionSpace> +struct DeepCopy<HostSpace, Experimental::SYCLSharedUSMSpace, ExecutionSpace> + : public DeepCopy<HostSpace, Experimental::SYCLDeviceUSMSpace, + ExecutionSpace> { + using DeepCopy<HostSpace, Experimental::SYCLDeviceUSMSpace, + ExecutionSpace>::DeepCopy; +}; + } // namespace Impl } // namespace Kokkos #endif diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp index ef4f466b8a4ddc34464ab2c4c13db28e92391fb8..5a702b5027277cc7137cba9bba72e7367e9ae97b 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp @@ -44,27 +44,28 @@ #include <Kokkos_Concepts.hpp> #include <SYCL/Kokkos_SYCL_Instance.hpp> +#include <KokkosCore_Config_DeclareBackend.hpp> #include <Kokkos_SYCL.hpp> #include <Kokkos_HostSpace.hpp> #include <Kokkos_Serial.hpp> +#include <impl/Kokkos_ConcurrentBitset.hpp> #include <impl/Kokkos_Error.hpp> namespace Kokkos { namespace Experimental { namespace Impl { -int SYCLInternal::was_finalized = 0; +std::vector<std::optional<sycl::queue>*> SYCLInternal::all_queues; +std::mutex SYCLInternal::mutex; SYCLInternal::~SYCLInternal() { - if (m_scratchSpace || m_scratchFlags) { + if 
(!was_finalized || m_scratchSpace || m_scratchFlags || + m_scratchConcurrentBitset) { std::cerr << "Kokkos::Experimental::SYCL ERROR: Failed to call " "Kokkos::Experimental::SYCL::finalize()" << std::endl; std::cerr.flush(); } - - m_scratchSpace = nullptr; - m_scratchFlags = nullptr; } int SYCLInternal::verify_is_initialized(const char* const label) const { @@ -79,8 +80,26 @@ SYCLInternal& SYCLInternal::singleton() { return self; } -// FIME_SYCL -void SYCLInternal::initialize(const cl::sycl::device& d) { +void SYCLInternal::initialize(const sycl::device& d) { + auto exception_handler = [](sycl::exception_list exceptions) { + bool asynchronous_error = false; + for (std::exception_ptr const& e : exceptions) { + try { + std::rethrow_exception(e); + } catch (sycl::exception const& e) { + std::cerr << e.what() << '\n'; + asynchronous_error = true; + } + } + if (asynchronous_error) + Kokkos::Impl::throw_runtime_exception( + "There was an asynchronous SYCL error!\n"); + }; + initialize(sycl::queue{d, exception_handler}); +} + +// FIXME_SYCL +void SYCLInternal::initialize(const sycl::queue& q) { if (was_finalized) Kokkos::abort("Calling SYCL::initialize after SYCL::finalize is illegal\n"); @@ -96,9 +115,44 @@ void SYCLInternal::initialize(const cl::sycl::device& d) { const bool ok_init = nullptr == m_scratchSpace || nullptr == m_scratchFlags; const bool ok_dev = true; if (ok_init && ok_dev) { - m_queue = std::make_unique<cl::sycl::queue>(d); + m_queue = q; + // guard pushing to all_queues + { + std::lock_guard<std::mutex> lock(mutex); + all_queues.push_back(&m_queue); + } + const sycl::device& d = m_queue->get_device(); std::cout << SYCL::SYCLDevice(d) << '\n'; - m_indirectKernel.emplace(IndirectKernelAllocator(*m_queue)); + + m_maxWorkgroupSize = + d.template get_info<sycl::info::device::max_work_group_size>(); + // FIXME_SYCL this should give the correct value for NVIDIA GPUs + m_maxConcurrency = + m_maxWorkgroupSize * 2 * + d.template 
get_info<sycl::info::device::max_compute_units>(); + + // Setup concurent bitset for obtaining unique tokens from within an + // executing kernel. + { + const int32_t buffer_bound = + Kokkos::Impl::concurrent_bitset::buffer_bound(m_maxConcurrency); + using Record = Kokkos::Impl::SharedAllocationRecord< + Kokkos::Experimental::SYCLDeviceUSMSpace, void>; + Record* const r = + Record::allocate(Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue), + "Kokkos::SYCL::InternalScratchBitset", + sizeof(uint32_t) * buffer_bound); + Record::increment(r); + m_scratchConcurrentBitset = reinterpret_cast<uint32_t*>(r->data()); + auto event = m_queue->memset(m_scratchConcurrentBitset, 0, + sizeof(uint32_t) * buffer_bound); + fence(event); + } + + m_maxShmemPerBlock = + d.template get_info<sycl::info::device::local_mem_size>(); + m_indirectKernelMem.reset(*m_queue); + m_indirectReducerMem.reset(*m_queue); } else { std::ostringstream msg; msg << "Kokkos::Experimental::SYCL::initialize(...) FAILED"; @@ -112,16 +166,126 @@ void SYCLInternal::initialize(const cl::sycl::device& d) { void SYCLInternal::finalize() { SYCL().fence(); - was_finalized = 1; - if (nullptr != m_scratchSpace || nullptr != m_scratchFlags) { - // FIXME_SYCL - std::abort(); - } + was_finalized = true; + + using RecordSYCL = Kokkos::Impl::SharedAllocationRecord<SYCLDeviceUSMSpace>; + if (nullptr != m_scratchSpace) + RecordSYCL::decrement(RecordSYCL::get_record(m_scratchSpace)); + if (nullptr != m_scratchFlags) + RecordSYCL::decrement(RecordSYCL::get_record(m_scratchFlags)); + m_syclDev = -1; + m_scratchSpaceCount = 0; + m_scratchSpace = nullptr; + m_scratchFlagsCount = 0; + m_scratchFlags = nullptr; - m_indirectKernel.reset(); + RecordSYCL::decrement(RecordSYCL::get_record(m_scratchConcurrentBitset)); + m_scratchConcurrentBitset = nullptr; + + m_indirectKernelMem.reset(); + m_indirectReducerMem.reset(); + // guard erasing from all_queues + { + std::lock_guard<std::mutex> lock(mutex); + 
all_queues.erase(std::find(all_queues.begin(), all_queues.end(), &m_queue)); + } m_queue.reset(); } +void* SYCLInternal::scratch_space( + const Kokkos::Experimental::SYCL::size_type size) { + const size_type sizeScratchGrain = + sizeof(Kokkos::Experimental::SYCL::size_type); + if (verify_is_initialized("scratch_space") && + m_scratchSpaceCount * sizeScratchGrain < size) { + m_scratchSpaceCount = (size + sizeScratchGrain - 1) / sizeScratchGrain; + + using Record = Kokkos::Impl::SharedAllocationRecord< + Kokkos::Experimental::SYCLDeviceUSMSpace, void>; + + if (nullptr != m_scratchSpace) + Record::decrement(Record::get_record(m_scratchSpace)); + + Record* const r = + Record::allocate(Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue), + "Kokkos::SYCL::InternalScratchSpace", + (sizeScratchGrain * m_scratchSpaceCount)); + + Record::increment(r); + + m_scratchSpace = reinterpret_cast<size_type*>(r->data()); + } + + return m_scratchSpace; +} + +void* SYCLInternal::scratch_flags( + const Kokkos::Experimental::SYCL::size_type size) { + const size_type sizeScratchGrain = + sizeof(Kokkos::Experimental::SYCL::size_type); + if (verify_is_initialized("scratch_flags") && + m_scratchFlagsCount * sizeScratchGrain < size) { + m_scratchFlagsCount = (size + sizeScratchGrain - 1) / sizeScratchGrain; + + using Record = Kokkos::Impl::SharedAllocationRecord< + Kokkos::Experimental::SYCLDeviceUSMSpace, void>; + + if (nullptr != m_scratchFlags) + Record::decrement(Record::get_record(m_scratchFlags)); + + Record* const r = + Record::allocate(Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue), + "Kokkos::SYCL::InternalScratchFlags", + (sizeScratchGrain * m_scratchFlagsCount)); + + Record::increment(r); + + m_scratchFlags = reinterpret_cast<size_type*>(r->data()); + } + m_queue->memset(m_scratchFlags, 0, m_scratchFlagsCount * sizeScratchGrain); + fence(*m_queue); + + return m_scratchFlags; +} + +template <sycl::usm::alloc Kind> +size_t SYCLInternal::USMObjectMem<Kind>::reserve(size_t n) { + 
assert(m_size == 0); + assert(m_q); + + if (m_capacity < n) { + using Record = Kokkos::Impl::SharedAllocationRecord<AllocationSpace, void>; + // First free what we have (in case malloc can reuse it) + if (m_data) Record::decrement(Record::get_record(m_data)); + + Record* const r = Record::allocate(AllocationSpace(*m_q), + "Kokkos::SYCL::USMObjectMem", n); + Record::increment(r); + + m_data = r->data(); + m_capacity = n; + } + + return m_capacity; +} + +template <sycl::usm::alloc Kind> +void SYCLInternal::USMObjectMem<Kind>::reset() { + assert(m_size == 0); + + if (m_data) { + using Record = Kokkos::Impl::SharedAllocationRecord<AllocationSpace, void>; + Record::decrement(Record::get_record(m_data)); + + m_capacity = 0; + m_data = nullptr; + } + m_q.reset(); +} + +template class SYCLInternal::USMObjectMem<sycl::usm::alloc::shared>; +template class SYCLInternal::USMObjectMem<sycl::usm::alloc::device>; + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp index 254d37467966c31896d21b7bd7c4d2d2d4f1b53b..e797411cd40bdd734c04d2a9b0e51151fa269ebd 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp @@ -45,9 +45,11 @@ #ifndef KOKKOS_SYCL_INSTANCE_HPP_ #define KOKKOS_SYCL_INSTANCE_HPP_ -#include <memory> +#include <optional> #include <CL/sycl.hpp> +#include <impl/Kokkos_Error.hpp> + namespace Kokkos { namespace Experimental { namespace Impl { @@ -64,38 +66,273 @@ class SYCLInternal { SYCLInternal& operator=(SYCLInternal&&) = delete; SYCLInternal(SYCLInternal&&) = delete; - int m_syclDev = -1; - size_type* m_scratchSpace = nullptr; - size_type* m_scratchFlags = nullptr; + void* scratch_space(const size_type size); + void* scratch_flags(const size_type size); + + int m_syclDev = -1; + + size_t m_maxWorkgroupSize = 0; + uint32_t m_maxConcurrency = 0; + uint64_t 
m_maxShmemPerBlock = 0; + + uint32_t* m_scratchConcurrentBitset = nullptr; + size_type m_scratchSpaceCount = 0; + size_type* m_scratchSpace = nullptr; + size_type m_scratchFlagsCount = 0; + size_type* m_scratchFlags = nullptr; + + std::optional<sycl::queue> m_queue; + + // Using std::vector<std::optional<sycl::queue>> reveals a compiler bug when + // compiling for the CUDA backend. Storing pointers instead works around this. + static std::vector<std::optional<sycl::queue>*> all_queues; + // We need a mutex for thread safety when modifying all_queues. + static std::mutex mutex; + + // USMObjectMem is a reusable buffer for a single object + // in USM memory + template <sycl::usm::alloc Kind> + class USMObjectMem { + public: + class Deleter { + public: + Deleter() = default; + explicit Deleter(USMObjectMem* mem) : m_mem(mem) {} + + template <typename T> + void operator()(T* p) const noexcept { + assert(m_mem); + assert(sizeof(T) == m_mem->size()); + + if constexpr (sycl::usm::alloc::device == kind) + // Only skipping the dtor on trivially copyable types + static_assert(std::is_trivially_copyable_v<T>); + else + p->~T(); + + m_mem->m_size = 0; + } + + private: + USMObjectMem* m_mem = nullptr; + }; + + static constexpr sycl::usm::alloc kind = Kind; + + void reset(); + + void reset(sycl::queue q) { + reset(); + m_q.emplace(std::move(q)); + } + + USMObjectMem() = default; + explicit USMObjectMem(sycl::queue q) noexcept : m_q(std::move(q)) {} + + USMObjectMem(USMObjectMem const&) = delete; + USMObjectMem(USMObjectMem&&) = delete; + USMObjectMem& operator=(USMObjectMem&&) = delete; + USMObjectMem& operator=(USMObjectMem const&) = delete; + + ~USMObjectMem() { reset(); }; + + void* data() noexcept { return m_data; } + const void* data() const noexcept { return m_data; } + + size_t size() const noexcept { return m_size; } + size_t capacity() const noexcept { return m_capacity; } + + // reserve() allocates space for at least n bytes + // returns the new capacity + size_t 
reserve(size_t n); + + private: + using AllocationSpace = + std::conditional_t<Kind == sycl::usm::alloc::device, + Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCLSharedUSMSpace>; + + // This will memcpy an object T into memory held by this object + // returns: a T* to that object + // + // Note: it is UB to dereference this pointer with an object that is + // not an implicit-lifetime nor trivially-copyable type, but presumably much + // faster because we can use USM device memory + template <typename T> + std::unique_ptr<T, Deleter> memcpy_from(const T& t) { + reserve(sizeof(T)); + sycl::event memcopied = m_q->memcpy(m_data, std::addressof(t), sizeof(T)); + fence(memcopied); + + std::unique_ptr<T, Deleter> ptr(reinterpret_cast<T*>(m_data), + Deleter(this)); + m_size = sizeof(T); + return ptr; + } + + // This will copy-constuct an object T into memory held by this object + // returns: a unique_ptr<T, destruct_delete> that will call the + // destructor on the type when it goes out of scope. 
+ // + // Note: This will not work with USM device memory + template <typename T> + std::unique_ptr<T, Deleter> copy_construct_from(const T& t) { + static_assert(kind != sycl::usm::alloc::device, + "Cannot copy construct into USM device memory"); + + reserve(sizeof(T)); + + std::unique_ptr<T, Deleter> ptr(new (m_data) T(t), Deleter(this)); + m_size = sizeof(T); + return ptr; + } + + public: + // Performs either memcpy (for USM device memory) and returns a T* + // (but is technically UB when dereferenced on an object that is not + // an implicit-lifetime nor trivially-copyable type + // + // or + // + // performs copy construction (for other USM memory types) and returns a + // unique_ptr<T, ...> + template <typename T> + std::unique_ptr<T, Deleter> copy_from(const T& t) { + if constexpr (sycl::usm::alloc::device == kind) + return memcpy_from(t); + else + return copy_construct_from(t); + } - std::unique_ptr<cl::sycl::queue> m_queue; + private: + // Returns a reference to t (helpful when debugging) + template <typename T> + T& memcpy_to(T& t) { + assert(sizeof(T) == m_size); + + sycl::event memcopied = m_q->memcpy(std::addressof(t), m_data, sizeof(T)); + fence(memcopied); + + return t; + } + + // Returns a reference to t (helpful when debugging) + template <typename T> + T& move_assign_to(T& t) { + static_assert(kind != sycl::usm::alloc::device, + "Cannot move_assign_to from USM device memory"); + + assert(sizeof(T) == m_size); + + t = std::move(*static_cast<T*>(m_data)); + + return t; + } + + public: + // Returns a reference to t (helpful when debugging) + template <typename T> + T& transfer_to(T& t) { + if constexpr (sycl::usm::alloc::device == kind) + return memcpy_to(t); + else + return move_assign_to(t); + } + + private: + // USMObjectMem class invariants + // All four expressions below must evaluate to true: + // + // !m_data == !m_capacity + // m_q || !m_data + // m_data || !m_size + // m_size <= m_capacity + // + // The above invariants mean that: + // if 
m_size != 0 then m_data != 0 + // if m_data != 0 then m_capacity != 0 && m_q != nullopt + // if m_data == 0 then m_capacity == 0 + + std::optional<sycl::queue> m_q; + void* m_data = nullptr; + size_t m_size = 0; // sizeof(T) iff m_data points to live T + size_t m_capacity = 0; + }; // An indirect kernel is one where the functor to be executed is explicitly - // created in USM shared memory before being executed, to get around the + // copied to USM device memory before being executed, to get around the // trivially copyable limitation of SYCL. - // - // m_indirectKernel just manages the memory as a reuseable buffer. It is - // stored in an optional because the allocator contains a queue - using IndirectKernelAllocator = - sycl::usm_allocator<std::byte, sycl::usm::alloc::shared>; - using IndirectKernelMemory = - std::vector<IndirectKernelAllocator::value_type, IndirectKernelAllocator>; - using IndirectKernel = std::optional<IndirectKernelMemory>; - IndirectKernel m_indirectKernel; - - static int was_finalized; + using IndirectKernelMem = USMObjectMem<sycl::usm::alloc::shared>; + IndirectKernelMem m_indirectKernelMem; + + using IndirectReducerMem = USMObjectMem<sycl::usm::alloc::shared>; + IndirectReducerMem m_indirectReducerMem; + + bool was_finalized = false; static SYCLInternal& singleton(); int verify_is_initialized(const char* const label) const; - void initialize(const cl::sycl::device& d); + void initialize(const sycl::device& d); + + void initialize(const sycl::queue& q); - int is_initialized() const { return m_queue != nullptr; } + int is_initialized() const { return m_queue.has_value(); } void finalize(); + + private: + // fence(...) 
takes any type with a .wait_and_throw() method + // (sycl::event and sycl::queue) + template <typename WAT> + static void fence_helper(WAT& wat) { + try { + wat.wait_and_throw(); + } catch (sycl::exception const& e) { + Kokkos::Impl::throw_runtime_exception( + std::string("There was a synchronous SYCL error:\n") += e.what()); + } + } + + public: + static void fence(sycl::queue& q) { fence_helper(q); } + static void fence(sycl::event& e) { fence_helper(e); } +}; + +template <typename Functor, typename Storage, + bool is_memcpyable = std::is_trivially_copyable_v<Functor>> +class SYCLFunctionWrapper; + +template <typename Functor, typename Storage> +class SYCLFunctionWrapper<Functor, Storage, true> { + const Functor& m_functor; + + public: + SYCLFunctionWrapper(const Functor& functor, Storage&) : m_functor(functor) {} + + const Functor& get_functor() const { return m_functor; } +}; + +template <typename Functor, typename Storage> +class SYCLFunctionWrapper<Functor, Storage, false> { + std::unique_ptr<Functor, + Experimental::Impl::SYCLInternal::IndirectKernelMem::Deleter> + m_kernelFunctorPtr; + + public: + SYCLFunctionWrapper(const Functor& functor, Storage& storage) + : m_kernelFunctorPtr(storage.copy_from(functor)) {} + + std::reference_wrapper<const Functor> get_functor() const { + return {*m_kernelFunctorPtr}; + } }; +template <typename Functor, typename Storage> +auto make_sycl_function_wrapper(const Functor& functor, Storage& storage) { + return SYCLFunctionWrapper<Functor, Storage>(functor, storage); +} } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_MDRangePolicy.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_MDRangePolicy.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3e90ec1fb50b21e92f4f2ce589f98e2e755967ea --- /dev/null +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_MDRangePolicy.hpp @@ -0,0 +1,37 @@ +#ifndef KOKKOS_SYCL_MDRANGEPOLICY_HPP_ +#define 
KOKKOS_SYCL_MDRANGEPOLICY_HPP_ + +#include <KokkosExp_MDRangePolicy.hpp> + +namespace Kokkos { + +template <> +struct default_outer_direction<Kokkos::Experimental::SYCL> { + using type = Iterate; + static constexpr Iterate value = Iterate::Left; +}; + +template <> +struct default_inner_direction<Kokkos::Experimental::SYCL> { + using type = Iterate; + static constexpr Iterate value = Iterate::Left; +}; + +namespace Impl { + +// Settings for MDRangePolicy +template <> +inline TileSizeProperties get_tile_size_properties<Kokkos::Experimental::SYCL>( + const Kokkos::Experimental::SYCL& space) { + TileSizeProperties properties; + properties.max_threads = + space.impl_internal_space_instance()->m_maxWorkgroupSize; + properties.default_largest_tile_size = 16; + properties.default_tile_size = 2; + properties.max_total_tile_size = properties.max_threads; + return properties; +} + +} // Namespace Impl +} // Namespace Kokkos +#endif diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp index ba42c36d39a277d47b79bb3dab3775b6f9cd156a..a286169c45988339dce1b14c6d6a4ffde25dcea5 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Range.hpp @@ -45,6 +45,8 @@ #ifndef KOKKOS_SYCL_PARALLEL_RANGE_HPP_ #define KOKKOS_SYCL_PARALLEL_RANGE_HPP_ +#include <impl/KokkosExp_IterateTileGPU.hpp> + template <class FunctorType, class ExecPolicy> class Kokkos::Impl::ParallelFor<FunctorType, ExecPolicy, Kokkos::Experimental::SYCL> { @@ -59,27 +61,22 @@ class Kokkos::Impl::ParallelFor<FunctorType, ExecPolicy, const FunctorType m_functor; const Policy m_policy; - private: - ParallelFor() = delete; - ParallelFor& operator=(const ParallelFor&) = delete; - template <typename Functor> static void sycl_direct_launch(const Policy& policy, const Functor& functor) { // Convenience references const Kokkos::Experimental::SYCL& space = policy.space(); 
Kokkos::Experimental::Impl::SYCLInternal& instance = *space.impl_internal_space_instance(); - cl::sycl::queue& q = *instance.m_queue; + sycl::queue& q = *instance.m_queue; - q.wait(); + space.fence(); - q.submit([functor, policy](cl::sycl::handler& cgh) { - cl::sycl::range<1> range(policy.end() - policy.begin()); + q.submit([functor, policy](sycl::handler& cgh) { + sycl::range<1> range(policy.end() - policy.begin()); + const auto begin = policy.begin(); - cgh.parallel_for(range, [=](cl::sycl::item<1> item) { - const typename Policy::index_type id = - static_cast<typename Policy::index_type>(item.get_linear_id()) + - policy.begin(); + cgh.parallel_for(range, [=](sycl::item<1> item) { + const typename Policy::index_type id = item.get_linear_id() + begin; if constexpr (std::is_same<WorkTag, void>::value) functor(id); else @@ -87,47 +84,188 @@ class Kokkos::Impl::ParallelFor<FunctorType, ExecPolicy, }); }); - q.wait(); + space.fence(); + } + + public: + using functor_type = FunctorType; + + void execute() const { + if (m_policy.begin() == m_policy.end()) return; + + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& + indirectKernelMem = m_policy.space() + .impl_internal_space_instance() + ->m_indirectKernelMem; + + const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_functor, indirectKernelMem); + sycl_direct_launch(m_policy, functor_wrapper.get_functor()); + } + + ParallelFor(const ParallelFor&) = delete; + ParallelFor(ParallelFor&&) = delete; + ParallelFor& operator=(const ParallelFor&) = delete; + ParallelFor& operator=(ParallelFor&&) = delete; + ~ParallelFor() = default; + + ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +// ParallelFor +template <class FunctorType, class... 
Traits> +class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>, + Kokkos::Experimental::SYCL> { + public: + using Policy = Kokkos::MDRangePolicy<Traits...>; + + private: + using array_index_type = typename Policy::array_index_type; + using index_type = typename Policy::index_type; + using LaunchBounds = typename Policy::launch_bounds; + using WorkTag = typename Policy::work_tag; + + const FunctorType m_functor; + // MDRangePolicy is not trivially copyable. Hence, replicate the data we + // really need in DeviceIterateTile in a trivially copyable struct. + const struct BarePolicy { + using index_type = typename Policy::index_type; + + BarePolicy(const Policy& policy) + : m_lower(policy.m_lower), + m_upper(policy.m_upper), + m_tile(policy.m_tile), + m_tile_end(policy.m_tile_end), + m_num_tiles(policy.m_num_tiles) {} + + const typename Policy::point_type m_lower; + const typename Policy::point_type m_upper; + const typename Policy::tile_type m_tile; + const typename Policy::point_type m_tile_end; + const typename Policy::index_type m_num_tiles; + static constexpr Iterate inner_direction = Policy::inner_direction; + } m_policy; + const Kokkos::Experimental::SYCL& m_space; + + sycl::nd_range<3> compute_ranges() const { + const auto& m_tile = m_policy.m_tile; + const auto& m_tile_end = m_policy.m_tile_end; + + if constexpr (Policy::rank == 2) { + sycl::range<3> local_sizes(m_tile[0], m_tile[1], 1); + sycl::range<3> global_sizes(m_tile_end[0] * m_tile[0], + m_tile_end[1] * m_tile[1], 1); + return {global_sizes, local_sizes}; + } + if constexpr (Policy::rank == 3) { + sycl::range<3> local_sizes(m_tile[0], m_tile[1], m_tile[2]); + sycl::range<3> global_sizes(m_tile_end[0] * m_tile[0], + m_tile_end[1] * m_tile[1], + m_tile_end[2] * m_tile[2]); + return {global_sizes, local_sizes}; + } + if constexpr (Policy::rank == 4) { + // id0,id1 encoded within first index; id2 to second index; id3 to third + // index + sycl::range<3> local_sizes(m_tile[0] * 
m_tile[1], m_tile[2], m_tile[3]); + sycl::range<3> global_sizes( + m_tile_end[0] * m_tile[0] * m_tile_end[1] * m_tile[1], + m_tile_end[2] * m_tile[2], m_tile_end[3] * m_tile[3]); + return {global_sizes, local_sizes}; + } + if constexpr (Policy::rank == 5) { + // id0,id1 encoded within first index; id2,id3 to second index; id4 to + // third index + sycl::range<3> local_sizes(m_tile[0] * m_tile[1], m_tile[2] * m_tile[3], + m_tile[4]); + sycl::range<3> global_sizes( + m_tile_end[0] * m_tile[0] * m_tile_end[1] * m_tile[1], + m_tile_end[2] * m_tile[2] * m_tile_end[3] * m_tile[3], + m_tile_end[4] * m_tile[4]); + return {global_sizes, local_sizes}; + } + if constexpr (Policy::rank == 6) { + // id0,id1 encoded within first index; id2,id3 to second index; id4,id5 to + // third index + sycl::range<3> local_sizes(m_tile[0] * m_tile[1], m_tile[2] * m_tile[3], + m_tile[4] * m_tile[5]); + sycl::range<3> global_sizes( + m_tile_end[0] * m_tile[0] * m_tile_end[1] * m_tile[1], + m_tile_end[2] * m_tile[2] * m_tile_end[3] * m_tile[3], + m_tile_end[4] * m_tile[4] * m_tile_end[5] * m_tile[5]); + return {global_sizes, local_sizes}; + } + static_assert(Policy::rank > 1 && Policy::rank < 7, + "Kokkos::MDRange Error: Exceeded rank bounds with SYCL\n"); } - // Indirectly launch a functor by explicitly creating it in USM shared memory - void sycl_indirect_launch() const { + template <typename Functor> + void sycl_direct_launch(const Functor& functor) const { // Convenience references - const Kokkos::Experimental::SYCL& space = m_policy.space(); Kokkos::Experimental::Impl::SYCLInternal& instance = - *space.impl_internal_space_instance(); - Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMemory& kernelMem = - *instance.m_indirectKernel; + *m_space.impl_internal_space_instance(); + sycl::queue& q = *instance.m_queue; + + m_space.fence(); - // Allocate USM shared memory for the functor - kernelMem.resize(std::max(kernelMem.size(), sizeof(m_functor))); + if (m_policy.m_num_tiles == 0) 
return; - // Placement new a copy of functor into USM shared memory - // - // Store it in a unique_ptr to call its destructor on scope exit - std::unique_ptr<FunctorType, Kokkos::Impl::destruct_delete> - kernelFunctorPtr(new (kernelMem.data()) FunctorType(m_functor)); + const BarePolicy bare_policy(m_policy); - // Use reference_wrapper (because it is both trivially copyable and - // invocable) and launch it - sycl_direct_launch(m_policy, std::reference_wrapper(*kernelFunctorPtr)); + q.submit([functor, this, bare_policy](sycl::handler& cgh) { + const auto range = compute_ranges(); + + cgh.parallel_for(range, [functor, bare_policy](sycl::nd_item<3> item) { + const index_type local_x = item.get_local_id(0); + const index_type local_y = item.get_local_id(1); + const index_type local_z = item.get_local_id(2); + const index_type global_x = item.get_group(0); + const index_type global_y = item.get_group(1); + const index_type global_z = item.get_group(2); + const index_type n_global_x = item.get_group_range(0); + const index_type n_global_y = item.get_group_range(1); + const index_type n_global_z = item.get_group_range(2); + + Kokkos::Impl::DeviceIterateTile<Policy::rank, BarePolicy, Functor, + typename Policy::work_tag>( + bare_policy, functor, {n_global_x, n_global_y, n_global_z}, + {global_x, global_y, global_z}, {local_x, local_y, local_z}) + .exec_range(); + }); + }); + + m_space.fence(); } public: using functor_type = FunctorType; + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy& policy, const Functor&) { + return policy.space().impl_internal_space_instance()->m_maxWorkgroupSize; + } + void execute() const { - // if the functor is trivially copyable, we can launch it directly; - // otherwise, we will launch it indirectly via explicitly creating - // it in USM shared memory. 
- if constexpr (std::is_trivially_copyable_v<decltype(m_functor)>) - sycl_direct_launch(m_policy, m_functor); - else - sycl_indirect_launch(); + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& + indirectKernelMem = + m_space.impl_internal_space_instance()->m_indirectKernelMem; + + const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_functor, indirectKernelMem); + sycl_direct_launch(functor_wrapper.get_functor()); } + ParallelFor(const ParallelFor&) = delete; + ParallelFor(ParallelFor&&) = delete; + ParallelFor& operator=(const ParallelFor&) = delete; + ParallelFor& operator=(ParallelFor&&) = delete; + ~ParallelFor() = default; + ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} + : m_functor(arg_functor), + m_policy(arg_policy), + m_space(arg_policy.space()) {} }; #endif // KOKKOS_SYCL_PARALLEL_RANGE_HPP_ diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp index ac5e8154f30ad1cb093e652e704454dee3b751b1..03b7753f8e81ef5045b16cedd4206d85174c0033 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp @@ -69,14 +69,6 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, using reference_type = typename Analysis::reference_type; using WorkTag = typename Policy::work_tag; - using ReducerConditional = - Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, - FunctorType, ReducerType>; - using WorkTagFwd = - std::conditional_t<std::is_same<InvalidType, ReducerType>::value, WorkTag, - void>; - using ValueInit = - typename Kokkos::Impl::FunctorValueInit<FunctorType, WorkTagFwd>; public: // V - View @@ -94,198 +86,503 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType, m_result_ptr(reducer.view().data()) {} private: - 
template <typename TagType> - std::enable_if_t<std::is_void<TagType>::value> exec(reference_type update) { - using member_type = typename Policy::member_type; - member_type e = m_policy.end(); - for (member_type i = m_policy.begin(); i < e; ++i) m_functor(i, update); + template <typename PolicyType, typename Functor, typename Reducer> + void sycl_direct_launch(const PolicyType& policy, const Functor& functor, + const Reducer& reducer) const { + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, + WorkTag, void>; + using ValueInit = + Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; + using ValueJoin = + Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; + using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; + + auto selected_reducer = ReducerConditional::select(functor, reducer); + + // Convenience references + const Kokkos::Experimental::SYCL& space = policy.space(); + Kokkos::Experimental::Impl::SYCLInternal& instance = + *space.impl_internal_space_instance(); + sycl::queue& q = *instance.m_queue; + + // FIXME_SYCL optimize + constexpr size_t wgroup_size = 128; + constexpr size_t values_per_thread = 2; + std::size_t size = policy.end() - policy.begin(); + const auto init_size = std::max<std::size_t>( + ((size + values_per_thread - 1) / values_per_thread + wgroup_size - 1) / + wgroup_size, + 1); + const unsigned int value_count = + FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count( + selected_reducer); + // FIXME_SYCL only use the first half + const auto results_ptr = static_cast<pointer_type>(instance.scratch_space( + sizeof(value_type) * std::max(value_count, 1u) * init_size * 2)); + // FIXME_SYCL without this we are running into a race condition + const auto results_ptr2 = + results_ptr + 
std::max(value_count, 1u) * init_size; + + // If size<=1 we only call init(), the functor and possibly final once + // working with the global scratch memory but don't copy back to + // m_result_ptr yet. + if (size <= 1) { + q.submit([&](sycl::handler& cgh) { + const auto begin = policy.begin(); + cgh.single_task([=]() { + const auto& selected_reducer = ReducerConditional::select( + static_cast<const FunctorType&>(functor), + static_cast<const ReducerType&>(reducer)); + reference_type update = + ValueInit::init(selected_reducer, results_ptr); + if (size == 1) { + if constexpr (std::is_same<WorkTag, void>::value) + functor(begin, update); + else + functor(WorkTag(), begin, update); + } + if constexpr (ReduceFunctorHasFinal<FunctorType>::value) + FunctorFinal<FunctorType, WorkTag>::final( + static_cast<const FunctorType&>(functor), results_ptr); + }); + }); + space.fence(); + } + + // Otherwise, we perform a reduction on the values in all workgroups + // separately, write the workgroup results back to global memory and recurse + // until only one workgroup does the reduction and thus gets the final + // value. 
+ bool first_run = true; + while (size > 1) { + auto n_wgroups = ((size + values_per_thread - 1) / values_per_thread + + wgroup_size - 1) / + wgroup_size; + q.submit([&](sycl::handler& cgh) { + sycl::accessor<value_type, 1, sycl::access::mode::read_write, + sycl::access::target::local> + local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u), + cgh); + const auto begin = policy.begin(); + + cgh.parallel_for( + sycl::nd_range<1>(n_wgroups * wgroup_size, wgroup_size), + [=](sycl::nd_item<1> item) { + const auto local_id = item.get_local_linear_id(); + const auto global_id = + wgroup_size * item.get_group_linear_id() * values_per_thread + + local_id; + const auto& selected_reducer = ReducerConditional::select( + static_cast<const FunctorType&>(functor), + static_cast<const ReducerType&>(reducer)); + + // In the first iteration, we call functor to initialize the local + // memory. Otherwise, the local memory is initialized with the + // results from the previous iteration that are stored in global + // memory. Note that we load values_per_thread values per thread + // and immediately combine them to avoid too many threads being + // idle in the actual workgroup reduction. 
+ using index_type = typename Policy::index_type; + const auto upper_bound = std::min<index_type>( + global_id + values_per_thread * wgroup_size, size); + if (first_run) { + reference_type update = ValueInit::init( + selected_reducer, &local_mem[local_id * value_count]); + for (index_type id = global_id; id < upper_bound; + id += wgroup_size) { + if constexpr (std::is_same<WorkTag, void>::value) + functor(id + begin, update); + else + functor(WorkTag(), id + begin, update); + } + } else { + if (global_id >= size) + ValueInit::init(selected_reducer, + &local_mem[local_id * value_count]); + else { + ValueOps::copy(functor, &local_mem[local_id * value_count], + &results_ptr[global_id * value_count]); + for (index_type id = global_id + wgroup_size; + id < upper_bound; id += wgroup_size) { + ValueJoin::join(selected_reducer, + &local_mem[local_id * value_count], + &results_ptr[id * value_count]); + } + } + } + item.barrier(sycl::access::fence_space::local_space); + + // Perform the actual workgroup reduction. To achieve a better + // memory access pattern, we use sequential addressing and a + // reversed loop. If the workgroup size is 8, the first element + // contains all the values with index%4==0, after the second one + // the values with index%2==0 and after the third one index%1==0, + // i.e., all values. + for (unsigned int stride = wgroup_size / 2; stride > 0; + stride >>= 1) { + const auto idx = local_id; + if (idx < stride) { + ValueJoin::join(selected_reducer, + &local_mem[idx * value_count], + &local_mem[(idx + stride) * value_count]); + } + item.barrier(sycl::access::fence_space::local_space); + } + + // Finally, we copy the workgroup results back to global memory to + // be used in the next iteration. If this is the last iteration, + // i.e., there is only one workgroup also call final() if + // necessary. 
+ if (local_id == 0) { + ValueOps::copy( + functor, + &results_ptr2[(item.get_group_linear_id()) * value_count], + &local_mem[0]); + if constexpr (ReduceFunctorHasFinal<FunctorType>::value) + if (n_wgroups <= 1) + FunctorFinal<FunctorType, WorkTag>::final( + static_cast<const FunctorType&>(functor), + &results_ptr2[(item.get_group_linear_id()) * + value_count]); + } + }); + }); + space.fence(); + + // FIXME_SYCL this is likely not necessary, see above + Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCLDeviceUSMSpace>( + space, results_ptr, results_ptr2, + sizeof(*m_result_ptr) * value_count * n_wgroups); + space.fence(); + + first_run = false; + size = n_wgroups; + } + + // At this point, the reduced value is written to the entry in results_ptr + // and all that is left is to copy it back to the given result pointer if + // necessary. + if (m_result_ptr) { + Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCLDeviceUSMSpace>( + space, m_result_ptr, results_ptr, + sizeof(*m_result_ptr) * value_count); + space.fence(); + } } - template <typename TagType> - std::enable_if_t<!std::is_void<TagType>::value> exec(reference_type update) { - using member_type = typename Policy::member_type; - member_type e = m_policy.end(); - for (member_type i = m_policy.begin(); i < e; ++i) - m_functor(TagType{}, i, update); + public: + void execute() const { + Kokkos::Experimental::Impl::SYCLInternal& instance = + *m_policy.space().impl_internal_space_instance(); + using IndirectKernelMem = + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; + IndirectKernelMem& indirectKernelMem = instance.m_indirectKernelMem; + IndirectKernelMem& indirectReducerMem = instance.m_indirectReducerMem; + + const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_functor, indirectKernelMem); + const auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_reducer, 
indirectReducerMem); + + sycl_direct_launch(m_policy, functor_wrapper.get_functor(), + reducer_wrapper.get_functor()); } - template <typename T> - struct ExtendedReferenceWrapper : std::reference_wrapper<T> { - using std::reference_wrapper<T>::reference_wrapper; + private: + FunctorType m_functor; + Policy m_policy; + ReducerType m_reducer; + pointer_type m_result_ptr; +}; - using value_type = typename FunctorValueTraits<T, WorkTag>::value_type; +template <class FunctorType, class ReducerType, class... Traits> +class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType, + Kokkos::Experimental::SYCL> { + public: + using Policy = Kokkos::MDRangePolicy<Traits...>; - template <typename Dummy = T> - std::enable_if_t<std::is_same_v<Dummy, T> && - ReduceFunctorHasInit<Dummy>::value> - init(value_type& old_value, const value_type& new_value) const { - return this->get().init(old_value, new_value); - } + private: + using Analysis = + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; + using execution_space = typename Analysis::execution_space; + using value_type = typename Analysis::value_type; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; - template <typename Dummy = T> - std::enable_if_t<std::is_same_v<Dummy, T> && - ReduceFunctorHasJoin<Dummy>::value> - join(value_type& old_value, const value_type& new_value) const { - return this->get().join(old_value, new_value); - } + using WorkTag = typename Policy::work_tag; - template <typename Dummy = T> - std::enable_if_t<std::is_same_v<Dummy, T> && - ReduceFunctorHasFinal<Dummy>::value> - final(value_type& old_value) const { - return this->get().final(old_value); - } + // MDRangePolicy is not trivially copyable. Hence, replicate the data we + // really need in DeviceIterateTile in a trivially copyable struct. 
+ struct BarePolicy { + using index_type = typename Policy::index_type; + + BarePolicy(const Policy& policy) + : m_lower(policy.m_lower), + m_upper(policy.m_upper), + m_tile(policy.m_tile), + m_tile_end(policy.m_tile_end), + m_num_tiles(policy.m_num_tiles), + m_prod_tile_dims(policy.m_prod_tile_dims) {} + + const typename Policy::point_type m_lower; + const typename Policy::point_type m_upper; + const typename Policy::tile_type m_tile; + const typename Policy::point_type m_tile_end; + const typename Policy::index_type m_num_tiles; + const typename Policy::index_type m_prod_tile_dims; + static constexpr Iterate inner_direction = Policy::inner_direction; + static constexpr int rank = Policy::rank; }; - template <typename PolicyType, typename Functor> - void sycl_direct_launch(const PolicyType& policy, - const Functor& functor) const { - // Convenience references - const Kokkos::Experimental::SYCL& space = policy.space(); - Kokkos::Experimental::Impl::SYCLInternal& instance = - *space.impl_internal_space_instance(); - cl::sycl::queue& q = *instance.m_queue; - - auto result_ptr = static_cast<pointer_type>( - sycl::malloc(sizeof(*m_result_ptr), q, sycl::usm::alloc::shared)); - - value_type identity{}; - if constexpr (!std::is_same<ReducerType, InvalidType>::value) - m_reducer.init(identity); - - *result_ptr = identity; - if constexpr (ReduceFunctorHasInit<Functor>::value) - ValueInit::init(functor, result_ptr); - - q.submit([&](cl::sycl::handler& cgh) { - // FIXME_SYCL a local size larger than 1 doesn't work for all cases - cl::sycl::nd_range<1> range(policy.end() - policy.begin(), 1); - - const auto reduction = [&]() { - if constexpr (!std::is_same<ReducerType, InvalidType>::value) { - return cl::sycl::ONEAPI::reduction( - result_ptr, identity, - [this](value_type& old_value, const value_type& new_value) { - m_reducer.join(old_value, new_value); - return old_value; - }); - } else { - if constexpr (ReduceFunctorHasJoin<Functor>::value) { - return 
cl::sycl::ONEAPI::reduction( - result_ptr, identity, - [functor](value_type& old_value, const value_type& new_value) { - functor.join(old_value, new_value); - return old_value; - }); - } else { - return cl::sycl::ONEAPI::reduction(result_ptr, identity, - std::plus<>()); - } - } - }(); - - cgh.parallel_for(range, reduction, - [=](cl::sycl::nd_item<1> item, auto& sum) { - const typename Policy::index_type id = - static_cast<typename Policy::index_type>( - item.get_global_id(0)) + - policy.begin(); - value_type partial = identity; - if constexpr (std::is_same<WorkTag, void>::value) - functor(id, partial); - else - functor(WorkTag(), id, partial); - sum.combine(partial); - }); - }); - - q.wait(); - - static_assert(ReduceFunctorHasFinal<Functor>::value == - ReduceFunctorHasFinal<FunctorType>::value); - static_assert(ReduceFunctorHasJoin<Functor>::value == - ReduceFunctorHasJoin<FunctorType>::value); - - if constexpr (ReduceFunctorHasFinal<Functor>::value) - FunctorFinal<Functor, WorkTag>::final(functor, result_ptr); - else - *m_result_ptr = *result_ptr; - - sycl::free(result_ptr, q); - } + public: + // V - View + template <typename V> + ParallelReduce( + const FunctorType& f, const Policy& p, const V& v, + typename std::enable_if<Kokkos::is_view<V>::value, void*>::type = nullptr) + : m_functor(f), m_policy(p), m_space(p.space()), m_result_ptr(v.data()) {} + + ParallelReduce(const FunctorType& f, const Policy& p, + const ReducerType& reducer) + : m_functor(f), + m_policy(p), + m_space(p.space()), + m_reducer(reducer), + m_result_ptr(reducer.view().data()) {} + + private: + template <typename PolicyType, typename Functor, typename Reducer> + void sycl_direct_launch(const PolicyType& policy, const Functor& functor, + const Reducer& reducer) const { + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + 
std::conditional_t<std::is_same<InvalidType, ReducerType>::value, + WorkTag, void>; + using ValueInit = + Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; + using ValueJoin = + Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; + using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; - template <typename Functor> - void sycl_indirect_launch(const Functor& functor) const { // Convenience references - const Kokkos::Experimental::SYCL& space = m_policy.space(); Kokkos::Experimental::Impl::SYCLInternal& instance = - *space.impl_internal_space_instance(); - Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMemory& kernelMem = - *instance.m_indirectKernel; + *m_space.impl_internal_space_instance(); + sycl::queue& q = *instance.m_queue; + + const int nwork = m_policy.m_num_tiles; + const int block_size = + std::pow(2, std::ceil(std::log2(m_policy.m_prod_tile_dims))); + + const sycl::range<1> local_range(block_size); + // REMEMBER swap local x<->y to be conforming with Cuda/HIP implementation + const sycl::range<1> global_range(nwork * block_size); + const sycl::nd_range<1> range{global_range, local_range}; + + const size_t wgroup_size = range.get_local_range().size(); + size_t size = range.get_global_range().size(); + const auto init_size = + std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1); + const auto& selected_reducer = ReducerConditional::select(functor, reducer); + const unsigned int value_count = + FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count( + selected_reducer); + // FIXME_SYCL only use the first half + const auto results_ptr = static_cast<pointer_type>(instance.scratch_space( + sizeof(value_type) * std::max(value_count, 1u) * init_size * 2)); + // FIXME_SYCL without this we are running into a race condition + const auto results_ptr2 = + results_ptr + std::max(value_count, 1u) * init_size; + + // If size<=1 we only call init(), the functor and possibly final once + // working with the global 
scratch memory but don't copy back to + // m_result_ptr yet. + if (size <= 1) { + q.submit([&](sycl::handler& cgh) { + cgh.single_task([=]() { + const auto& selected_reducer = ReducerConditional::select( + static_cast<const FunctorType&>(functor), + static_cast<const ReducerType&>(reducer)); + reference_type update = + ValueInit::init(selected_reducer, results_ptr); + if (size == 1) { + Kokkos::Impl::Reduce::DeviceIterateTile< + Policy::rank, BarePolicy, Functor, typename Policy::work_tag, + reference_type>(policy, functor, update, {1, 1, 1}, {0, 0, 0}, + {0, 0, 0}) + .exec_range(); + } + if constexpr (ReduceFunctorHasFinal<FunctorType>::value) + FunctorFinal<FunctorType, WorkTag>::final( + static_cast<const FunctorType&>(functor), results_ptr); + }); + }); + m_space.fence(); + } - // Allocate USM shared memory for the functor - kernelMem.resize(std::max(kernelMem.size(), sizeof(functor))); + // Otherwise, we perform a reduction on the values in all workgroups + // separately, write the workgroup results back to global memory and recurse + // until only one workgroup does the reduction and thus gets the final + // value. + bool first_run = true; + while (size > 1) { + auto n_wgroups = (size + wgroup_size - 1) / wgroup_size; + q.submit([&](sycl::handler& cgh) { + sycl::accessor<value_type, 1, sycl::access::mode::read_write, + sycl::access::target::local> + local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u), + cgh); + + const BarePolicy bare_policy = m_policy; + + cgh.parallel_for(range, [=](sycl::nd_item<1> item) { + const auto local_id = item.get_local_linear_id(); + const auto global_id = + wgroup_size * item.get_group_linear_id() + local_id; + const auto& selected_reducer = ReducerConditional::select( + static_cast<const FunctorType&>(functor), + static_cast<const ReducerType&>(reducer)); + + // In the first iteration, we call functor to initialize the local + // memory. 
Otherwise, the local memory is initialized with the + // results from the previous iteration that are stored in global + // memory. + using index_type = typename Policy::index_type; + const auto upper_bound = + std::min<index_type>(global_id + wgroup_size, size); + if (first_run) { + reference_type update = ValueInit::init( + selected_reducer, &local_mem[local_id * value_count]); + + // SWAPPED here to be conforming with CUDA implementation + const index_type local_x = 0; + const index_type local_y = item.get_local_id(0); + const index_type local_z = 0; + const index_type global_x = item.get_group(0); + const index_type global_y = 0; + const index_type global_z = 0; + const index_type n_global_x = item.get_group_range(0); + const index_type n_global_y = 1; + const index_type n_global_z = 1; + + Kokkos::Impl::Reduce::DeviceIterateTile< + Policy::rank, BarePolicy, Functor, typename Policy::work_tag, + reference_type>(bare_policy, functor, update, + {n_global_x, n_global_y, n_global_z}, + {global_x, global_y, global_z}, + {local_x, local_y, local_z}) + .exec_range(); + } else { + if (global_id >= size) + ValueInit::init(selected_reducer, + &local_mem[local_id * value_count]); + else { + ValueOps::copy(functor, &local_mem[local_id * value_count], + &results_ptr[global_id * value_count]); + for (index_type id = global_id + wgroup_size; id < upper_bound; + id += wgroup_size) { + ValueJoin::join(selected_reducer, + &local_mem[local_id * value_count], + &results_ptr[id * value_count]); + } + } + } + item.barrier(sycl::access::fence_space::local_space); + + // Perform the actual workgroup reduction. To achieve a better + // memory access pattern, we use sequential addressing and a + // reversed loop. If the workgroup size is 8, the first element + // contains all the values with index%4==0, after the second one + // the values with index%2==0 and after the third one index%1==0, + // i.e., all values. 
+ for (unsigned int stride = wgroup_size / 2; stride > 0; + stride >>= 1) { + const auto idx = local_id; + if (idx < stride) { + ValueJoin::join(selected_reducer, &local_mem[idx * value_count], + &local_mem[(idx + stride) * value_count]); + } + item.barrier(sycl::access::fence_space::local_space); + } - // Placement new a copy of functor into USM shared memory - // - // Store it in a unique_ptr to call its destructor on scope exit - std::unique_ptr<Functor, Kokkos::Impl::destruct_delete> kernelFunctorPtr( - new (kernelMem.data()) Functor(functor)); + // Finally, we copy the workgroup results back to global memory to + // be used in the next iteration. If this is the last iteration, + // i.e., there is only one workgroup also call final() if + // necessary. + if (local_id == 0) { + ValueOps::copy( + functor, + &results_ptr2[(item.get_group_linear_id()) * value_count], + &local_mem[0]); + if constexpr (ReduceFunctorHasFinal<FunctorType>::value) + if (n_wgroups <= 1) + FunctorFinal<FunctorType, WorkTag>::final( + static_cast<const FunctorType&>(functor), + &results_ptr2[(item.get_group_linear_id()) * value_count]); + } + }); + }); + m_space.fence(); + + // FIXME_SYCL this is likely not necessary, see above + Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCLDeviceUSMSpace>( + m_space, results_ptr, results_ptr2, + sizeof(*m_result_ptr) * value_count * n_wgroups); + m_space.fence(); + + first_run = false; + size = n_wgroups; + } - auto kernelFunctor = ExtendedReferenceWrapper<Functor>(*kernelFunctorPtr); - sycl_direct_launch(m_policy, kernelFunctor); + // At this point, the reduced value is written to the entry in results_ptr + // and all that is left is to copy it back to the given result pointer if + // necessary. 
+ if (m_result_ptr) { + Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCLDeviceUSMSpace>( + m_space, m_result_ptr, results_ptr, + sizeof(*m_result_ptr) * value_count); + m_space.fence(); + } } public: - void execute() const { - if (m_policy.begin() == m_policy.end()) { - const Kokkos::Experimental::SYCL& space = m_policy.space(); - Kokkos::Experimental::Impl::SYCLInternal& instance = - *space.impl_internal_space_instance(); - cl::sycl::queue& q = *instance.m_queue; - - pointer_type result_ptr = - ReduceFunctorHasFinal<FunctorType>::value - ? static_cast<pointer_type>(sycl::malloc( - sizeof(*m_result_ptr), q, sycl::usm::alloc::shared)) - : m_result_ptr; - - sycl::usm::alloc result_ptr_type = - sycl::get_pointer_type(result_ptr, q.get_context()); - - switch (result_ptr_type) { - case sycl::usm::alloc::host: - case sycl::usm::alloc::shared: - ValueInit::init(m_functor, result_ptr); - break; - case sycl::usm::alloc::device: - // non-USM-allocated memory - case sycl::usm::alloc::unknown: { - value_type host_result; - ValueInit::init(m_functor, &host_result); - q.memcpy(result_ptr, &host_result, sizeof(host_result)).wait(); - break; - } - default: Kokkos::abort("pointer type outside of SYCL specs."); - } - - if constexpr (ReduceFunctorHasFinal<FunctorType>::value) { - FunctorFinal<FunctorType, WorkTag>::final(m_functor, result_ptr); - sycl::free(result_ptr, q); - } - - return; - } + template <typename Policy, typename Functor> + static int max_tile_size_product(const Policy& policy, const Functor&) { + return policy.space().impl_internal_space_instance()->m_maxThreadsPerSM; + } - if constexpr (std::is_trivially_copyable_v<decltype(m_functor)>) - sycl_direct_launch(m_policy, m_functor); - else - sycl_indirect_launch(m_functor); + void execute() const { + Kokkos::Experimental::Impl::SYCLInternal& instance = + *m_space.impl_internal_space_instance(); + using IndirectKernelMem = + 
Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; + IndirectKernelMem& indirectKernelMem = instance.m_indirectKernelMem; + IndirectKernelMem& indirectReducerMem = instance.m_indirectReducerMem; + + const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_functor, indirectKernelMem); + const auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_reducer, indirectReducerMem); + + sycl_direct_launch(m_policy, functor_wrapper.get_functor(), + reducer_wrapper.get_functor()); } private: FunctorType m_functor; - Policy m_policy; + BarePolicy m_policy; + const Kokkos::Experimental::SYCL& m_space; ReducerType m_reducer; pointer_type m_result_ptr; }; diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp index 3595255b07648756d5aff44a000ae8b5078db5e0..5eac6bf9da62b29b9d15697bc5061c00db504e0c 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp @@ -83,21 +83,21 @@ class ParallelScanSYCLBase { private: template <typename Functor> - void scan_internal(cl::sycl::queue& q, const Functor& functor, + void scan_internal(sycl::queue& q, const Functor& functor, pointer_type global_mem, std::size_t size) const { // FIXME_SYCL optimize constexpr size_t wgroup_size = 32; auto n_wgroups = (size + wgroup_size - 1) / wgroup_size; // FIXME_SYCL The allocation should be handled by the execution space - auto deleter = [&q](value_type* ptr) { cl::sycl::free(ptr, q); }; + auto deleter = [&q](value_type* ptr) { sycl::free(ptr, q); }; std::unique_ptr<value_type[], decltype(deleter)> group_results_memory( static_cast<pointer_type>(sycl::malloc(sizeof(value_type) * n_wgroups, q, sycl::usm::alloc::shared)), deleter); auto group_results = group_results_memory.get(); - q.submit([&](cl::sycl::handler& cgh) { + q.submit([&](sycl::handler& cgh) { sycl::accessor<value_type, 1, 
sycl::access::mode::read_write, sycl::access::target::local> local_mem(sycl::range<1>(wgroup_size), cgh); @@ -159,7 +159,7 @@ class ParallelScanSYCLBase { }); if (n_wgroups > 1) scan_internal(q, functor, group_results, n_wgroups); - q.wait(); + m_policy.space().fence(); q.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::nd_range<1>(n_wgroups * wgroup_size, wgroup_size), @@ -171,7 +171,7 @@ class ParallelScanSYCLBase { &group_results[item.get_group_linear_id()]); }); }); - q.wait(); + m_policy.space().fence(); } template <typename Functor> @@ -180,18 +180,17 @@ class ParallelScanSYCLBase { const Kokkos::Experimental::SYCL& space = m_policy.space(); Kokkos::Experimental::Impl::SYCLInternal& instance = *space.impl_internal_space_instance(); - cl::sycl::queue& q = *instance.m_queue; + sycl::queue& q = *instance.m_queue; const std::size_t len = m_policy.end() - m_policy.begin(); // Initialize global memory q.submit([&](sycl::handler& cgh) { auto global_mem = m_scratch_space; - auto policy = m_policy; + auto begin = m_policy.begin(); cgh.parallel_for(sycl::range<1>(len), [=](sycl::item<1> item) { const typename Policy::index_type id = - static_cast<typename Policy::index_type>(item.get_id()) + - policy.begin(); + static_cast<typename Policy::index_type>(item.get_id()) + begin; value_type update{}; ValueInit::init(functor, &update); if constexpr (std::is_same<WorkTag, void>::value) @@ -201,7 +200,7 @@ class ParallelScanSYCLBase { ValueOps::copy(functor, &global_mem[id], &update); }); }); - q.wait(); + space.fence(); // Perform the actual exlcusive scan scan_internal(q, functor, m_scratch_space, len); @@ -220,51 +219,36 @@ class ParallelScanSYCLBase { ValueOps::copy(functor, &global_mem[global_id], &update); }); }); - q.wait(); - } - - template <typename Functor> - void sycl_indirect_launch(const Functor& functor) const { - // Convenience references - const Kokkos::Experimental::SYCL& space = m_policy.space(); - Kokkos::Experimental::Impl::SYCLInternal& instance = - 
*space.impl_internal_space_instance(); - Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMemory& kernelMem = - *instance.m_indirectKernel; - - // Allocate USM shared memory for the functor - kernelMem.resize(std::max(kernelMem.size(), sizeof(functor))); - - // Placement new a copy of functor into USM shared memory - // - // Store it in a unique_ptr to call its destructor on scope exit - std::unique_ptr<Functor, Kokkos::Impl::destruct_delete> kernelFunctorPtr( - new (kernelMem.data()) Functor(functor)); - - auto kernelFunctor = std::reference_wrapper(*kernelFunctorPtr); - sycl_direct_launch(kernelFunctor); + space.fence(); } public: template <typename PostFunctor> void impl_execute(const PostFunctor& post_functor) { - const auto& q = *(m_policy.space().impl_internal_space_instance()->m_queue); + if (m_policy.begin() == m_policy.end()) return; + + const auto& q = *m_policy.space().impl_internal_space_instance()->m_queue; const std::size_t len = m_policy.end() - m_policy.begin(); // FIXME_SYCL The allocation should be handled by the execution space // consider only storing one value per block and recreate initial results in // the end before doing the final pass - auto deleter = [&q](value_type* ptr) { cl::sycl::free(ptr, q); }; + auto deleter = [&q](value_type* ptr) { sycl::free(ptr, q); }; std::unique_ptr<value_type[], decltype(deleter)> result_memory( static_cast<pointer_type>(sycl::malloc(sizeof(value_type) * len, q, sycl::usm::alloc::shared)), deleter); m_scratch_space = result_memory.get(); - if constexpr (std::is_trivially_copyable_v<decltype(m_functor)>) - sycl_direct_launch(m_policy, m_functor); - else - sycl_indirect_launch(m_functor); + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& + indirectKernelMem = m_policy.space() + .impl_internal_space_instance() + ->m_indirectKernelMem; + + const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_functor, indirectKernelMem); + + 
sycl_direct_launch(functor_wrapper.get_functor()); post_functor(); } diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp new file mode 100644 index 0000000000000000000000000000000000000000..738620926b5496b9710ce001b77c6fb625325320 --- /dev/null +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp @@ -0,0 +1,835 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_SYCL_PARALLEL_TEAM_HPP
+#define KOKKOS_SYCL_PARALLEL_TEAM_HPP
+
+#include <Kokkos_Parallel.hpp>
+
+#include <SYCL/Kokkos_SYCL_Team.hpp>
+
+namespace Kokkos {
+namespace Impl {
+/** \brief Team policy specialization for the SYCL execution space.
+ *
+ *  Stores the league size, team size, vector length, chunk size and the
+ *  per-team / per-thread scratch requests for the two scratch levels, plus
+ *  flags recording whether team size or vector length were left to be
+ *  chosen automatically (requested value <= 0 or Kokkos::AUTO).
+ */
+template <typename... Properties>
+class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>
+    : public PolicyTraits<Properties...> {
+ public:
+  using execution_policy = TeamPolicyInternal;
+
+  using traits = PolicyTraits<Properties...>;
+
+  // Needed so that the converting constructor can read the private members
+  // of policies instantiated with a different property list.
+  template <typename ExecSpace, typename... OtherProperties>
+  friend class TeamPolicyInternal;
+
+ private:
+  static int constexpr MAX_WARP = 8;
+
+  typename traits::execution_space m_space;
+  int m_league_size;
+  int m_team_size;
+  int m_vector_length;
+  // Index 0/1 is the scratch level.
+  int m_team_scratch_size[2];
+  int m_thread_scratch_size[2];
+  int m_chunk_size;
+  bool m_tune_team_size;
+  bool m_tune_vector_length;
+
+ public:
+  using execution_space = Kokkos::Experimental::SYCL;
+
+  /** \brief Convert from a team policy with a different property list.
+   *
+   *  All members are copied in the member initializer list (in declaration
+   *  order) so that the execution space is copy-constructed directly rather
+   *  than default-constructed and then overwritten.
+   */
+  template <class... OtherProperties>
+  TeamPolicyInternal(TeamPolicyInternal<OtherProperties...> const& p)
+      : m_space(p.m_space),
+        m_league_size(p.m_league_size),
+        m_team_size(p.m_team_size),
+        m_vector_length(p.m_vector_length),
+        m_team_scratch_size{p.m_team_scratch_size[0],
+                            p.m_team_scratch_size[1]},
+        m_thread_scratch_size{p.m_thread_scratch_size[0],
+                              p.m_thread_scratch_size[1]},
+        m_chunk_size(p.m_chunk_size),
+        m_tune_team_size(p.m_tune_team_size),
+        m_tune_vector_length(p.m_tune_vector_length) {}
+
+  /** \brief Largest team size usable with functor \p f in a parallel_for */
+  template <typename FunctorType>
+  int team_size_max(FunctorType const& f, ParallelForTag const&) const {
+    return internal_team_size_max_for(f);
+  }
+
+  /** \brief Largest team size usable with functor \p f in a parallel_reduce */
+  template <class FunctorType>
+  inline int team_size_max(const FunctorType& f,
+                           const ParallelReduceTag&) const {
+    return internal_team_size_max_reduce(f);
+  }
+
+  /** \brief Same as above; the reducer argument is currently ignored */
+  template <class FunctorType, class ReducerType>
+  inline int team_size_max(const FunctorType& f, const ReducerType& /*r*/,
+                           const ParallelReduceTag&) const {
+    return internal_team_size_max_reduce(f);
+  }
+
+  /** \brief Recommended team size for functor \p f in a parallel_for */
+  template <typename FunctorType>
+  int team_size_recommended(FunctorType const& f, ParallelForTag const&) const {
+    // Route through the "recommended" helper like the reduce overloads do, so
+    // that improving the helper is sufficient to change this result.
+    return internal_team_size_recommended_for(f);
+  }
+
+  /** \brief Recommended team size for functor \p f in a parallel_reduce */
+  template <typename FunctorType>
+  inline int team_size_recommended(FunctorType const& f,
+                                   ParallelReduceTag const&) const {
+    return internal_team_size_recommended_reduce(f);
+  }
+
+  /** \brief Same as above; the reducer argument is currently ignored */
+  template <class FunctorType, class ReducerType>
+  int team_size_recommended(FunctorType const& f, ReducerType const&,
+                            ParallelReduceTag const&) const {
+    return internal_team_size_recommended_reduce(f);
+  }
+
+  // True if the vector length / team size was requested as "automatic".
+  inline bool impl_auto_vector_length() const { return m_tune_vector_length; }
+  inline bool impl_auto_team_size() const { return m_tune_team_size; }
+
+  static int vector_length_max() {
+    // FIXME_SYCL provide a reasonable value
+    return 1;
+  }
+
+  /** \brief Clamp a requested vector length to a supported value.
+   *
+   *  The request is first limited to vector_length_max(). If the result is
+   *  not a power of two it is rounded down to the largest power of two that
+   *  does not exceed it (the search stops at 2^5).
+   */
+  static int verify_requested_vector_length(int requested_vector_length) {
+    int test_vector_length =
+        std::min(requested_vector_length, vector_length_max());
+
+    // Allow only power-of-two vector_length
+    if (!(is_integral_power_of_two(test_vector_length))) {
+      int test_pow2 = 1;
+      for (int i = 0; i < 5; i++) {
+        test_pow2 = test_pow2 << 1;
+        if (test_pow2 > test_vector_length) {
+          break;
+        }
+      }
+      test_vector_length = test_pow2 >> 1;
+    }
+
+    return test_vector_length;
+  }
+
+  /** \brief Maximum scratch size in bytes for the given scratch level */
+  static int scratch_size_max(int level) {
+    // FIXME_SYCL arbitrarily setting this to 32kB
+    constexpr int level_0_max = 1024 * 32;
+    // FIXME_SYCL arbitrarily setting this to 20MB
+    constexpr int higher_level_max = 20 * 1024 * 1024;
+    return level == 0 ? level_0_max : higher_level_max;
+  }
+
+  // The members are stored as int; make the narrowing from size_t explicit.
+  inline void impl_set_vector_length(size_t size) {
+    m_vector_length = static_cast<int>(size);
+  }
+  inline void impl_set_team_size(size_t size) {
+    m_team_size = static_cast<int>(size);
+  }
+  int impl_vector_length() const { return m_vector_length; }
+  KOKKOS_DEPRECATED int vector_length() const { return impl_vector_length(); }
+
+  int team_size() const { return m_team_size; }
+
+  int league_size() const { return m_league_size; }
+
+  /** \brief Total scratch bytes per team for scratch level \p level.
+   *
+   *  \param level      scratch level; used unchecked as an index into the
+   *                    two-element size arrays, so it must be 0 or 1
+   *  \param team_size_ team size to assume; a negative value (the default)
+   *                    selects the team size stored in this policy
+   */
+  int scratch_size(int level, int team_size_ = -1) const {
+    if (team_size_ < 0) team_size_ = m_team_size;
+    return m_team_scratch_size[level] +
+           team_size_ * m_thread_scratch_size[level];
+  }
+
+  int team_scratch_size(int level) const { return m_team_scratch_size[level]; }
+
+  int thread_scratch_size(int level) const {
+    return m_thread_scratch_size[level];
+  }
+
+  typename traits::execution_space space() const { return m_space; }
+
+  /** \brief Empty policy: league size 0, team size -1, no scratch */
+  TeamPolicyInternal()
+      : m_space(typename traits::execution_space()),
+        m_league_size(0),
+        m_team_size(-1),
+        m_vector_length(0),
+        m_team_scratch_size{0, 0},
+        m_thread_scratch_size{0, 0},
+        m_chunk_size(0),
+        m_tune_team_size(false),
+        m_tune_vector_length(false) {}
+
+  /** \brief Specify league size, request team size.
+   *
+   *  A team size or vector length request <= 0 marks the respective value as
+   *  "to be tuned"; the vector length then falls back to the verified value
+   *  for a request of 1.
+   */
+  TeamPolicyInternal(const execution_space space_, int league_size_,
+                     int team_size_request, int vector_length_request = 1)
+      : m_space(space_),
+        m_league_size(league_size_),
+        m_team_size(team_size_request),
+        m_vector_length(
+            (vector_length_request > 0)
+                ? verify_requested_vector_length(vector_length_request)
+                : (verify_requested_vector_length(1))),
+        m_team_scratch_size{0, 0},
+        m_thread_scratch_size{0, 0},
+        m_chunk_size(0),
+        m_tune_team_size(bool(team_size_request <= 0)),
+        m_tune_vector_length(bool(vector_length_request <= 0)) {
+    // FIXME_SYCL check parameters
+  }
+
+  /** \brief Specify league size, request team size */
+  TeamPolicyInternal(const execution_space space_, int league_size_,
+                     const Kokkos::AUTO_t& /* team_size_request */,
+                     int vector_length_request = 1)
+      : TeamPolicyInternal(space_, league_size_, -1, vector_length_request) {}
+  // FLAG
+  /** \brief Specify league size and team size, request vector length*/
+  TeamPolicyInternal(const execution_space space_, int league_size_,
+                     int team_size_request,
+                     const Kokkos::AUTO_t& /* vector_length_request */
+                     )
+      : TeamPolicyInternal(space_, league_size_, team_size_request, -1)
+
+  {}
+
+  /** \brief Specify league size, request team size and vector length*/
+  TeamPolicyInternal(const execution_space space_, int league_size_,
+                     const Kokkos::AUTO_t& /* team_size_request */,
+                     const Kokkos::AUTO_t& /* vector_length_request */
+
+                     )
+      : TeamPolicyInternal(space_, league_size_, -1, -1)
+
+  {}
+
+  // The overloads below mirror the ones above but run on a default
+  // constructed execution space instance.
+  TeamPolicyInternal(int league_size_, int team_size_request,
+                     int vector_length_request = 1)
+      : TeamPolicyInternal(typename traits::execution_space(), league_size_,
+                           team_size_request, vector_length_request) {}
+
+  TeamPolicyInternal(int league_size_,
+                     const Kokkos::AUTO_t& /* team_size_request */,
+                     int vector_length_request = 1)
+      : TeamPolicyInternal(typename traits::execution_space(), league_size_, -1,
+                           vector_length_request) {}
+
+  /** \brief Specify league size and team size, request vector length*/
+  TeamPolicyInternal(int league_size_, int team_size_request,
+                     const Kokkos::AUTO_t& /* vector_length_request */
+
+                     )
+      : TeamPolicyInternal(typename traits::execution_space(), league_size_,
+                           team_size_request, -1)
+
+  {}
+
+  /** \brief Specify league size, request team size and vector length*/
+  TeamPolicyInternal(int league_size_,
+                     const Kokkos::AUTO_t& /* team_size_request */,
+                     const Kokkos::AUTO_t& /* vector_length_request */
+
+                     )
+      : TeamPolicyInternal(typename traits::execution_space(), league_size_, -1,
+                           -1) {}
+
+  int chunk_size() const { return m_chunk_size; }
+
+  TeamPolicyInternal& set_chunk_size(typename traits::index_type chunk_size_) {
+    m_chunk_size = chunk_size_;
+    return *this;
+  }
+
+  /** \brief set per team scratch size for a specific level of the scratch
+   * hierarchy */
+  TeamPolicyInternal& set_scratch_size(int level,
+                                       PerTeamValue const& per_team) {
+    m_team_scratch_size[level] = per_team.value;
+    return *this;
+  }
+
+  /** \brief set per thread scratch size for a specific level of the scratch
+   * hierarchy */
+  TeamPolicyInternal& set_scratch_size(int level,
+                                       PerThreadValue const& per_thread) {
+    m_thread_scratch_size[level] = per_thread.value;
+    return *this;
+  }
+
+  /** \brief set per thread and per team scratch size for a specific level of
+   * the scratch hierarchy */
+  TeamPolicyInternal& set_scratch_size(int level, PerTeamValue const& per_team,
+                                       PerThreadValue const& per_thread) {
+    m_team_scratch_size[level]   = per_team.value;
+    m_thread_scratch_size[level] = per_thread.value;
+    return *this;
+  }
+
+  using member_type = Kokkos::Impl::SYCLTeamMember;
+
+ protected:
+  /** \brief Team size limit given a per-thread level-0 memory footprint.
+   *
+   *  Computes how many threads fit into m_maxShmemPerBlock after reserving
+   *  2 * sizeof(double) and the per-team level-0 scratch, and caps the result
+   *  at m_maxWorkgroupSize.
+   *
+   *  The arithmetic is deliberately done in a signed type: mixing the int
+   *  scratch sizes with sizeof() would otherwise promote everything to an
+   *  unsigned type, and a scratch request exceeding the available memory
+   *  would wrap around to a huge value instead of yielding "no thread fits".
+   *
+   *  \param per_thread_bytes bytes needed per team thread; expected > 0
+   *  \return the limit, or 0 if the fixed per-team part alone does not fit
+   */
+  int impl_team_size_max_for_memory(long long per_thread_bytes) const {
+    const auto& instance = *m_space.impl_internal_space_instance();
+    const long long available_bytes =
+        static_cast<long long>(instance.m_maxShmemPerBlock) -
+        static_cast<long long>(2 * sizeof(double)) - m_team_scratch_size[0];
+    const long long max_threads_for_memory =
+        available_bytes > 0 ? available_bytes / per_thread_bytes : 0;
+    return static_cast<int>(std::min<long long>(
+        static_cast<long long>(instance.m_maxWorkgroupSize),
+        max_threads_for_memory));
+  }
+
+  template <class FunctorType>
+  int internal_team_size_max_for(const FunctorType& /*f*/) const {
+    // nested_reducer_memsize = (sizeof(double) * (m_team_size + 2)
+    // custom: m_team_scratch_size[0] + m_thread_scratch_size[0] * m_team_size
+    // total:
+    // 2*sizeof(double)+m_team_scratch_size[0]
+    // + m_team_size(sizeof(double)+m_thread_scratch_size[0])
+    const long long per_thread_bytes =
+        static_cast<long long>(sizeof(double)) + m_thread_scratch_size[0];
+    return impl_team_size_max_for_memory(per_thread_bytes);
+  }
+
+  template <class FunctorType>
+  int internal_team_size_max_reduce(const FunctorType& f) const {
+    using Analysis        = FunctorAnalysis<FunctorPatternInterface::REDUCE,
+                                     TeamPolicyInternal, FunctorType>;
+    using value_type      = typename Analysis::value_type;
+    const int value_count = Analysis::value_count(f);
+
+    // nested_reducer_memsize = (sizeof(double) * (m_team_size + 2)
+    // reducer_memsize = sizeof(value_type) * m_team_size * value_count
+    // custom: m_team_scratch_size[0] + m_thread_scratch_size[0] * m_team_size
+    // total:
+    // 2*sizeof(double)+m_team_scratch_size[0]
+    // + m_team_size(sizeof(double)+sizeof(value_type)*value_count
+    //               +m_thread_scratch_size[0])
+    const long long per_thread_bytes =
+        static_cast<long long>(sizeof(double)) +
+        static_cast<long long>(sizeof(value_type)) * value_count +
+        m_thread_scratch_size[0];
+    return impl_team_size_max_for_memory(per_thread_bytes);
+  }
+
+  template <class FunctorType>
+  int internal_team_size_recommended_for(const FunctorType& f) const {
+    // FIXME_SYCL improve
+    return internal_team_size_max_for(f);
+  }
+
+  template <class FunctorType>
+  int internal_team_size_recommended_reduce(const FunctorType& f) const {
+    // FIXME_SYCL improve
+    return internal_team_size_max_reduce(f);
+  }
+};
+
+template <typename FunctorType, typename...
Properties> +class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>, + Kokkos::Experimental::SYCL> { + public: + using Policy = TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>; + using functor_type = FunctorType; + using size_type = ::Kokkos::Experimental::SYCL::size_type; + + private: + using member_type = typename Policy::member_type; + using work_tag = typename Policy::work_tag; + using launch_bounds = typename Policy::launch_bounds; + + FunctorType const m_functor; + Policy const m_policy; + size_type const m_league_size; + int m_team_size; + size_type const m_vector_size; + int m_shmem_begin; + int m_shmem_size; + void* m_scratch_ptr[2]; + int m_scratch_size[2]; + + template <typename Functor> + void sycl_direct_launch(const Policy& policy, const Functor& functor) const { + // Convenience references + const Kokkos::Experimental::SYCL& space = policy.space(); + Kokkos::Experimental::Impl::SYCLInternal& instance = + *space.impl_internal_space_instance(); + sycl::queue& q = *instance.m_queue; + + q.submit([&](sycl::handler& cgh) { + // FIXME_SYCL accessors seem to need a size greater than zero at least for + // host queues + sycl::accessor<char, 1, sycl::access::mode::read_write, + sycl::access::target::local> + team_scratch_memory_L0( + sycl::range<1>(std::max(m_scratch_size[0] + m_shmem_begin, 1)), + cgh); + + // Avoid capturing *this since it might not be trivially copyable + const auto shmem_begin = m_shmem_begin; + const int scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; + void* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]}; + + cgh.parallel_for( + sycl::nd_range<2>( + sycl::range<2>(m_league_size * m_team_size, m_vector_size), + sycl::range<2>(m_team_size, m_vector_size)), + [=](sycl::nd_item<2> item) { + const member_type team_member( + team_scratch_memory_L0.get_pointer(), shmem_begin, + scratch_size[0], + static_cast<char*>(scratch_ptr[1]) + + item.get_group(0) * scratch_size[1], + scratch_size[1], item); 
+ if constexpr (std::is_same<work_tag, void>::value) + functor(team_member); + else + functor(work_tag(), team_member); + }); + }); + space.fence(); + } + + public: + inline void execute() const { + if (m_league_size == 0) return; + + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& + indirectKernelMem = m_policy.space() + .impl_internal_space_instance() + ->m_indirectKernelMem; + + const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_functor, indirectKernelMem); + + sycl_direct_launch(m_policy, functor_wrapper.get_functor()); + } + + ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy) + : m_functor(arg_functor), + m_policy(arg_policy), + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.impl_vector_length()) { + // FIXME_SYCL optimize + if (m_team_size < 0) m_team_size = 32; + + m_shmem_begin = (sizeof(double) * (m_team_size + 2)); + m_shmem_size = + (m_policy.scratch_size(0, m_team_size) + + FunctorTeamShmemSize<FunctorType>::value(m_functor, m_team_size)); + m_scratch_size[0] = m_shmem_size; + m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); + + // FIXME_SYCL so far accessors used instead of these pointers + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. + const auto& space = *m_policy.space().impl_internal_space_instance(); + const sycl::queue& q = *space.m_queue; + m_scratch_ptr[0] = nullptr; + m_scratch_ptr[1] = sycl::malloc_device( + sizeof(char) * m_scratch_size[1] * m_league_size, q); + + if (static_cast<int>(space.m_maxShmemPerBlock) < + m_shmem_size - m_shmem_begin) { + std::stringstream out; + out << "Kokkos::Impl::ParallelFor<SYCL> insufficient shared memory! 
" + "Requested " + << m_shmem_size - m_shmem_begin << " bytes but maximum is " + << m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock + << '\n'; + Kokkos::Impl::throw_runtime_exception(out.str()); + } + + if (m_team_size > m_policy.team_size_max(arg_functor, ParallelForTag{})) + Kokkos::Impl::throw_runtime_exception( + "Kokkos::Impl::ParallelFor<SYCL> requested too large team size."); + } + + // FIXME_SYCL remove when managing m_scratch_ptr[1] in the execution space + // instance + ParallelFor(const ParallelFor&) = delete; + ParallelFor& operator=(const ParallelFor&) = delete; + + ~ParallelFor() { + const Kokkos::Experimental::SYCL& space = m_policy.space(); + Kokkos::Experimental::Impl::SYCLInternal& instance = + *space.impl_internal_space_instance(); + sycl::queue& q = *instance.m_queue; + sycl::free(m_scratch_ptr[1], q); + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template <class FunctorType, class ReducerType, class... 
Properties> +class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>, + ReducerType, Kokkos::Experimental::SYCL> { + public: + using Policy = TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>; + + private: + using Analysis = + FunctorAnalysis<FunctorPatternInterface::REDUCE, Policy, FunctorType>; + using member_type = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using launch_bounds = typename Policy::launch_bounds; + + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; + + public: + using functor_type = FunctorType; + using size_type = Kokkos::Experimental::SYCL::size_type; + + private: + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + // FIXME_SYCL avoid reallocating memory for reductions + /* size_type* m_scratch_space; + size_type* m_scratch_flags; + size_type m_team_begin;*/ + size_type m_shmem_begin; + size_type m_shmem_size; + void* m_scratch_ptr[2]; + int m_scratch_size[2]; + const size_type m_league_size; + int m_team_size; + const size_type m_vector_size; + + template <typename PolicyType, typename Functor, typename Reducer> + void sycl_direct_launch(const PolicyType& policy, const Functor& functor, + const Reducer& reducer) const { + using ReducerConditional = + Kokkos::Impl::if_c<std::is_same<InvalidType, ReducerType>::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + std::conditional_t<std::is_same<InvalidType, ReducerType>::value, + WorkTag, void>; + using ValueInit = + Kokkos::Impl::FunctorValueInit<ReducerTypeFwd, WorkTagFwd>; + using ValueJoin = + Kokkos::Impl::FunctorValueJoin<ReducerTypeFwd, WorkTagFwd>; + using ValueOps = Kokkos::Impl::FunctorValueOps<FunctorType, WorkTag>; + + auto selected_reducer = ReducerConditional::select(functor, 
reducer); + + // Convenience references + const Kokkos::Experimental::SYCL& space = policy.space(); + Kokkos::Experimental::Impl::SYCLInternal& instance = + *space.impl_internal_space_instance(); + sycl::queue& q = *instance.m_queue; + + // FIXME_SYCL optimize + const size_t wgroup_size = m_team_size; + std::size_t size = m_league_size * m_team_size; + const auto init_size = + std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1); + const unsigned int value_count = + FunctorValueTraits<ReducerTypeFwd, WorkTagFwd>::value_count( + selected_reducer); + // FIXME_SYCL only use the first half + const auto results_ptr = static_cast<pointer_type>(instance.scratch_space( + sizeof(value_type) * std::max(value_count, 1u) * init_size * 2)); + // FIXME_SYCL without this we are running into a race condition + const auto results_ptr2 = + results_ptr + std::max(value_count, 1u) * init_size; + + // If size<=1 we only call init(), the functor and possibly final once + // working with the global scratch memory but don't copy back to + // m_result_ptr yet. 
+ if (size <= 1) { + q.submit([&](sycl::handler& cgh) { + // FIXME_SYCL accessors seem to need a size greater than zero at least + // for host queues + sycl::accessor<char, 1, sycl::access::mode::read_write, + sycl::access::target::local> + team_scratch_memory_L0( + sycl::range<1>(std::max(m_scratch_size[0] + m_shmem_begin, 1)), + cgh); + + // Avoid capturing *this since it might not be trivially copyable + const auto shmem_begin = m_shmem_begin; + const int scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; + void* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]}; + + cgh.parallel_for( + sycl::nd_range<2>(sycl::range<2>(1, 1), sycl::range<2>(1, 1)), + [=](sycl::nd_item<2> item) { + const auto& selected_reducer = ReducerConditional::select( + static_cast<const FunctorType&>(functor), + static_cast<const ReducerType&>(reducer)); + reference_type update = + ValueInit::init(selected_reducer, results_ptr); + if (size == 1) { + const member_type team_member( + team_scratch_memory_L0.get_pointer(), shmem_begin, + scratch_size[0], static_cast<char*>(scratch_ptr[1]), + scratch_size[1], item); + if constexpr (std::is_same<WorkTag, void>::value) + functor(team_member, update); + else + functor(WorkTag(), team_member, update); + } + if constexpr (ReduceFunctorHasFinal<FunctorType>::value) + FunctorFinal<FunctorType, WorkTag>::final( + static_cast<const FunctorType&>(functor), results_ptr); + }); + }); + space.fence(); + } + + // Otherwise, we perform a reduction on the values in all workgroups + // separately, write the workgroup results back to global memory and recurse + // until only one workgroup does the reduction and thus gets the final + // value. 
+ bool first_run = true; + while (size > 1) { + auto n_wgroups = (size + wgroup_size - 1) / wgroup_size; + q.submit([&](sycl::handler& cgh) { + sycl::accessor<value_type, 1, sycl::access::mode::read_write, + sycl::access::target::local> + local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u), + cgh); + // FIXME_SYCL accessors seem to need a size greater than zero at least + // for host queues + sycl::accessor<char, 1, sycl::access::mode::read_write, + sycl::access::target::local> + team_scratch_memory_L0( + sycl::range<1>(std::max(m_scratch_size[0] + m_shmem_begin, 1)), + cgh); + + // Avoid capturing *this since it might not be trivially copyable + const auto shmem_begin = m_shmem_begin; + const int scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; + void* const scratch_ptr[2] = {m_scratch_ptr[0], m_scratch_ptr[1]}; + + cgh.parallel_for( + sycl::nd_range<2>( + sycl::range<2>(m_league_size * m_team_size, m_vector_size), + sycl::range<2>(m_team_size, m_vector_size)), + [=](sycl::nd_item<2> item) { + const auto local_id = item.get_local_linear_id(); + const auto global_id = + wgroup_size * item.get_group_linear_id() + local_id; + const auto& selected_reducer = ReducerConditional::select( + static_cast<const FunctorType&>(functor), + static_cast<const ReducerType&>(reducer)); + + // In the first iteration, we call functor to initialize the local + // memory. Otherwise, the local memory is initialized with the + // results from the previous iteration that are stored in global + // memory. Note that we load values_per_thread values per thread + // and immediately combine them to avoid too many threads being + // idle in the actual workgroup reduction. 
+ if (first_run) { + reference_type update = ValueInit::init( + selected_reducer, &local_mem[local_id * value_count]); + const member_type team_member( + team_scratch_memory_L0.get_pointer(), shmem_begin, + scratch_size[0], + static_cast<char*>(scratch_ptr[1]) + + item.get_group(0) * scratch_size[1], + scratch_size[1], item); + if constexpr (std::is_same<WorkTag, void>::value) + functor(team_member, update); + else + functor(WorkTag(), team_member, update); + } else { + if (global_id >= size) + ValueInit::init(selected_reducer, + &local_mem[local_id * value_count]); + else { + ValueOps::copy(functor, &local_mem[local_id * value_count], + &results_ptr[global_id * value_count]); + } + } + item.barrier(sycl::access::fence_space::local_space); + + // Perform the actual workgroup reduction. To achieve a better + // memory access pattern, we use sequential addressing and a + // reversed loop. If the workgroup size is 8, the first element + // contains all the values with index%4==0, after the second one + // the values with index%2==0 and after the third one index%1==0, + // i.e., all values. + for (unsigned int stride = wgroup_size / 2; stride > 0; + stride >>= 1) { + const auto idx = local_id; + if (idx < stride) { + ValueJoin::join(selected_reducer, + &local_mem[idx * value_count], + &local_mem[(idx + stride) * value_count]); + } + item.barrier(sycl::access::fence_space::local_space); + } + + // Finally, we copy the workgroup results back to global memory to + // be used in the next iteration. If this is the last iteration, + // i.e., there is only one workgroup also call final() if + // necessary. 
+ if (local_id == 0) { + ValueOps::copy( + functor, + &results_ptr2[(item.get_group_linear_id()) * value_count], + &local_mem[0]); + if constexpr (ReduceFunctorHasFinal<FunctorType>::value) + if (n_wgroups <= 1 && item.get_group_linear_id() == 0) { + FunctorFinal<FunctorType, WorkTag>::final( + static_cast<const FunctorType&>(functor), + &results_ptr2[(item.get_group_linear_id()) * + value_count]); + } + } + }); + }); + space.fence(); + + // FIXME_SYCL this is likely not necessary, see above + Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCLDeviceUSMSpace>( + space, results_ptr, results_ptr2, + sizeof(*m_result_ptr) * value_count * n_wgroups); + space.fence(); + + first_run = false; + size = n_wgroups; + } + + // At this point, the reduced value is written to the entry in results_ptr + // and all that is left is to copy it back to the given result pointer if + // necessary. + if (m_result_ptr) { + Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::Experimental::SYCLDeviceUSMSpace>( + space, m_result_ptr, results_ptr, + sizeof(*m_result_ptr) * value_count); + space.fence(); + } + } + + public: + inline void execute() { + Kokkos::Experimental::Impl::SYCLInternal& instance = + *m_policy.space().impl_internal_space_instance(); + using IndirectKernelMem = + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; + IndirectKernelMem& indirectKernelMem = instance.m_indirectKernelMem; + IndirectKernelMem& indirectReducerMem = instance.m_indirectReducerMem; + + const auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_functor, indirectKernelMem); + const auto reducer_wrapper = Experimental::Impl::make_sycl_function_wrapper( + m_reducer, indirectReducerMem); + + sycl_direct_launch(m_policy, functor_wrapper.get_functor(), + reducer_wrapper.get_functor()); + } + + private: + void initialize() { + // FIXME_SYCL optimize + if (m_team_size < 0) m_team_size = 32; + // Must be a power 
of two no smaller than two, get the one not bigger than the + // requested one. + if ((m_team_size & m_team_size - 1) || m_team_size < 2) { + int temp_team_size = 2; + while ((temp_team_size << 1) < m_team_size) temp_team_size <<= 1; + m_team_size = temp_team_size; + } + + m_shmem_begin = (sizeof(double) * (m_team_size + 2)); + m_shmem_size = + (m_policy.scratch_size(0, m_team_size) + + FunctorTeamShmemSize<FunctorType>::value(m_functor, m_team_size)); + m_scratch_size[0] = m_shmem_size; + m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); + + // FIXME_SYCL so far accessors used instead of these pointers + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. + const auto& space = *m_policy.space().impl_internal_space_instance(); + const sycl::queue& q = *space.m_queue; + m_scratch_ptr[0] = nullptr; + m_scratch_ptr[1] = sycl::malloc_device( + sizeof(char) * m_scratch_size[1] * m_league_size, q); + + if (static_cast<int>(space.m_maxShmemPerBlock) < + m_shmem_size - m_shmem_begin) { + std::stringstream out; + out << "Kokkos::Impl::ParallelReduce<SYCL> insufficient shared memory! 
" + "Requested " + << m_shmem_size - m_shmem_begin << " bytes but maximum is " + << m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock + << '\n'; + Kokkos::Impl::throw_runtime_exception(out.str()); + } + + if (m_team_size > m_policy.team_size_max(m_functor, ParallelReduceTag{})) + Kokkos::Impl::throw_runtime_exception( + "Kokkos::Impl::ParallelReduce<SYCL> requested too large team size."); + } + + public: + template <class ViewType> + ParallelReduce(FunctorType const& arg_functor, Policy const& arg_policy, + ViewType const& arg_result, + typename std::enable_if<Kokkos::is_view<ViewType>::value, + void*>::type = nullptr) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(InvalidType()), + m_result_ptr(arg_result.data()), + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.impl_vector_length()) { + initialize(); + } + + ParallelReduce(FunctorType const& arg_functor, Policy const& arg_policy, + ReducerType const& reducer) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.impl_vector_length()) { + initialize(); + } +}; +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp index fd6f11d58c89f91410f448aee3f9af2db1cd9c46..75741438e295c543db2737e6943ea52e244d69db 100644 --- a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp @@ -42,6 +42,8 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> + #include <Kokkos_HostSpace.hpp> #include <Kokkos_SYCL.hpp> #include <Kokkos_SYCL_Space.hpp> @@ -55,7 +57,7 @@ namespace Kokkos { namespace Impl { namespace { -auto USM_memcpy(cl::sycl::queue& q, void* dst, const void* src, size_t n) { +auto USM_memcpy(sycl::queue& q, 
void* dst, const void* src, size_t n) { return q.memcpy(dst, src, n); } @@ -65,10 +67,10 @@ void USM_memcpy(Kokkos::Experimental::Impl::SYCLInternal& space, void* dst, } void USM_memcpy(void* dst, const void* src, size_t n) { - Kokkos::Experimental::Impl::SYCLInternal::singleton().m_queue->wait(); - USM_memcpy(*Kokkos::Experimental::Impl::SYCLInternal::singleton().m_queue, - dst, src, n) - .wait(); + Experimental::SYCL().fence(); + auto event = USM_memcpy( + *Experimental::Impl::SYCLInternal::singleton().m_queue, dst, src, n); + Experimental::Impl::SYCLInternal::fence(event); } } // namespace @@ -123,29 +125,27 @@ DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, Kokkos::HostSpace, namespace Kokkos { namespace Experimental { -SYCLDeviceUSMSpace::SYCLDeviceUSMSpace() : m_device(SYCL().sycl_device()) {} +SYCLDeviceUSMSpace::SYCLDeviceUSMSpace() + : m_queue(*SYCL().impl_internal_space_instance()->m_queue) {} +SYCLDeviceUSMSpace::SYCLDeviceUSMSpace(sycl::queue queue) + : m_queue(std::move(queue)) {} -void* SYCLDeviceUSMSpace::allocate(const size_t arg_alloc_size) const { - return allocate("[unlabeled]", arg_alloc_size); -} -void* SYCLDeviceUSMSpace::allocate(const char* arg_label, - const size_t arg_alloc_size, - const size_t arg_logical_size) const { - return impl_allocate(arg_label, arg_alloc_size, arg_logical_size); -} +SYCLSharedUSMSpace::SYCLSharedUSMSpace() + : m_queue(*SYCL().impl_internal_space_instance()->m_queue) {} +SYCLSharedUSMSpace::SYCLSharedUSMSpace(sycl::queue queue) + : m_queue(std::move(queue)) {} -void* SYCLDeviceUSMSpace::impl_allocate( +void* allocate_sycl( const char* arg_label, const size_t arg_alloc_size, - const size_t arg_logical_size, - const Kokkos::Tools::SpaceHandle arg_handle) const { - const cl::sycl::queue& queue = - *SYCL().impl_internal_space_instance()->m_queue; - void* const hostPtr = cl::sycl::malloc_device(arg_alloc_size, queue); + const size_t arg_logical_size, const Kokkos::Tools::SpaceHandle arg_handle, + const 
RawMemoryAllocationFailure::AllocationMechanism failure_tag, + const sycl::usm::alloc allocation_kind, const sycl::queue& queue) { + void* const hostPtr = sycl::malloc(arg_alloc_size, queue, allocation_kind); if (hostPtr == nullptr) throw RawMemoryAllocationFailure( arg_alloc_size, 1, RawMemoryAllocationFailure::FailureMode::Unknown, - RawMemoryAllocationFailure::AllocationMechanism::SYCLMalloc); + failure_tag); if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -157,6 +157,47 @@ void* SYCLDeviceUSMSpace::impl_allocate( return hostPtr; } +void* SYCLDeviceUSMSpace::allocate(const size_t arg_alloc_size) const { + return allocate("[unlabeled]", arg_alloc_size); +} + +void* SYCLDeviceUSMSpace::allocate(const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return allocate_sycl( + arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocDevice, + sycl::usm::alloc::device, m_queue); +} + +void* SYCLSharedUSMSpace::allocate(const size_t arg_alloc_size) const { + return allocate("[unlabeled]", arg_alloc_size); +} +void* SYCLSharedUSMSpace::allocate(const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + return allocate_sycl( + arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocShared, + sycl::usm::alloc::shared, m_queue); +} + +void sycl_deallocate(const char* arg_label, void* const arg_alloc_ptr, + const size_t arg_alloc_size, const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle, + const sycl::queue& queue) { + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t reported_size = + (arg_logical_size > 0) ? 
arg_logical_size : arg_alloc_size; + Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, + reported_size); + } + + sycl::free(arg_alloc_ptr, queue); +} + void SYCLDeviceUSMSpace::deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const { deallocate("[unlabeled]", arg_alloc_ptr, arg_alloc_size); @@ -165,21 +206,21 @@ void SYCLDeviceUSMSpace::deallocate(const char* arg_label, void* const arg_alloc_ptr, const size_t arg_alloc_size, const size_t arg_logical_size) const { - impl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size); + sycl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), m_queue); } -void SYCLDeviceUSMSpace::impl_deallocate( - const char* arg_label, void* const arg_alloc_ptr, - const size_t arg_alloc_size, const size_t arg_logical_size, - const Kokkos::Tools::SpaceHandle arg_handle) const { - if (Kokkos::Profiling::profileLibraryLoaded()) { - const size_t reported_size = - (arg_logical_size > 0) ? 
arg_logical_size : arg_alloc_size; - Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, - reported_size); - } - const cl::sycl::queue& queue = - *SYCL().impl_internal_space_instance()->m_queue; - cl::sycl::free(arg_alloc_ptr, queue); + +void SYCLSharedUSMSpace::deallocate(void* const arg_alloc_ptr, + const size_t arg_alloc_size) const { + deallocate("[unlabeled]", arg_alloc_ptr, arg_alloc_size); +} + +void SYCLSharedUSMSpace::deallocate(const char* arg_label, + void* const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size) const { + sycl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), m_queue); } } // namespace Experimental @@ -191,6 +232,9 @@ namespace Impl { #ifdef KOKKOS_ENABLE_DEBUG SharedAllocationRecord<void, void> SharedAllocationRecord< Kokkos::Experimental::SYCLDeviceUSMSpace, void>::s_root_record; + +SharedAllocationRecord<void, void> SharedAllocationRecord< + Kokkos::Experimental::SYCLSharedUSMSpace, void>::s_root_record; #endif SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: @@ -200,7 +244,7 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: const SharedAllocationRecord<void, void>::function_type dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through deallocation function - : SharedAllocationRecord<void, void>( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG &SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>::s_root_record, @@ -208,27 +252,36 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: Kokkos::Impl::checked_allocation_with_header(space, label, size), sizeof(SharedAllocationHeader) + size, dealloc), m_space(space) { - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::allocateData( - Kokkos::Profiling::make_space_handle(space.name()), label, data(), - size); - } - SharedAllocationHeader 
header; - // Fill in the Header information - header.m_record = static_cast<SharedAllocationRecord<void, void>*>(this); - - strncpy(header.m_label, label.c_str(), - SharedAllocationHeader::maximum_label_length); - // Set last element zero, in case c_str is too long - header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; + this->base_t::_fill_host_accessible_header_info(header, label); // Copy to device memory Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, HostSpace>( RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader)); } +SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, void>:: + SharedAllocationRecord( + const Kokkos::Experimental::SYCLSharedUSMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const SharedAllocationRecord<void, void>::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, + void>::s_root_record, +#endif + Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc), + m_space(arg_space) { + + this->base_t::_fill_host_accessible_header_info(*base_t::m_alloc_ptr, + arg_label); +} + } // namespace Impl } // namespace Kokkos @@ -238,201 +291,57 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: namespace Kokkos { namespace Impl { -std::string SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, - void>::get_label() const { - SharedAllocationHeader header; - - Kokkos::Impl::DeepCopy<Kokkos::HostSpace, - Kokkos::Experimental::SYCLDeviceUSMSpace>( - &header, RecordBase::head(), sizeof(SharedAllocationHeader)); - - return std::string(header.m_label); -} - -SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>* 
-SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: - allocate(const Kokkos::Experimental::SYCLDeviceUSMSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size) { - return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); -} - -void SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: - deallocate(SharedAllocationRecord<void, void>* arg_rec) { - delete static_cast<SharedAllocationRecord*>(arg_rec); -} - SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>::~SharedAllocationRecord() { + std::string label; if (Kokkos::Profiling::profileLibraryLoaded()) { SharedAllocationHeader header; Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, Kokkos::HostSpace>(&header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader)); - - Kokkos::Profiling::deallocateData( - Kokkos::Profiling::make_space_handle( - Kokkos::Experimental::SYCLDeviceUSMSpace::name()), - header.m_label, data(), size()); + label = header.label(); } - - m_space.deallocate(SharedAllocationRecord<void, void>::m_alloc_ptr, - SharedAllocationRecord<void, void>::m_alloc_size); -} - -//---------------------------------------------------------------------------- - -void* SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: - allocate_tracked(const Kokkos::Experimental::SYCLDeviceUSMSpace& arg_space, - const std::string& arg_alloc_label, - const size_t arg_alloc_size) { - if (!arg_alloc_size) return nullptr; - - SharedAllocationRecord* const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - - RecordBase::increment(r); - - return r->data(); + const auto alloc_size = SharedAllocationRecord<void, void>::m_alloc_size; + m_space.deallocate(label.c_str(), SharedAllocationRecord<void, void>::m_alloc_ptr, + alloc_size, alloc_size - sizeof(SharedAllocationHeader)); } -void SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, - void>::deallocate_tracked(void* const - 
arg_alloc_ptr) { - if (arg_alloc_ptr != nullptr) { - SharedAllocationRecord* const r = get_record(arg_alloc_ptr); - - RecordBase::decrement(r); +SharedAllocationRecord<Kokkos::Experimental::SYCLSharedUSMSpace, + void>::~SharedAllocationRecord() { + const char* label = nullptr; + if (Kokkos::Profiling::profileLibraryLoaded()) { + label = RecordBase::m_alloc_ptr->m_label; } -} - -void* SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: - reallocate_tracked(void* const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord* const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord* const r_new = - allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, - Kokkos::Experimental::SYCLDeviceUSMSpace>( - r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); - - RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); + const auto alloc_size = SharedAllocationRecord<void, void>::m_alloc_size; + m_space.deallocate(label, SharedAllocationRecord<void, void>::m_alloc_ptr, + alloc_size, alloc_size - sizeof(SharedAllocationHeader)); } //---------------------------------------------------------------------------- -SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>* -SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, - void>::get_record(void* alloc_ptr) { - using Header = SharedAllocationHeader; - using RecordSYCL = - SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>; - - // Copy the header from the allocation - Header head; - - Header const* const head_sycl = - alloc_ptr ? 
Header::get_header(alloc_ptr) : nullptr; +} // namespace Impl +} // namespace Kokkos - if (alloc_ptr) { - Kokkos::Impl::DeepCopy<Kokkos::HostSpace, - Kokkos::Experimental::SYCLDeviceUSMSpace>( - &head, head_sycl, sizeof(SharedAllocationHeader)); - } +//============================================================================== +// <editor-fold desc="Explicit instantiations of CRTP Base classes"> {{{1 - RecordSYCL* const record = - alloc_ptr ? static_cast<RecordSYCL*>(head.m_record) : nullptr; +#include <impl/Kokkos_SharedAlloc_timpl.hpp> - if (!alloc_ptr || record->m_alloc_ptr != head_sycl) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::SharedAllocationRecord< " - "Kokkos::Experimental::SYCLDeviceUSMSpace " - ", void >::get_record ERROR")); - } - - return record; -} +namespace Kokkos { +namespace Impl { -// Iterate records to print orphaned memory ... -void SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>:: - print_records(std::ostream& s, - const Kokkos::Experimental::SYCLDeviceUSMSpace&, - bool detail) { -#ifdef KOKKOS_ENABLE_DEBUG - SharedAllocationRecord<void, void>* r = &s_root_record; - - char buffer[256]; - - SharedAllocationHeader head; - - if (detail) { - do { - if (r->m_alloc_ptr) { - Kokkos::Impl::DeepCopy<Kokkos::HostSpace, - Kokkos::Experimental::SYCLDeviceUSMSpace>( - &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); - } else { - head.m_label[0] = 0; - } - - // Formatting dependent on sizeof(uintptr_t) - const char* format_string; - - if (sizeof(uintptr_t) == sizeof(unsigned long)) { - format_string = - "SYCL addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx " - "+ %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"; - } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { - format_string = - "SYCL addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ " - "0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n"; - } - - snprintf(buffer, 256, format_string, reinterpret_cast<uintptr_t>(r), - 
reinterpret_cast<uintptr_t>(r->m_prev), - reinterpret_cast<uintptr_t>(r->m_next), - reinterpret_cast<uintptr_t>(r->m_alloc_ptr), r->m_alloc_size, - r->m_count, reinterpret_cast<uintptr_t>(r->m_dealloc), - head.m_label); - s << buffer; - r = r->m_next; - } while (r != &s_root_record); - } else { - do { - if (r->m_alloc_ptr) { - Kokkos::Impl::DeepCopy<Kokkos::HostSpace, - Kokkos::Experimental::SYCLDeviceUSMSpace>( - &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); - - // Formatting dependent on sizeof(uintptr_t) - const char* format_string; - - if (sizeof(uintptr_t) == sizeof(unsigned long)) { - format_string = "SYCL [ 0x%.12lx + %ld ] %s\n"; - } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { - format_string = "SYCL [ 0x%.12llx + %ld ] %s\n"; - } - - snprintf(buffer, 256, format_string, - reinterpret_cast<uintptr_t>(r->data()), r->size(), - head.m_label); - } else { - snprintf(buffer, 256, "SYCL [ 0 + 0 ]\n"); - } - s << buffer; - r = r->m_next; - } while (r != &s_root_record); - } -#else - (void)s; - (void)detail; - throw_runtime_exception( - "Kokkos::Impl::SharedAllocationRecord<SYCLDeviceUSMSpace>::print_records" - " only works with KOKKOS_ENABLE_DEBUG enabled"); -#endif -} +// To avoid additional compilation cost for something that's (mostly?) not +// performance sensitive, we explicitly instantiate these CRTP base classes here, +// where we have access to the associated *_timpl.hpp header files. 
+template class HostInaccessibleSharedAllocationRecordCommon< + Kokkos::Experimental::SYCLDeviceUSMSpace>; +template class SharedAllocationRecordCommon< + Kokkos::Experimental::SYCLDeviceUSMSpace>; +template class SharedAllocationRecordCommon< + Kokkos::Experimental::SYCLSharedUSMSpace>; } // namespace Impl } // namespace Kokkos + +// </editor-fold> end Explicit instantiations of CRTP Base classes }}}1 +//============================================================================== diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a30cf2109a60ccc5934bfc6ee834a831c539d485 --- /dev/null +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp @@ -0,0 +1,816 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SYCL_TEAM_HPP +#define KOKKOS_SYCL_TEAM_HPP + +#include <Kokkos_Macros.hpp> + +#ifdef KOKKOS_ENABLE_SYCL + +#include <utility> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/**\brief Team member_type passed to TeamPolicy or TeamTask closures. 
+ */ +class SYCLTeamMember { + public: + using execution_space = Kokkos::Experimental::SYCL; + using scratch_memory_space = execution_space::scratch_memory_space; + + private: + mutable void* m_team_reduce; + scratch_memory_space m_team_shared; + int m_team_reduce_size; + sycl::nd_item<2> m_item; + + public: + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& team_shmem() const { + return m_team_shared.set_team_thread_mode(0, 1, 0); + } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& team_scratch( + const int level) const { + return m_team_shared.set_team_thread_mode(level, 1, 0); + } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& thread_scratch( + const int level) const { + return m_team_shared.set_team_thread_mode(level, team_size(), team_rank()); + } + + KOKKOS_INLINE_FUNCTION int league_rank() const { + return m_item.get_group_linear_id(); + } + KOKKOS_INLINE_FUNCTION int league_size() const { + // FIXME_SYCL needs to be revised for vector_length>1. + return m_item.get_group_range(0); + } + KOKKOS_INLINE_FUNCTION int team_rank() const { + return m_item.get_local_linear_id(); + } + KOKKOS_INLINE_FUNCTION int team_size() const { + // FIXME_SYCL needs to be revised for vector_length>1. 
+ return m_item.get_local_range(0); + } + KOKKOS_INLINE_FUNCTION void team_barrier() const { m_item.barrier(); } + + KOKKOS_INLINE_FUNCTION const sycl::nd_item<2>& item() const { return m_item; } + + //-------------------------------------------------------------------------- + + template <class ValueType> + KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& val, + const int thread_id) const { + // Wait for shared data write until all threads arrive here + m_item.barrier(sycl::access::fence_space::local_space); + if (m_item.get_local_id(1) == 0 && + static_cast<int>(m_item.get_local_id(0)) == thread_id) { + *static_cast<ValueType*>(m_team_reduce) = val; + } + // Wait for shared data read until root thread writes + m_item.barrier(sycl::access::fence_space::local_space); + val = *static_cast<ValueType*>(m_team_reduce); + } + + template <class Closure, class ValueType> + KOKKOS_INLINE_FUNCTION void team_broadcast(Closure const& f, ValueType& val, + const int thread_id) const { + f(val); + team_broadcast(val, thread_id); + } + + //-------------------------------------------------------------------------- + /**\brief Reduction across a team + */ + template <typename ReducerType> + KOKKOS_INLINE_FUNCTION + typename std::enable_if<is_reducer<ReducerType>::value>::type + team_reduce(ReducerType const& reducer) const noexcept { + team_reduce(reducer, reducer.reference()); + } + + template <typename ReducerType> + KOKKOS_INLINE_FUNCTION + typename std::enable_if<is_reducer<ReducerType>::value>::type + team_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) const noexcept { + using value_type = typename ReducerType::value_type; + + // We need to chunk up the whole reduction because we might not have + // allocated enough memory. 
+ const int maximum_work_range = + std::min<int>(m_team_reduce_size / sizeof(value_type), team_size()); + + int smaller_power_of_two = 1; + while ((smaller_power_of_two << 1) < maximum_work_range) + smaller_power_of_two <<= 1; + + const int idx = team_rank(); + auto reduction_array = static_cast<value_type*>(m_team_reduce); + + // Load values into the first maximum_work_range values of the reduction + // array in chunks. This means that only threads with an id in the + // corresponding chunk load values and the reduction is always done by the + // first smaller_power_of_two threads. + if (idx < maximum_work_range) reduction_array[idx] = value; + m_item.barrier(sycl::access::fence_space::local_space); + + for (int start = maximum_work_range; start < team_size(); + start += maximum_work_range) { + if (idx >= start && + idx < std::min(start + maximum_work_range, team_size())) + reducer.join(reduction_array[idx - start], value); + m_item.barrier(sycl::access::fence_space::local_space); + } + + for (int stride = smaller_power_of_two; stride > 0; stride >>= 1) { + if (idx < stride && idx + stride < maximum_work_range) + reducer.join(reduction_array[idx], reduction_array[idx + stride]); + m_item.barrier(sycl::access::fence_space::local_space); + } + reducer.reference() = reduction_array[0]; + m_item.barrier(sycl::access::fence_space::local_space); + } + + // FIXME_SYCL move somewhere else and combine with other places that do + // parallel_scan + // Exclusive scan returning the total sum. + // n is required to be a power of two and + // temp must point to an array containing the data to be processed + // The accumulated value is returned. 
+ template <typename Type> + static Type prescan(sycl::nd_item<2> m_item, Type* temp, int n) { + int thid = m_item.get_local_id(0); + + // First do a reduction saving intermediate results + for (int stride = 1; stride < n; stride <<= 1) { + auto idx = 2 * stride * (thid + 1) - 1; + if (idx < n) temp[idx] += temp[idx - stride]; + m_item.barrier(sycl::access::fence_space::local_space); + } + + Type total_sum = temp[n - 1]; + m_item.barrier(sycl::access::fence_space::local_space); + + // clear the last element so we get an exclusive scan + if (thid == 0) temp[n - 1] = Type{}; + m_item.barrier(sycl::access::fence_space::local_space); + + // Now add the intermediate results to the remaining items again + for (int stride = n / 2; stride > 0; stride >>= 1) { + auto idx = 2 * stride * (thid + 1) - 1; + if (idx < n) { + Type dummy = temp[idx - stride]; + temp[idx - stride] = temp[idx]; + temp[idx] += dummy; + } + m_item.barrier(sycl::access::fence_space::local_space); + } + + return total_sum; + } + + //-------------------------------------------------------------------------- + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. + * + * The global inter-team accumulation value will, at the end of the + * league's parallel execution, be the scan's total. + * Parallel execution ordering of the league's teams is non-deterministic. + * As such the base value for each team's scan operation is similarly + * non-deterministic. + */ + template <typename Type> + KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value, + Type* const global_accum) const { + // We need to chunk up the whole reduction because we might not have + // allocated enough memory. 
+ const int maximum_work_range = + std::min<int>(m_team_reduce_size / sizeof(Type), team_size()); + + int not_greater_power_of_two = 1; + while ((not_greater_power_of_two << 1) < maximum_work_range + 1) + not_greater_power_of_two <<= 1; + + Type intermediate; + Type total{}; + + const int idx = team_rank(); + const auto base_data = static_cast<Type*>(m_team_reduce); + + // Load values into the first not_greater_power_of_two values of the + // reduction array in chunks. This means that only threads with an id in the + // corresponding chunk load values and the reduction is always done by the + // first not_greater_power_of_two threads. + for (int start = 0; start < team_size(); + start += not_greater_power_of_two) { + m_item.barrier(sycl::access::fence_space::local_space); + if (idx >= start && idx < start + not_greater_power_of_two) { + base_data[idx - start] = value; + } + m_item.barrier(sycl::access::fence_space::local_space); + + const Type partial_total = + prescan(m_item, base_data, not_greater_power_of_two); + if (idx >= start && idx < start + not_greater_power_of_two) + intermediate = base_data[idx - start] + total; + if (start == 0) + total = partial_total; + else + total += partial_total; + } + + if (global_accum) { + if (team_size() == idx + 1) { + base_data[team_size()] = atomic_fetch_add(global_accum, total); + } + m_item.barrier(); // Wait for atomic + intermediate += base_data[team_size()]; + } + + return intermediate; + } + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. 
+ * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value ; + */ + template <typename Type> + KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const { + return this->template team_scan<Type>(value, nullptr); + } + + //---------------------------------------- + + template <typename ReducerType> + KOKKOS_INLINE_FUNCTION static + typename std::enable_if<is_reducer<ReducerType>::value>::type + vector_reduce(ReducerType const& reducer) { + vector_reduce(reducer, reducer.reference()); + } + + template <typename ReducerType> + KOKKOS_INLINE_FUNCTION static + typename std::enable_if<is_reducer<ReducerType>::value>::type + vector_reduce(ReducerType const& /*reducer*/, + typename ReducerType::value_type& /*value*/) { + // FIXME_SYCL + Kokkos::abort("Not implemented!"); + } + + //-------------------------------------------------------------------------- + /**\brief Global reduction across all blocks + * + * Return !0 if reducer contains the final value + */ + template <typename ReducerType> + KOKKOS_INLINE_FUNCTION static + typename std::enable_if<is_reducer<ReducerType>::value, int>::type + global_reduce(ReducerType const& /*reducer*/, + int* const /*global_scratch_flags*/, + void* const /*global_scratch_space*/, void* const /*shmem*/, + int const /*shmem_size*/) { + // FIXME_SYCL + Kokkos::abort("Not implemented!"); + } + + //---------------------------------------- + // Private for the driver + + KOKKOS_INLINE_FUNCTION + SYCLTeamMember(void* shared, const int shared_begin, const int shared_size, + void* scratch_level_1_ptr, const int scratch_level_1_size, + const sycl::nd_item<2> item) + : m_team_reduce(shared), + m_team_shared(static_cast<char*>(shared) + shared_begin, shared_size, + scratch_level_1_ptr, scratch_level_1_size), + m_team_reduce_size(shared_begin), + m_item(item) {} + + public: + // Declare to avoid unused private member warnings which are triggered + // when SFINAE excludes the member 
function which uses these variables + // Making another class a friend also suppresses these warnings + bool impl_avoid_sfinae_warning() const noexcept { + return m_team_reduce_size > 0 && m_team_reduce != nullptr; + } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template <typename iType> +struct TeamThreadRangeBoundariesStruct<iType, SYCLTeamMember> { + using index_type = iType; + const SYCLTeamMember& member; + const iType start; + const iType end; + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct(const SYCLTeamMember& thread_, iType count) + : member(thread_), start(0), end(count) {} + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct(const SYCLTeamMember& thread_, iType begin_, + iType end_) + : member(thread_), start(begin_), end(end_) {} +}; + +template <typename iType> +struct TeamVectorRangeBoundariesStruct<iType, SYCLTeamMember> { + using index_type = iType; + const SYCLTeamMember& member; + const iType start; + const iType end; + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct(const SYCLTeamMember& thread_, + const iType& count) + : member(thread_), start(0), end(count) {} + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct(const SYCLTeamMember& thread_, + const iType& begin_, const iType& end_) + : member(thread_), start(begin_), end(end_) {} +}; + +template <typename iType> +struct ThreadVectorRangeBoundariesStruct<iType, SYCLTeamMember> { + using index_type = iType; + const SYCLTeamMember& member; + const index_type start; + const index_type end; + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct(const SYCLTeamMember& thread, + index_type count) + : member(thread), start(static_cast<index_type>(0)), end(count) {} + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct(const 
SYCLTeamMember& thread, + index_type arg_begin, index_type arg_end) + : member(thread), start(arg_begin), end(arg_end) {} +}; + +} // namespace Impl + +template <typename iType> +KOKKOS_INLINE_FUNCTION + Impl::TeamThreadRangeBoundariesStruct<iType, Impl::SYCLTeamMember> + TeamThreadRange(const Impl::SYCLTeamMember& thread, iType count) { + return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( + thread, count); +} + +template <typename iType1, typename iType2> +KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, Impl::SYCLTeamMember> +TeamThreadRange(const Impl::SYCLTeamMember& thread, iType1 begin, iType2 end) { + using iType = typename std::common_type<iType1, iType2>::type; + return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( + thread, iType(begin), iType(end)); +} + +template <typename iType> +KOKKOS_INLINE_FUNCTION + Impl::TeamVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember> + TeamVectorRange(const Impl::SYCLTeamMember& thread, const iType& count) { + return Impl::TeamVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( + thread, count); +} + +template <typename iType1, typename iType2> +KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, Impl::SYCLTeamMember> +TeamVectorRange(const Impl::SYCLTeamMember& thread, const iType1& begin, + const iType2& end) { + using iType = typename std::common_type<iType1, iType2>::type; + return Impl::TeamVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( + thread, iType(begin), iType(end)); +} + +template <typename iType> +KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember> + ThreadVectorRange(const Impl::SYCLTeamMember& thread, iType count) { + return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( + thread, count); +} + +template <typename iType1, typename iType2> 
+KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, Impl::SYCLTeamMember> +ThreadVectorRange(const Impl::SYCLTeamMember& thread, iType1 arg_begin, + iType2 arg_end) { + using iType = typename std::common_type<iType1, iType2>::type; + return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>( + thread, iType(arg_begin), iType(arg_end)); +} + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct<Impl::SYCLTeamMember> PerTeam( + const Impl::SYCLTeamMember& thread) { + return Impl::ThreadSingleStruct<Impl::SYCLTeamMember>(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct<Impl::SYCLTeamMember> PerThread( + const Impl::SYCLTeamMember& thread) { + return Impl::VectorSingleStruct<Impl::SYCLTeamMember>(thread); +} + +//---------------------------------------------------------------------------- + +/** \brief Inter-thread parallel_for. + * + * Executes closure(iType i) for each i=[0..N). + * + * The range [0..N) is mapped to all threads of the calling thread team. + */ +template <typename iType, class Closure> +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::TeamThreadRangeBoundariesStruct<iType, Impl::SYCLTeamMember>& + loop_boundaries, + const Closure& closure) { + // FIXME_SYCL Fix for vector_length>1. + for (iType i = loop_boundaries.start + + loop_boundaries.member.item().get_local_id(0); + i < loop_boundaries.end; + i += loop_boundaries.member.item().get_local_range(0)) + closure(i); +} + +//---------------------------------------------------------------------------- + +/** \brief Inter-thread parallel_reduce with a reducer. + * + * Executes closure(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the + * calling thread team and a summation of val is + * performed and put into result. 
+ */ +template <typename iType, class Closure, class ReducerType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type + parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { + typename ReducerType::value_type value; + reducer.init(value); + + // FIXME_SYCL Fix for vector_length>1. + for (iType i = loop_boundaries.start + + loop_boundaries.member.item().get_local_id(0); + i < loop_boundaries.end; + i += loop_boundaries.member.item().get_local_range(0)) { + closure(i, value); + } + + loop_boundaries.member.team_reduce(reducer, value); +} + +/** \brief Inter-thread parallel_reduce assuming summation. + * + * Executes closure(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the + * calling thread team and a summation of val is + * performed and put into result. + */ +template <typename iType, class Closure, typename ValueType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type + parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { + ValueType val; + Kokkos::Sum<ValueType> reducer(val); + + reducer.init(reducer.reference()); + + // FIXME_SYCL Fix for vector_length>1. + for (iType i = loop_boundaries.start + + loop_boundaries.member.item().get_local_id(0); + i < loop_boundaries.end; + i += loop_boundaries.member.item().get_local_range(0)) { + closure(i, val); + } + + loop_boundaries.member.team_reduce(reducer, val); + result = reducer.reference(); +} + +/** \brief Inter-thread parallel exclusive prefix sum. + * + * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) + * + * The range [0..N) is mapped to each rank in the team (whose global rank is + * less than N) and a scan operation is performed. 
The last call to closure has + * final == true. + */ +// This is the same code as in CUDA and largely the same as in OpenMPTarget +template <typename iType, typename FunctorType> +KOKKOS_INLINE_FUNCTION void parallel_scan( + const Impl::TeamThreadRangeBoundariesStruct<iType, Impl::SYCLTeamMember>& + loop_bounds, + const FunctorType& lambda) { + // Extract value_type from lambda + using value_type = typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, void, + FunctorType>::value_type; + + const auto start = loop_bounds.start; + const auto end = loop_bounds.end; + auto& member = loop_bounds.member; + const auto team_size = member.team_size(); + const auto team_rank = member.team_rank(); + const auto nchunk = (end - start + team_size - 1) / team_size; + value_type accum = 0; + // each team has to process one or more chunks of the prefix scan + for (iType i = 0; i < nchunk; ++i) { + auto ii = start + i * team_size + team_rank; + // local accumulation for this chunk + value_type local_accum = 0; + // user updates value with prefix value + if (ii < loop_bounds.end) lambda(ii, local_accum, false); + // perform team scan + local_accum = member.team_scan(local_accum); + // add this block's accum to total accumulation + auto val = accum + local_accum; + // user updates their data with total accumulation + if (ii < loop_bounds.end) lambda(ii, val, true); + // the last value needs to be propagated to next chunk + if (team_rank == team_size - 1) accum = val; + // broadcast last value to rest of the team + member.team_broadcast(accum, team_size - 1); + } +} + +template <typename iType, class Closure> +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::TeamVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>& + loop_boundaries, + const Closure& closure) { + // FIXME_SYCL adapt for vector_length != 1 + for (iType i = loop_boundaries.start + + loop_boundaries.member.item().get_local_id(0); + i < loop_boundaries.end; + i += 
loop_boundaries.member.item().get_local_range(0)) + closure(i); +} + +template <typename iType, class Closure, class ReducerType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type + parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { + // FIXME_SYCL adapt for vector_length != 1 + typename ReducerType::value_type value; + reducer.init(value); + + for (iType i = loop_boundaries.start + + loop_boundaries.member.item().get_local_id(0); + i < loop_boundaries.end; + i += loop_boundaries.member.item().get_local_range(0)) { + closure(i, value); + } + + loop_boundaries.member.team_reduce(reducer, value); +} + +template <typename iType, class Closure, typename ValueType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<!Kokkos::is_reducer<ValueType>::value>::type + parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { + // FIXME_SYCL adapt for vector_length != 1 + ValueType val; + Kokkos::Sum<ValueType> reducer(val); + + reducer.init(reducer.reference()); + + for (iType i = loop_boundaries.start + + loop_boundaries.member.item().get_local_id(0); + i < loop_boundaries.end; + i += loop_boundaries.member.item().get_local_range(0)) { + closure(i, val); + } + + loop_boundaries.member.team_reduce(reducer, val); + result = reducer.reference(); +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel_for. + * + * Executes closure(iType i) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes of the calling thread. 
+ */ +template <typename iType, class Closure> +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>& + loop_boundaries, + const Closure& closure) { + // FIXME_SYCL adapt for vector_length!=1 + for (auto i = loop_boundaries.start; i != loop_boundaries.end; ++i) + closure(i); +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel_reduce. + * + * Calls closure(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed using += + * and output into result. + * + * The identity value for the += operator is assumed to be the default + * constructed value. + */ +template <typename iType, class Closure, class ReducerType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<is_reducer<ReducerType>::value>::type + parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember> const& loop_boundaries, + Closure const& closure, ReducerType const& reducer) { + // FIXME_SYCL adapt for vector_length != 1 + reducer.init(reducer.reference()); + + for (iType i = loop_boundaries.start; i < loop_boundaries.end; ++i) { + closure(i, reducer.reference()); + } +} + +/** \brief Intra-thread vector parallel_reduce. + * + * Calls closure(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed using += + * and output into result. + * + * The identity value for the += operator is assumed to be the default + * constructed value. 
+ */ +template <typename iType, class Closure, typename ValueType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<!is_reducer<ValueType>::value>::type + parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember> const& loop_boundaries, + Closure const& closure, ValueType& result) { + // FIXME_SYCL adapt for vector_length != 1 + result = ValueType(); + + for (iType i = loop_boundaries.start; i < loop_boundaries.end; ++i) { + closure(i, result); + } +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel exclusive prefix sum with reducer. + * + * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes in the + * thread and a scan operation is performed. + * The last call to closure has final == true. + */ +template <typename iType, class Closure, typename ReducerType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type + parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::SYCLTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { + // FIXME_SYCL modify for vector_length!=1 + using value_type = typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure>::value_type; + + value_type accum; + reducer.init(accum); + + for (iType i = loop_boundaries.start; i < loop_boundaries.end; ++i) { + closure(i, accum, true); + } +} + +/** \brief Intra-thread vector parallel exclusive prefix sum. + * + * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes in the + * thread and a scan operation is performed. + * The last call to closure has final == true. 
+ */ +template <typename iType, class Closure> +KOKKOS_INLINE_FUNCTION void parallel_scan( + const Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::SYCLTeamMember>& + loop_boundaries, + const Closure& closure) { + using value_type = typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure>::value_type; + value_type dummy; + parallel_scan(loop_boundaries, closure, Kokkos::Sum<value_type>{dummy}); +} + +} // namespace Kokkos + +namespace Kokkos { + +template <class FunctorType> +KOKKOS_INLINE_FUNCTION void single( + const Impl::VectorSingleStruct<Impl::SYCLTeamMember>& single_struct, + const FunctorType& lambda) { + if (single_struct.team_member.item().get_local_id(1) == 0) lambda(); +} + +template <class FunctorType> +KOKKOS_INLINE_FUNCTION void single( + const Impl::ThreadSingleStruct<Impl::SYCLTeamMember>& single_struct, + const FunctorType& lambda) { + if (single_struct.team_member.team_rank() == 0) lambda(); +} + +template <class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION void single( + const Impl::VectorSingleStruct<Impl::SYCLTeamMember>& single_struct, + const FunctorType& lambda, ValueType& val) { + if (single_struct.team_member.item().get_local_id(1) == 0) lambda(val); +} + +template <class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION void single( + const Impl::ThreadSingleStruct<Impl::SYCLTeamMember>& single_struct, + const FunctorType& lambda, ValueType& val) { + if (single_struct.team_member.team_rank() == 0) lambda(val); +} + +} // namespace Kokkos + +#endif + +#endif /* #ifndef KOKKOS_SYCL_TEAM_HPP */ diff --git a/packages/kokkos/core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp new file mode 100644 index 0000000000000000000000000000000000000000..141a692f6090555cf129997a64bc9e99941f830d --- /dev/null +++ b/packages/kokkos/core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp @@ -0,0 +1,134 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SYCL_UNIQUE_TOKEN_HPP +#define KOKKOS_SYCL_UNIQUE_TOKEN_HPP + +#include <impl/Kokkos_ConcurrentBitset.hpp> +#include <Kokkos_SYCL_Space.hpp> +#include <Kokkos_UniqueToken.hpp> + +namespace Kokkos { +namespace Experimental { + +// both global and instance Unique Tokens are implemented in the same way +template <> +class UniqueToken<SYCL, UniqueTokenScope::Global> { + protected: + uint32_t volatile* m_buffer; + uint32_t m_count; + + public: + using execution_space = SYCL; + using size_type = int32_t; + + explicit UniqueToken(execution_space const& = execution_space()) + : m_buffer(Impl::SYCLInternal::singleton().m_scratchConcurrentBitset), + m_count(SYCL::concurrency()) {} + + KOKKOS_DEFAULTED_FUNCTION + UniqueToken(const UniqueToken&) = default; + + KOKKOS_DEFAULTED_FUNCTION + UniqueToken(UniqueToken&&) = default; + + KOKKOS_DEFAULTED_FUNCTION + UniqueToken& operator=(const UniqueToken&) = default; + + KOKKOS_DEFAULTED_FUNCTION + UniqueToken& operator=(UniqueToken&&) = default; + + /// \brief upper bound for acquired values, i.e. 
0 <= value < size() + KOKKOS_INLINE_FUNCTION + size_type size() const noexcept { return m_count; } + + /// \brief acquire value such that 0 <= value < size() + KOKKOS_INLINE_FUNCTION + size_type acquire() const { + const Kokkos::pair<int, int> result = + Kokkos::Impl::concurrent_bitset::acquire_bounded( + m_buffer, m_count +#if defined(KOKKOS_ARCH_INTEL_GEN) + , + Kokkos::Impl::clock_tic() % m_count +#endif + ); + + if (result.first < 0) { + Kokkos::abort( + "UniqueToken<SYCL> failure to acquire tokens, no tokens available"); + } + + return result.first; + } + + /// \brief release an acquired value + KOKKOS_INLINE_FUNCTION + void release(size_type i) const noexcept { + Kokkos::Impl::concurrent_bitset::release(m_buffer, i); + } +}; + +template <> +class UniqueToken<SYCL, UniqueTokenScope::Instance> + : public UniqueToken<SYCL, UniqueTokenScope::Global> { + View<uint32_t*, SYCLDeviceUSMSpace> m_buffer_view; + + public: + explicit UniqueToken(execution_space const& arg = execution_space()) + : UniqueToken<SYCL, UniqueTokenScope::Global>(arg) {} + + UniqueToken(size_type max_size, execution_space const& = execution_space()) + : m_buffer_view( + "UniqueToken::m_buffer_view", + ::Kokkos::Impl::concurrent_bitset::buffer_bound(max_size)) { + m_buffer = m_buffer_view.data(); + m_count = max_size; + } +}; + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index 443ed7b721932fd14ddbd0f3a116d86b62ab4c66..92bd671bd53bf89482aee39cdd34b3391e9a01a2 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -111,7 +111,7 @@ namespace Impl { void execute_function_noop(ThreadsExec &, const void *) {} -void ThreadsExec::driver(void) { +void ThreadsExec::driver() { SharedAllocationRecord<void, void>::tracking_enable(); ThreadsExec this_thread; @@ -427,7 +427,7 @@ void 
ThreadsExec::execute_resize_scratch(ThreadsExec &exec, const void *) { // Allocate tracked memory: { Record *const r = - Record::allocate(Kokkos::HostSpace(), "thread_scratch", + Record::allocate(Kokkos::HostSpace(), "Kokkos::thread_scratch", s_threads_process.m_scratch_thread_end); Record::increment(r); diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index fb7736b478197ca79ad2548ab8c195f121d1a4ce..e0ae43dd87ec337d24f659e3da74a662f31dfb84 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -891,14 +891,16 @@ KOKKOS_INLINE_FUNCTION thread, count); } -template <typename iType> -KOKKOS_INLINE_FUNCTION - Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::ThreadsExecTeamMember> - ThreadVectorRange(const Impl::ThreadsExecTeamMember& thread, - const iType& arg_begin, const iType& arg_end) { +template <typename iType1, typename iType2> +KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, + Impl::ThreadsExecTeamMember> +ThreadVectorRange(const Impl::ThreadsExecTeamMember& thread, + const iType1& arg_begin, const iType2& arg_end) { + using iType = typename std::common_type<iType1, iType2>::type; return Impl::ThreadVectorRangeBoundariesStruct<iType, Impl::ThreadsExecTeamMember>( - thread, arg_begin, arg_end); + thread, iType(arg_begin), iType(arg_end)); } KOKKOS_INLINE_FUNCTION @@ -1095,6 +1097,27 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( } } +/** \brief Intra-thread vector parallel scan with reducer + * + */ +template <typename iType, class FunctorType, typename ReducerType> +KOKKOS_INLINE_FUNCTION + typename std::enable_if<Kokkos::is_reducer<ReducerType>::value>::type + parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::ThreadsExecTeamMember>& loop_boundaries, + const FunctorType& lambda, const ReducerType& reducer) { + 
typename ReducerType::value_type scan_val; + reducer.init(scan_val); + +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, true); + } +} + } // namespace Kokkos namespace Kokkos { diff --git a/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp b/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp index eac96998650c707dc8c6ab9a905576e6c781ee88..df09e9e7215310e26d72009cc32f7e5339dfdc5b 100644 --- a/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp +++ b/packages/kokkos/core/src/decl/Kokkos_Declare_CUDA.hpp @@ -47,6 +47,15 @@ #if defined(KOKKOS_ENABLE_CUDA) #include <Kokkos_Cuda.hpp> +#include <Cuda/Kokkos_Cuda_Parallel.hpp> +#include <Cuda/Kokkos_Cuda_KernelLaunch.hpp> +#include <Cuda/Kokkos_Cuda_Instance.hpp> +#include <Cuda/Kokkos_Cuda_View.hpp> +#include <Cuda/Kokkos_Cuda_Team.hpp> +#include <Cuda/Kokkos_Cuda_Parallel.hpp> +#include <Cuda/Kokkos_Cuda_Task.hpp> +#include <Cuda/Kokkos_Cuda_MDRangePolicy.hpp> +#include <Cuda/Kokkos_Cuda_UniqueToken.hpp> #endif #endif diff --git a/packages/kokkos/core/src/decl/Kokkos_Declare_OPENMPTARGET.hpp b/packages/kokkos/core/src/decl/Kokkos_Declare_OPENMPTARGET.hpp index 769585fc7392300cc01a8d29f494236ed892fd9d..b193d1e741bc19d1725994839c682fa84f2267f9 100644 --- a/packages/kokkos/core/src/decl/Kokkos_Declare_OPENMPTARGET.hpp +++ b/packages/kokkos/core/src/decl/Kokkos_Declare_OPENMPTARGET.hpp @@ -48,6 +48,7 @@ #if defined(KOKKOS_ENABLE_OPENMPTARGET) #include <Kokkos_OpenMPTarget.hpp> #include <Kokkos_OpenMPTargetSpace.hpp> +#include <OpenMPTarget/Kokkos_OpenMPTarget_UniqueToken.hpp> #endif #endif diff --git a/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp b/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp index 4981435c829881d7aa2e0d0c1ae7165f4e689b0b..92cd85bcae8b9e8c65d37b9308033a0748c8d3aa 100644 --- a/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp +++ 
b/packages/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp @@ -48,9 +48,12 @@ #if defined(KOKKOS_ENABLE_SYCL) #include <Kokkos_SYCL.hpp> #include <SYCL/Kokkos_SYCL_DeepCopy.hpp> +#include <SYCL/Kokkos_SYCL_MDRangePolicy.hpp> #include <SYCL/Kokkos_SYCL_Parallel_Range.hpp> #include <SYCL/Kokkos_SYCL_Parallel_Reduce.hpp> #include <SYCL/Kokkos_SYCL_Parallel_Scan.hpp> +#include <SYCL/Kokkos_SYCL_Parallel_Team.hpp> +#include <SYCL/Kokkos_SYCL_UniqueToken.hpp> #endif #endif diff --git a/packages/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp b/packages/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp index 0465c380cbac1be27ee9769ab7f93f87fb315aa5..1a4e7b482c44b93f87ed981682e3895cf5a534ff 100644 --- a/packages/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp +++ b/packages/kokkos/core/src/fwd/Kokkos_Fwd_HIP.hpp @@ -48,8 +48,9 @@ #if defined(KOKKOS_ENABLE_HIP) namespace Kokkos { namespace Experimental { -class HIPSpace; ///< Memory space on HIP GPU -class HIP; ///< Execution space for HIP GPU +class HIPSpace; ///< Memory space on HIP GPU +class HIPHostPinnedSpace; ///< Memory space on Host accessible to HIP GPU +class HIP; ///< Execution space for HIP GPU } // namespace Experimental } // namespace Kokkos #endif diff --git a/packages/kokkos/core/src/fwd/Kokkos_Fwd_SYCL.hpp b/packages/kokkos/core/src/fwd/Kokkos_Fwd_SYCL.hpp index fc21b942c29527e8191151c96c547551291409e1..7754daa8a0189a3d0708ce6505955be4b76b2d61 100644 --- a/packages/kokkos/core/src/fwd/Kokkos_Fwd_SYCL.hpp +++ b/packages/kokkos/core/src/fwd/Kokkos_Fwd_SYCL.hpp @@ -48,7 +48,10 @@ #if defined(KOKKOS_ENABLE_SYCL) namespace Kokkos { namespace Experimental { -class SYCLDeviceUSMSpace; ///< Memory space on SYCL device +class SYCLDeviceUSMSpace; ///< Memory space on SYCL device, not accessible from + ///< the host +class SYCLSharedUSMSpace; ///< Memory space accessible from both the SYCL + ///< device and the host class SYCL; ///< Execution space for SYCL } // namespace Experimental } // namespace Kokkos diff --git 
a/packages/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp b/packages/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp index d9f02b47acaac5a0611878c180bcb93bcc0d57c6..7f72b3983f57c9adea157cf70d815339696cd986 100644 --- a/packages/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp +++ b/packages/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp @@ -1582,7 +1582,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -1600,7 +1600,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - Tile_Loop_Type<RP::rank, (RP::inner_direction == RP::Left), index_type, + Tile_Loop_Type<RP::rank, (RP::inner_direction == Iterate::Left), index_type, Tag>::apply(m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims); } @@ -1618,7 +1618,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -1636,7 +1636,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_2L(index_type, m_tiledims) { apply(LOOP_ARGS_2); } @@ -1644,7 +1644,7 @@ struct HostIterateTile< // #pragma simd LOOP_2L(index_type, m_tiledims) { apply(LOOP_ARGS_2); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -1653,7 +1653,7 @@ struct HostIterateTile< // #pragma simd LOOP_2R(index_type, m_tiledims) { apply(LOOP_ARGS_2); } } - } // end RP::Right + } // end Iterate::Right } // end 
op() rank == 2 @@ -1662,7 +1662,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -1680,7 +1680,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_3L(index_type, m_tiledims) { apply(LOOP_ARGS_3); } @@ -1688,7 +1688,7 @@ struct HostIterateTile< // #pragma simd LOOP_3L(index_type, m_tiledims) { apply(LOOP_ARGS_3); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -1697,7 +1697,7 @@ struct HostIterateTile< // #pragma simd LOOP_3R(index_type, m_tiledims) { apply(LOOP_ARGS_3); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 3 @@ -1706,7 +1706,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -1724,7 +1724,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_4L(index_type, m_tiledims) { apply(LOOP_ARGS_4); } @@ -1732,7 +1732,7 @@ struct HostIterateTile< // #pragma simd LOOP_4L(index_type, m_tiledims) { apply(LOOP_ARGS_4); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -1741,7 +1741,7 @@ struct HostIterateTile< // #pragma simd LOOP_4R(index_type, m_tiledims) { apply(LOOP_ARGS_4); } } - } // end RP::Right + } // end Iterate::Right 
} // end op() rank == 4 @@ -1750,7 +1750,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -1768,7 +1768,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_5L(index_type, m_tiledims) { apply(LOOP_ARGS_5); } @@ -1776,7 +1776,7 @@ struct HostIterateTile< // #pragma simd LOOP_5L(index_type, m_tiledims) { apply(LOOP_ARGS_5); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -1785,7 +1785,7 @@ struct HostIterateTile< // #pragma simd LOOP_5R(index_type, m_tiledims) { apply(LOOP_ARGS_5); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 5 @@ -1794,7 +1794,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -1812,7 +1812,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_6L(index_type, m_tiledims) { apply(LOOP_ARGS_6); } @@ -1820,7 +1820,7 @@ struct HostIterateTile< // #pragma simd LOOP_6L(index_type, m_tiledims) { apply(LOOP_ARGS_6); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -1829,7 +1829,7 @@ struct HostIterateTile< // #pragma simd LOOP_6R(index_type, m_tiledims) { apply(LOOP_ARGS_6); } } - } // end RP::Right + } // end 
Iterate::Right } // end op() rank == 6 @@ -1838,7 +1838,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -1856,7 +1856,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_7L(index_type, m_tiledims) { apply(LOOP_ARGS_7); } @@ -1864,7 +1864,7 @@ struct HostIterateTile< // #pragma simd LOOP_7L(index_type, m_tiledims) { apply(LOOP_ARGS_7); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -1873,7 +1873,7 @@ struct HostIterateTile< // #pragma simd LOOP_7R(index_type, m_tiledims) { apply(LOOP_ARGS_7); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 7 @@ -1882,7 +1882,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -1900,7 +1900,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_8L(index_type, m_tiledims) { apply(LOOP_ARGS_8); } @@ -1908,7 +1908,7 @@ struct HostIterateTile< // #pragma simd LOOP_8L(index_type, m_tiledims) { apply(LOOP_ARGS_8); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -1917,7 +1917,7 @@ struct HostIterateTile< // #pragma simd LOOP_8R(index_type, m_tiledims) { apply(LOOP_ARGS_8); } } - } // end RP::Right + 
} // end Iterate::Right } // end op() rank == 8 #endif @@ -2003,7 +2003,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2021,7 +2021,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - Tile_Loop_Type<RP::rank, (RP::inner_direction == RP::Left), index_type, + Tile_Loop_Type<RP::rank, (RP::inner_direction == Iterate::Left), index_type, Tag>::apply(m_v, m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims); } @@ -2039,7 +2039,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2057,7 +2057,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_2L(index_type, m_tiledims) { apply(LOOP_ARGS_2); } @@ -2065,7 +2065,7 @@ struct HostIterateTile< // #pragma simd LOOP_2L(index_type, m_tiledims) { apply(LOOP_ARGS_2); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2074,7 +2074,7 @@ struct HostIterateTile< // #pragma simd LOOP_2R(index_type, m_tiledims) { apply(LOOP_ARGS_2); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 2 @@ -2083,7 +2083,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + 
m_rp.m_lower[i]; @@ -2101,7 +2101,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_3L(index_type, m_tiledims) { apply(LOOP_ARGS_3); } @@ -2109,7 +2109,7 @@ struct HostIterateTile< // #pragma simd LOOP_3L(index_type, m_tiledims) { apply(LOOP_ARGS_3); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2118,7 +2118,7 @@ struct HostIterateTile< // #pragma simd LOOP_3R(index_type, m_tiledims) { apply(LOOP_ARGS_3); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 3 @@ -2127,7 +2127,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2145,7 +2145,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_4L(index_type, m_tiledims) { apply(LOOP_ARGS_4); } @@ -2153,7 +2153,7 @@ struct HostIterateTile< // #pragma simd LOOP_4L(index_type, m_tiledims) { apply(LOOP_ARGS_4); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2162,7 +2162,7 @@ struct HostIterateTile< // #pragma simd LOOP_4R(index_type, m_tiledims) { apply(LOOP_ARGS_4); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 4 @@ -2171,7 +2171,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * 
m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2189,7 +2189,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_5L(index_type, m_tiledims) { apply(LOOP_ARGS_5); } @@ -2197,7 +2197,7 @@ struct HostIterateTile< // #pragma simd LOOP_5L(index_type, m_tiledims) { apply(LOOP_ARGS_5); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2206,7 +2206,7 @@ struct HostIterateTile< // #pragma simd LOOP_5R(index_type, m_tiledims) { apply(LOOP_ARGS_5); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 5 @@ -2215,7 +2215,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2233,7 +2233,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_6L(index_type, m_tiledims) { apply(LOOP_ARGS_6); } @@ -2241,7 +2241,7 @@ struct HostIterateTile< // #pragma simd LOOP_6L(index_type, m_tiledims) { apply(LOOP_ARGS_6); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2250,7 +2250,7 @@ struct HostIterateTile< // #pragma simd LOOP_6R(index_type, m_tiledims) { apply(LOOP_ARGS_6); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 6 @@ -2259,7 +2259,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % 
m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2277,7 +2277,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_7L(index_type, m_tiledims) { apply(LOOP_ARGS_7); } @@ -2285,7 +2285,7 @@ struct HostIterateTile< // #pragma simd LOOP_7L(index_type, m_tiledims) { apply(LOOP_ARGS_7); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2294,7 +2294,7 @@ struct HostIterateTile< // #pragma simd LOOP_7R(index_type, m_tiledims) { apply(LOOP_ARGS_7); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 7 @@ -2303,7 +2303,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2321,7 +2321,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_8L(index_type, m_tiledims) { apply(LOOP_ARGS_8); } @@ -2329,7 +2329,7 @@ struct HostIterateTile< // #pragma simd LOOP_8L(index_type, m_tiledims) { apply(LOOP_ARGS_8); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2338,7 +2338,7 @@ struct HostIterateTile< // #pragma simd LOOP_8R(index_type, m_tiledims) { apply(LOOP_ARGS_8); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 8 #endif @@ -2426,7 +2426,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { 
m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2444,7 +2444,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - Tile_Loop_Type<RP::rank, (RP::inner_direction == RP::Left), index_type, + Tile_Loop_Type<RP::rank, (RP::inner_direction == Iterate::Left), index_type, Tag>::apply(m_v, m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims); } @@ -2462,7 +2462,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2480,7 +2480,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_2L(index_type, m_tiledims) { apply(LOOP_ARGS_2); } @@ -2488,7 +2488,7 @@ struct HostIterateTile< // #pragma simd LOOP_2L(index_type, m_tiledims) { apply(LOOP_ARGS_2); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2497,7 +2497,7 @@ struct HostIterateTile< // #pragma simd LOOP_2R(index_type, m_tiledims) { apply(LOOP_ARGS_2); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 2 @@ -2506,7 +2506,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2524,7 +2524,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma 
simd LOOP_3L(index_type, m_tiledims) { apply(LOOP_ARGS_3); } @@ -2532,7 +2532,7 @@ struct HostIterateTile< // #pragma simd LOOP_3L(index_type, m_tiledims) { apply(LOOP_ARGS_3); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2541,7 +2541,7 @@ struct HostIterateTile< // #pragma simd LOOP_3R(index_type, m_tiledims) { apply(LOOP_ARGS_3); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 3 @@ -2550,7 +2550,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2568,7 +2568,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_4L(index_type, m_tiledims) { apply(LOOP_ARGS_4); } @@ -2576,7 +2576,7 @@ struct HostIterateTile< // #pragma simd LOOP_4L(index_type, m_tiledims) { apply(LOOP_ARGS_4); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2585,7 +2585,7 @@ struct HostIterateTile< // #pragma simd LOOP_4R(index_type, m_tiledims) { apply(LOOP_ARGS_4); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 4 @@ -2594,7 +2594,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2612,7 +2612,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // 
#pragma simd LOOP_5L(index_type, m_tiledims) { apply(LOOP_ARGS_5); } @@ -2620,7 +2620,7 @@ struct HostIterateTile< // #pragma simd LOOP_5L(index_type, m_tiledims) { apply(LOOP_ARGS_5); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2629,7 +2629,7 @@ struct HostIterateTile< // #pragma simd LOOP_5R(index_type, m_tiledims) { apply(LOOP_ARGS_5); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 5 @@ -2638,7 +2638,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2656,7 +2656,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_6L(index_type, m_tiledims) { apply(LOOP_ARGS_6); } @@ -2664,7 +2664,7 @@ struct HostIterateTile< // #pragma simd LOOP_6L(index_type, m_tiledims) { apply(LOOP_ARGS_6); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2673,7 +2673,7 @@ struct HostIterateTile< // #pragma simd LOOP_6R(index_type, m_tiledims) { apply(LOOP_ARGS_6); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 6 @@ -2682,7 +2682,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2700,7 +2700,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if 
(full_tile) { // #pragma simd LOOP_7L(index_type, m_tiledims) { apply(LOOP_ARGS_7); } @@ -2708,7 +2708,7 @@ struct HostIterateTile< // #pragma simd LOOP_7L(index_type, m_tiledims) { apply(LOOP_ARGS_7); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2717,7 +2717,7 @@ struct HostIterateTile< // #pragma simd LOOP_7R(index_type, m_tiledims) { apply(LOOP_ARGS_7); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 7 @@ -2726,7 +2726,7 @@ struct HostIterateTile< point_type m_offset; point_type m_tiledims; - if (RP::outer_direction == RP::Left) { + if (RP::outer_direction == Iterate::Left) { for (int i = 0; i < RP::rank; ++i) { m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i]; @@ -2744,7 +2744,7 @@ struct HostIterateTile< // partial tile dims const bool full_tile = check_iteration_bounds(m_tiledims, m_offset); - if (RP::inner_direction == RP::Left) { + if (RP::inner_direction == Iterate::Left) { if (full_tile) { // #pragma simd LOOP_8L(index_type, m_tiledims) { apply(LOOP_ARGS_8); } @@ -2752,7 +2752,7 @@ struct HostIterateTile< // #pragma simd LOOP_8L(index_type, m_tiledims) { apply(LOOP_ARGS_8); } } - } // end RP::Left + } // end Iterate::Left else { if (full_tile) { // #pragma simd @@ -2761,7 +2761,7 @@ struct HostIterateTile< // #pragma simd LOOP_8R(index_type, m_tiledims) { apply(LOOP_ARGS_8); } } - } // end RP::Right + } // end Iterate::Right } // end op() rank == 8 #endif diff --git a/packages/kokkos/core/src/impl/KokkosExp_IterateTileGPU.hpp b/packages/kokkos/core/src/impl/KokkosExp_IterateTileGPU.hpp index 45e53d3a4bc9fe4c439ac9793f8185fa1aa8258c..688afcc107e4e4ff93a2b415c8209d29bf4c0ba2 100644 --- a/packages/kokkos/core/src/impl/KokkosExp_IterateTileGPU.hpp +++ b/packages/kokkos/core/src/impl/KokkosExp_IterateTileGPU.hpp @@ -57,90 +57,73 @@ namespace Kokkos { namespace Impl { +#ifdef KOKKOS_ENABLE_SYCL +template <typename index_type> +struct EmulateCUDADim3 { + 
index_type x; + index_type y; + index_type z; +}; +#endif + +template <class Tag, class Functor, class... Args> +KOKKOS_IMPL_FORCEINLINE_FUNCTION std::enable_if_t<std::is_void<Tag>::value> +_tag_invoke(Functor const& f, Args&&... args) { + f((Args &&) args...); +} + +template <class Tag, class Functor, class... Args> +KOKKOS_IMPL_FORCEINLINE_FUNCTION std::enable_if_t<!std::is_void<Tag>::value> +_tag_invoke(Functor const& f, Args&&... args) { + f(Tag{}, (Args &&) args...); +} + +template <class Tag, class Functor, class T, size_t N, size_t... Idxs, + class... Args> +KOKKOS_IMPL_FORCEINLINE_FUNCTION void _tag_invoke_array_helper( + Functor const& f, T (&vals)[N], std::integer_sequence<size_t, Idxs...>, + Args&&... args) { + _tag_invoke<Tag>(f, vals[Idxs]..., (Args &&) args...); +} + +template <class Tag, class Functor, class T, size_t N, class... Args> +KOKKOS_IMPL_FORCEINLINE_FUNCTION void _tag_invoke_array(Functor const& f, + T (&vals)[N], + Args&&... args) { + _tag_invoke_array_helper<Tag>(f, vals, std::make_index_sequence<N>{}, + (Args &&) args...); +} + // ------------------------------------------------------------------ // // ParallelFor iteration pattern template <int N, typename PolicyType, typename Functor, typename Tag> struct DeviceIterateTile; // Rank 2 -// Specializations for void tag type -template <typename PolicyType, typename Functor> -struct DeviceIterateTile<2, PolicyType, Functor, void> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_) - : m_policy(policy_), m_func(f_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (index_type tile_id1 = static_cast<index_type>(blockIdx.y); - tile_id1 < m_policy.m_tile_end[1]; tile_id1 += gridDim.y) { - const index_type offset_1 = - tile_id1 * m_policy.m_tile[1] + - static_cast<index_type>(threadIdx.y) + - 
static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - static_cast<index_type>(threadIdx.y) < m_policy.m_tile[1]) { - for (index_type tile_id0 = static_cast<index_type>(blockIdx.x); - tile_id0 < m_policy.m_tile_end[0]; tile_id0 += gridDim.x) { - const index_type offset_0 = - tile_id0 * m_policy.m_tile[0] + - static_cast<index_type>(threadIdx.x) + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - static_cast<index_type>(threadIdx.x) < m_policy.m_tile[0]) { - m_func(offset_0, offset_1); - } - } - } - } - } - // LR - else { - for (index_type tile_id0 = static_cast<index_type>(blockIdx.x); - tile_id0 < m_policy.m_tile_end[0]; tile_id0 += gridDim.x) { - const index_type offset_0 = - tile_id0 * m_policy.m_tile[0] + - static_cast<index_type>(threadIdx.x) + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - static_cast<index_type>(threadIdx.x) < m_policy.m_tile[0]) { - for (index_type tile_id1 = static_cast<index_type>(blockIdx.y); - tile_id1 < m_policy.m_tile_end[1]; tile_id1 += gridDim.y) { - const index_type offset_1 = - tile_id1 * m_policy.m_tile[1] + - static_cast<index_type>(threadIdx.y) + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - static_cast<index_type>(threadIdx.y) < m_policy.m_tile[1]) { - m_func(offset_0, offset_1); - } - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; -}; - -// Specializations for tag type template <typename PolicyType, typename Functor, typename Tag> struct DeviceIterateTile<2, PolicyType, Functor, Tag> { using index_type = typename PolicyType::index_type; +#ifdef KOKKOS_ENABLE_SYCL + KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile( + const PolicyType& policy_, const Functor& f_, + const EmulateCUDADim3<index_type> gridDim_, + const EmulateCUDADim3<index_type> blockIdx_, + const EmulateCUDADim3<index_type> threadIdx_) + : 
m_policy(policy_), + m_func(f_), + gridDim(gridDim_), + blockIdx(blockIdx_), + threadIdx(threadIdx_) {} +#else KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, const Functor& f_) : m_policy(policy_), m_func(f_) {} +#endif KOKKOS_IMPL_DEVICE_FUNCTION void exec_range() const { - if (PolicyType::inner_direction == PolicyType::Left) { + if (PolicyType::inner_direction == Iterate::Left) { // Loop over size maxnumblocks until full range covered for (index_type tile_id1 = static_cast<index_type>(blockIdx.y); tile_id1 < m_policy.m_tile_end[1]; tile_id1 += gridDim.y) { @@ -158,7 +141,7 @@ struct DeviceIterateTile<2, PolicyType, Functor, Tag> { static_cast<index_type>(m_policy.m_lower[0]); if (offset_0 < m_policy.m_upper[0] && static_cast<index_type>(threadIdx.x) < m_policy.m_tile[0]) { - m_func(Tag(), offset_0, offset_1); + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1); } } } @@ -180,7 +163,7 @@ struct DeviceIterateTile<2, PolicyType, Functor, Tag> { static_cast<index_type>(m_policy.m_lower[1]); if (offset_1 < m_policy.m_upper[1] && static_cast<index_type>(threadIdx.y) < m_policy.m_tile[1]) { - m_func(Tag(), offset_0, offset_1); + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1); } } } @@ -191,107 +174,38 @@ struct DeviceIterateTile<2, PolicyType, Functor, Tag> { private: const PolicyType& m_policy; const Functor& m_func; +#ifdef KOKKOS_ENABLE_SYCL + const EmulateCUDADim3<index_type> gridDim; + const EmulateCUDADim3<index_type> blockIdx; + const EmulateCUDADim3<index_type> threadIdx; +#endif }; // Rank 3 -// Specializations for void tag type -template <typename PolicyType, typename Functor> -struct DeviceIterateTile<3, PolicyType, Functor, void> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_) - : m_policy(policy_), m_func(f_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - // LL - if (PolicyType::inner_direction == 
PolicyType::Left) { - for (index_type tile_id2 = static_cast<index_type>(blockIdx.z); - tile_id2 < m_policy.m_tile_end[2]; tile_id2 += gridDim.z) { - const index_type offset_2 = - tile_id2 * m_policy.m_tile[2] + - static_cast<index_type>(threadIdx.z) + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - static_cast<index_type>(threadIdx.z) < m_policy.m_tile[2]) { - for (index_type tile_id1 = static_cast<index_type>(blockIdx.y); - tile_id1 < m_policy.m_tile_end[1]; tile_id1 += gridDim.y) { - const index_type offset_1 = - tile_id1 * m_policy.m_tile[1] + - static_cast<index_type>(threadIdx.y) + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - static_cast<index_type>(threadIdx.y) < m_policy.m_tile[1]) { - for (index_type tile_id0 = static_cast<index_type>(blockIdx.x); - tile_id0 < m_policy.m_tile_end[0]; tile_id0 += gridDim.x) { - const index_type offset_0 = - tile_id0 * m_policy.m_tile[0] + - static_cast<index_type>(threadIdx.x) + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - static_cast<index_type>(threadIdx.x) < m_policy.m_tile[0]) { - m_func(offset_0, offset_1, offset_2); - } - } - } - } - } - } - } - // LR - else { - for (index_type tile_id0 = static_cast<index_type>(blockIdx.x); - tile_id0 < m_policy.m_tile_end[0]; tile_id0 += gridDim.x) { - const index_type offset_0 = - tile_id0 * m_policy.m_tile[0] + - static_cast<index_type>(threadIdx.x) + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - static_cast<index_type>(threadIdx.x) < m_policy.m_tile[0]) { - for (index_type tile_id1 = static_cast<index_type>(blockIdx.y); - tile_id1 < m_policy.m_tile_end[1]; tile_id1 += gridDim.y) { - const index_type offset_1 = - tile_id1 * m_policy.m_tile[1] + - static_cast<index_type>(threadIdx.y) + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - 
static_cast<index_type>(threadIdx.y) < m_policy.m_tile[1]) { - for (index_type tile_id2 = static_cast<index_type>(blockIdx.z); - tile_id2 < m_policy.m_tile_end[2]; tile_id2 += gridDim.z) { - const index_type offset_2 = - tile_id2 * m_policy.m_tile[2] + - static_cast<index_type>(threadIdx.z) + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - static_cast<index_type>(threadIdx.z) < m_policy.m_tile[2]) { - m_func(offset_0, offset_1, offset_2); - } - } - } - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; -}; - -// Specializations for void tag type template <typename PolicyType, typename Functor, typename Tag> struct DeviceIterateTile<3, PolicyType, Functor, Tag> { using index_type = typename PolicyType::index_type; +#ifdef KOKKOS_ENABLE_SYCL + KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile( + const PolicyType& policy_, const Functor& f_, + const EmulateCUDADim3<index_type> gridDim_, + const EmulateCUDADim3<index_type> blockIdx_, + const EmulateCUDADim3<index_type> threadIdx_) + : m_policy(policy_), + m_func(f_), + gridDim(gridDim_), + blockIdx(blockIdx_), + threadIdx(threadIdx_) {} +#else KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, const Functor& f_) : m_policy(policy_), m_func(f_) {} +#endif KOKKOS_IMPL_DEVICE_FUNCTION void exec_range() const { - if (PolicyType::inner_direction == PolicyType::Left) { + if (PolicyType::inner_direction == Iterate::Left) { for (index_type tile_id2 = static_cast<index_type>(blockIdx.z); tile_id2 < m_policy.m_tile_end[2]; tile_id2 += gridDim.z) { const index_type offset_2 = @@ -316,7 +230,7 @@ struct DeviceIterateTile<3, PolicyType, Functor, Tag> { static_cast<index_type>(m_policy.m_lower[0]); if (offset_0 < m_policy.m_upper[0] && static_cast<index_type>(threadIdx.x) < m_policy.m_tile[0]) { - m_func(Tag(), offset_0, offset_1, offset_2); + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1, offset_2); } } } @@ -348,7 
+262,7 @@ struct DeviceIterateTile<3, PolicyType, Functor, Tag> { static_cast<index_type>(m_policy.m_lower[2]); if (offset_2 < m_policy.m_upper[2] && static_cast<index_type>(threadIdx.z) < m_policy.m_tile[2]) { - m_func(Tag(), offset_0, offset_1, offset_2); + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1, offset_2); } } } @@ -361,30 +275,46 @@ struct DeviceIterateTile<3, PolicyType, Functor, Tag> { private: const PolicyType& m_policy; const Functor& m_func; +#ifdef KOKKOS_ENABLE_SYCL + const EmulateCUDADim3<index_type> gridDim; + const EmulateCUDADim3<index_type> blockIdx; + const EmulateCUDADim3<index_type> threadIdx; +#endif }; // Rank 4 -// Specializations for void tag type -template <typename PolicyType, typename Functor> -struct DeviceIterateTile<4, PolicyType, Functor, void> { +template <typename PolicyType, typename Functor, typename Tag> +struct DeviceIterateTile<4, PolicyType, Functor, Tag> { using index_type = typename PolicyType::index_type; +#ifdef KOKKOS_ENABLE_SYCL + KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile( + const PolicyType& policy_, const Functor& f_, + const EmulateCUDADim3<index_type> gridDim_, + const EmulateCUDADim3<index_type> blockIdx_, + const EmulateCUDADim3<index_type> threadIdx_) + : m_policy(policy_), + m_func(f_), + gridDim(gridDim_), + blockIdx(blockIdx_), + threadIdx(threadIdx_) {} +#else KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, const Functor& f_) : m_policy(policy_), m_func(f_) {} +#endif static constexpr index_type max_blocks = 65535; KOKKOS_IMPL_DEVICE_FUNCTION void exec_range() const { - // LL - if (PolicyType::inner_direction == PolicyType::Left) { + if (PolicyType::inner_direction == Iterate::Left) { const index_type temp0 = m_policy.m_tile_end[0]; const index_type temp1 = m_policy.m_tile_end[1]; const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); const index_type numbl1 = (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl0) + ? 
static_cast<index_type>(max_blocks / numbl0) : (temp1 <= max_blocks ? temp1 : max_blocks)); const index_type tile_id0 = static_cast<index_type>(blockIdx.x) % numbl0; @@ -424,7 +354,8 @@ struct DeviceIterateTile<4, PolicyType, Functor, void> { static_cast<index_type>(m_policy.m_lower[0]); if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { - m_func(offset_0, offset_1, offset_2, offset_3); + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1, + offset_2, offset_3); } } } @@ -433,9 +364,7 @@ struct DeviceIterateTile<4, PolicyType, Functor, void> { } } } - } - // LR - else { + } else { const index_type temp0 = m_policy.m_tile_end[0]; const index_type temp1 = m_policy.m_tile_end[1]; const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); @@ -482,7 +411,8 @@ struct DeviceIterateTile<4, PolicyType, Functor, void> { if (offset_3 < m_policy.m_upper[3] && static_cast<index_type>(threadIdx.z) < m_policy.m_tile[3]) { - m_func(offset_0, offset_1, offset_2, offset_3); + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1, + offset_2, offset_3); } } } @@ -497,28 +427,47 @@ struct DeviceIterateTile<4, PolicyType, Functor, void> { private: const PolicyType& m_policy; const Functor& m_func; +#ifdef KOKKOS_ENABLE_SYCL + const EmulateCUDADim3<index_type> gridDim; + const EmulateCUDADim3<index_type> blockIdx; + const EmulateCUDADim3<index_type> threadIdx; +#endif }; -// Specializations for void tag type +// Rank 5 template <typename PolicyType, typename Functor, typename Tag> -struct DeviceIterateTile<4, PolicyType, Functor, Tag> { +struct DeviceIterateTile<5, PolicyType, Functor, Tag> { using index_type = typename PolicyType::index_type; +#ifdef KOKKOS_ENABLE_SYCL + KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile( + const PolicyType& policy_, const Functor& f_, + const EmulateCUDADim3<index_type> gridDim_, + const EmulateCUDADim3<index_type> blockIdx_, + const EmulateCUDADim3<index_type> threadIdx_) + : m_policy(policy_), + m_func(f_), + gridDim(gridDim_), + 
blockIdx(blockIdx_), + threadIdx(threadIdx_) {} +#else KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, const Functor& f_) : m_policy(policy_), m_func(f_) {} +#endif static constexpr index_type max_blocks = 65535; KOKKOS_IMPL_DEVICE_FUNCTION void exec_range() const { - if (PolicyType::inner_direction == PolicyType::Left) { - const index_type temp0 = m_policy.m_tile_end[0]; - const index_type temp1 = m_policy.m_tile_end[1]; + // LL + if (PolicyType::inner_direction == Iterate::Left) { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); const index_type numbl1 = (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl0) + ? index_type(max_blocks / numbl0) : (temp1 <= max_blocks ? temp1 : max_blocks)); const index_type tile_id0 = static_cast<index_type>(blockIdx.x) % numbl0; @@ -528,37 +477,61 @@ struct DeviceIterateTile<4, PolicyType, Functor, Tag> { const index_type thr_id1 = static_cast<index_type>(threadIdx.x) / m_policy.m_tile[0]; - for (index_type tile_id3 = static_cast<index_type>(blockIdx.z); - tile_id3 < m_policy.m_tile_end[3]; tile_id3 += gridDim.z) { - const index_type offset_3 = - tile_id3 * m_policy.m_tile[3] + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl3 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl2) + : (temp1 <= max_blocks ? 
temp1 : max_blocks)); + + const index_type tile_id2 = static_cast<index_type>(blockIdx.y) % numbl2; + const index_type tile_id3 = static_cast<index_type>(blockIdx.y) / numbl2; + const index_type thr_id2 = + static_cast<index_type>(threadIdx.y) % m_policy.m_tile[2]; + const index_type thr_id3 = + static_cast<index_type>(threadIdx.y) / m_policy.m_tile[2]; + + for (index_type tile_id4 = static_cast<index_type>(blockIdx.z); + tile_id4 < m_policy.m_tile_end[4]; tile_id4 += gridDim.z) { + const index_type offset_4 = + tile_id4 * m_policy.m_tile[4] + static_cast<index_type>(threadIdx.z) + - static_cast<index_type>(m_policy.m_lower[3]); - if (offset_3 < m_policy.m_upper[3] && - static_cast<index_type>(threadIdx.z) < m_policy.m_tile[3]) { - for (index_type tile_id2 = static_cast<index_type>(blockIdx.y); - tile_id2 < m_policy.m_tile_end[2]; tile_id2 += gridDim.y) { - const index_type offset_2 = - tile_id2 * m_policy.m_tile[2] + - static_cast<index_type>(threadIdx.y) + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - static_cast<index_type>(threadIdx.y) < m_policy.m_tile[2]) { - for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; - j += numbl1) { - const index_type offset_1 = - j * m_policy.m_tile[1] + thr_id1 + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - thr_id1 < m_policy.m_tile[1]) { - for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; - i += numbl0) { - const index_type offset_0 = - i * m_policy.m_tile[0] + thr_id0 + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - thr_id0 < m_policy.m_tile[0]) { - m_func(Tag(), offset_0, offset_1, offset_2, offset_3); + static_cast<index_type>(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + static_cast<index_type>(threadIdx.z) < m_policy.m_tile[4]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + 
thr_id3 + + static_cast<index_type>(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast<index_type>(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast<index_type>(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; + i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast<index_type>(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + thr_id0 < m_policy.m_tile[0]) { + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1, + offset_2, offset_3, offset_4); + } + } } } } @@ -567,13 +540,15 @@ struct DeviceIterateTile<4, PolicyType, Functor, Tag> { } } } - } else { - const index_type temp0 = m_policy.m_tile_end[0]; - const index_type temp1 = m_policy.m_tile_end[1]; - const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl0 = + } + // LR + else { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl0 = (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl1) + ? static_cast<index_type>(max_blocks / numbl1) : (temp0 <= max_blocks ? 
temp0 : max_blocks)); const index_type tile_id0 = static_cast<index_type>(blockIdx.x) / numbl1; @@ -583,6 +558,21 @@ struct DeviceIterateTile<4, PolicyType, Functor, Tag> { const index_type thr_id1 = static_cast<index_type>(threadIdx.x) % m_policy.m_tile[1]; + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl2 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl3) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id2 = static_cast<index_type>(blockIdx.y) / numbl3; + const index_type tile_id3 = static_cast<index_type>(blockIdx.y) % numbl3; + const index_type thr_id2 = + static_cast<index_type>(threadIdx.y) / m_policy.m_tile[3]; + const index_type thr_id3 = + static_cast<index_type>(threadIdx.y) % m_policy.m_tile[3]; + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { const index_type offset_0 = i * m_policy.m_tile[0] + thr_id0 + @@ -591,30 +581,39 @@ struct DeviceIterateTile<4, PolicyType, Functor, Tag> { for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; j += numbl1) { const index_type offset_1 = - tile_id1 * m_policy.m_tile[1] + thr_id1 + + j * m_policy.m_tile[1] + thr_id1 + static_cast<index_type>(m_policy.m_lower[1]); if (offset_1 < m_policy.m_upper[1] && thr_id1 < m_policy.m_tile[1]) { - for (index_type tile_id2 = static_cast<index_type>(blockIdx.y); - tile_id2 < m_policy.m_tile_end[2]; tile_id2 += gridDim.y) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { const index_type offset_2 = - tile_id2 * m_policy.m_tile[2] + - static_cast<index_type>(threadIdx.y) + + k * m_policy.m_tile[2] + thr_id2 + static_cast<index_type>(m_policy.m_lower[2]); if (offset_2 < m_policy.m_upper[2] && - static_cast<index_type>(threadIdx.y) < m_policy.m_tile[2]) { - for (index_type tile_id3 = - static_cast<index_type>(blockIdx.z); - tile_id3 < m_policy.m_tile_end[3]; - tile_id3 
+= gridDim.z) { + thr_id2 < m_policy.m_tile[2]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { const index_type offset_3 = - tile_id3 * m_policy.m_tile[3] + - static_cast<index_type>(threadIdx.z) + + l * m_policy.m_tile[3] + thr_id3 + static_cast<index_type>(m_policy.m_lower[3]); if (offset_3 < m_policy.m_upper[3] && - static_cast<index_type>(threadIdx.z) < - m_policy.m_tile[3]) { - m_func(Tag(), offset_0, offset_1, offset_2, offset_3); + thr_id3 < m_policy.m_tile[3]) { + for (index_type tile_id4 = + static_cast<index_type>(blockIdx.z); + tile_id4 < m_policy.m_tile_end[4]; + tile_id4 += gridDim.z) { + const index_type offset_4 = + tile_id4 * m_policy.m_tile[4] + + static_cast<index_type>(threadIdx.z) + + static_cast<index_type>(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + static_cast<index_type>(threadIdx.z) < + m_policy.m_tile[4]) { + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1, + offset_2, offset_3, offset_4); + } + } } } } @@ -629,30 +628,47 @@ struct DeviceIterateTile<4, PolicyType, Functor, Tag> { private: const PolicyType& m_policy; const Functor& m_func; +#ifdef KOKKOS_ENABLE_SYCL + const EmulateCUDADim3<index_type> gridDim; + const EmulateCUDADim3<index_type> blockIdx; + const EmulateCUDADim3<index_type> threadIdx; +#endif }; -// Rank 5 -// Specializations for void tag type -template <typename PolicyType, typename Functor> -struct DeviceIterateTile<5, PolicyType, Functor, void> { +// Rank 6 +template <typename PolicyType, typename Functor, typename Tag> +struct DeviceIterateTile<6, PolicyType, Functor, Tag> { using index_type = typename PolicyType::index_type; +#ifdef KOKKOS_ENABLE_SYCL + KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile( + const PolicyType& policy_, const Functor& f_, + const EmulateCUDADim3<index_type> gridDim_, + const EmulateCUDADim3<index_type> blockIdx_, + const EmulateCUDADim3<index_type> threadIdx_) + : m_policy(policy_), + m_func(f_), + gridDim(gridDim_), + blockIdx(blockIdx_), + 
threadIdx(threadIdx_) {} +#else KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, const Functor& f_) : m_policy(policy_), m_func(f_) {} +#endif static constexpr index_type max_blocks = 65535; KOKKOS_IMPL_DEVICE_FUNCTION void exec_range() const { // LL - if (PolicyType::inner_direction == PolicyType::Left) { + if (PolicyType::inner_direction == Iterate::Left) { index_type temp0 = m_policy.m_tile_end[0]; index_type temp1 = m_policy.m_tile_end[1]; const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); const index_type numbl1 = (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl0) + ? static_cast<index_type>(max_blocks / numbl0) : (temp1 <= max_blocks ? temp1 : max_blocks)); const index_type tile_id0 = static_cast<index_type>(blockIdx.x) % numbl0; @@ -667,7 +683,7 @@ struct DeviceIterateTile<5, PolicyType, Functor, void> { const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); const index_type numbl3 = (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl2) + ? static_cast<index_type>(max_blocks / numbl2) : (temp1 <= max_blocks ? 
temp1 : max_blocks)); const index_type tile_id2 = static_cast<index_type>(blockIdx.y) % numbl2; @@ -677,44 +693,66 @@ struct DeviceIterateTile<5, PolicyType, Functor, void> { const index_type thr_id3 = static_cast<index_type>(threadIdx.y) / m_policy.m_tile[2]; - for (index_type tile_id4 = static_cast<index_type>(blockIdx.z); - tile_id4 < m_policy.m_tile_end[4]; tile_id4 += gridDim.z) { - const index_type offset_4 = - tile_id4 * m_policy.m_tile[4] + - static_cast<index_type>(threadIdx.z) + - static_cast<index_type>(m_policy.m_lower[4]); - if (offset_4 < m_policy.m_upper[4] && - static_cast<index_type>(threadIdx.z) < m_policy.m_tile[4]) { - for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; - l += numbl3) { - const index_type offset_3 = - l * m_policy.m_tile[3] + thr_id3 + - static_cast<index_type>(m_policy.m_lower[3]); - if (offset_3 < m_policy.m_upper[3] && - thr_id3 < m_policy.m_tile[3]) { - for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; - k += numbl2) { - const index_type offset_2 = - k * m_policy.m_tile[2] + thr_id2 + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - thr_id2 < m_policy.m_tile[2]) { - for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; - j += numbl1) { - const index_type offset_1 = - j * m_policy.m_tile[1] + thr_id1 + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - thr_id1 < m_policy.m_tile[1]) { - for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; - i += numbl0) { - const index_type offset_0 = - i * m_policy.m_tile[0] + thr_id0 + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - thr_id0 < m_policy.m_tile[0]) { - m_func(offset_0, offset_1, offset_2, offset_3, - offset_4); + temp0 = m_policy.m_tile_end[4]; + temp1 = m_policy.m_tile_end[5]; + const index_type numbl4 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl5 = + (temp0 * temp1 > max_blocks + ? 
static_cast<index_type>(max_blocks / numbl4) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id4 = static_cast<index_type>(blockIdx.z) % numbl4; + const index_type tile_id5 = static_cast<index_type>(blockIdx.z) / numbl4; + const index_type thr_id4 = + static_cast<index_type>(threadIdx.z) % m_policy.m_tile[4]; + const index_type thr_id5 = + static_cast<index_type>(threadIdx.z) / m_policy.m_tile[4]; + + for (index_type n = tile_id5; n < m_policy.m_tile_end[5]; n += numbl5) { + const index_type offset_5 = + n * m_policy.m_tile[5] + thr_id5 + + static_cast<index_type>(m_policy.m_lower[5]); + if (offset_5 < m_policy.m_upper[5] && thr_id5 < m_policy.m_tile[5]) { + for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; + m += numbl4) { + const index_type offset_4 = + m * m_policy.m_tile[4] + thr_id4 + + static_cast<index_type>(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + thr_id4 < m_policy.m_tile[4]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast<index_type>(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast<index_type>(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast<index_type>(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type i = tile_id0; + i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast<index_type>(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + thr_id0 < 
m_policy.m_tile[0]) { + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1, + offset_2, offset_3, + offset_4, offset_5); + } + } } } } @@ -733,7 +771,7 @@ struct DeviceIterateTile<5, PolicyType, Functor, void> { const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); const index_type numbl0 = (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl1) + ? static_cast<index_type>(max_blocks / numbl1) : (temp0 <= max_blocks ? temp0 : max_blocks)); const index_type tile_id0 = static_cast<index_type>(blockIdx.x) / numbl1; @@ -748,7 +786,7 @@ struct DeviceIterateTile<5, PolicyType, Functor, void> { const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); const index_type numbl2 = (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl3) + ? static_cast<index_type>(max_blocks / numbl3) : (temp0 <= max_blocks ? temp0 : max_blocks)); const index_type tile_id2 = static_cast<index_type>(blockIdx.y) / numbl3; @@ -758,6 +796,21 @@ struct DeviceIterateTile<5, PolicyType, Functor, void> { const index_type thr_id3 = static_cast<index_type>(threadIdx.y) % m_policy.m_tile[3]; + temp0 = m_policy.m_tile_end[4]; + temp1 = m_policy.m_tile_end[5]; + const index_type numbl5 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl4 = + (temp0 * temp1 > max_blocks + ? static_cast<index_type>(max_blocks / numbl5) + : (temp0 <= max_blocks ? 
temp0 : max_blocks)); + + const index_type tile_id4 = static_cast<index_type>(blockIdx.z) / numbl5; + const index_type tile_id5 = static_cast<index_type>(blockIdx.z) % numbl5; + const index_type thr_id4 = + static_cast<index_type>(threadIdx.z) / m_policy.m_tile[5]; + const index_type thr_id5 = + static_cast<index_type>(threadIdx.z) % m_policy.m_tile[5]; + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { const index_type offset_0 = i * m_policy.m_tile[0] + thr_id0 + @@ -784,19 +837,25 @@ struct DeviceIterateTile<5, PolicyType, Functor, void> { static_cast<index_type>(m_policy.m_lower[3]); if (offset_3 < m_policy.m_upper[3] && thr_id3 < m_policy.m_tile[3]) { - for (index_type tile_id4 = - static_cast<index_type>(blockIdx.z); - tile_id4 < m_policy.m_tile_end[4]; - tile_id4 += gridDim.z) { + for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; + m += numbl4) { const index_type offset_4 = - tile_id4 * m_policy.m_tile[4] + - static_cast<index_type>(threadIdx.z) + + m * m_policy.m_tile[4] + thr_id4 + static_cast<index_type>(m_policy.m_lower[4]); if (offset_4 < m_policy.m_upper[4] && - static_cast<index_type>(threadIdx.z) < - m_policy.m_tile[4]) { - m_func(offset_0, offset_1, offset_2, offset_3, - offset_4); + thr_id4 < m_policy.m_tile[4]) { + for (index_type n = tile_id5; + n < m_policy.m_tile_end[5]; n += numbl5) { + const index_type offset_5 = + n * m_policy.m_tile[5] + thr_id5 + + static_cast<index_type>(m_policy.m_lower[5]); + if (offset_5 < m_policy.m_upper[5] && + thr_id5 < m_policy.m_tile[5]) { + Impl::_tag_invoke<Tag>(m_func, offset_0, offset_1, + offset_2, offset_3, + offset_4, offset_5); + } + } } } } @@ -813,2361 +872,90 @@ struct DeviceIterateTile<5, PolicyType, Functor, void> { private: const PolicyType& m_policy; const Functor& m_func; +#ifdef KOKKOS_ENABLE_SYCL + const EmulateCUDADim3<index_type> gridDim; + const EmulateCUDADim3<index_type> blockIdx; + const EmulateCUDADim3<index_type> threadIdx; +#endif }; -// Specializations 
for tag type -template <typename PolicyType, typename Functor, typename Tag> -struct DeviceIterateTile<5, PolicyType, Functor, Tag> { - using index_type = typename PolicyType::index_type; +// ---------------------------------------------------------------------------------- - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_) - : m_policy(policy_), m_func(f_) {} +namespace Reduce { - static constexpr index_type max_blocks = 65535; +template <typename T> +using is_void = std::is_same<T, void>; - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - index_type temp0 = m_policy.m_tile_end[0]; - index_type temp1 = m_policy.m_tile_end[1]; - const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); - const index_type numbl1 = - (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl0) - : (temp1 <= max_blocks ? temp1 : max_blocks)); +template <typename T> +struct is_array_type : std::false_type { + using value_type = T; +}; - const index_type tile_id0 = static_cast<index_type>(blockIdx.x) % numbl0; - const index_type tile_id1 = static_cast<index_type>(blockIdx.x) / numbl0; - const index_type thr_id0 = - static_cast<index_type>(threadIdx.x) % m_policy.m_tile[0]; - const index_type thr_id1 = - static_cast<index_type>(threadIdx.x) / m_policy.m_tile[0]; +template <typename T> +struct is_array_type<T*> : std::true_type { + using value_type = T; +}; - temp0 = m_policy.m_tile_end[2]; - temp1 = m_policy.m_tile_end[3]; - const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); - const index_type numbl3 = - (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl2) - : (temp1 <= max_blocks ? 
temp1 : max_blocks)); +template <typename T> +struct is_array_type<T[]> : std::true_type { + using value_type = T; +}; - const index_type tile_id2 = static_cast<index_type>(blockIdx.y) % numbl2; - const index_type tile_id3 = static_cast<index_type>(blockIdx.y) / numbl2; - const index_type thr_id2 = - static_cast<index_type>(threadIdx.y) % m_policy.m_tile[2]; - const index_type thr_id3 = - static_cast<index_type>(threadIdx.y) / m_policy.m_tile[2]; +// ------------------------------------------------------------------ // - for (index_type tile_id4 = static_cast<index_type>(blockIdx.z); - tile_id4 < m_policy.m_tile_end[4]; tile_id4 += gridDim.z) { - const index_type offset_4 = - tile_id4 * m_policy.m_tile[4] + - static_cast<index_type>(threadIdx.z) + - static_cast<index_type>(m_policy.m_lower[4]); - if (offset_4 < m_policy.m_upper[4] && - static_cast<index_type>(threadIdx.z) < m_policy.m_tile[4]) { - for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; - l += numbl3) { - const index_type offset_3 = - l * m_policy.m_tile[3] + thr_id3 + - static_cast<index_type>(m_policy.m_lower[3]); - if (offset_3 < m_policy.m_upper[3] && - thr_id3 < m_policy.m_tile[3]) { - for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; - k += numbl2) { - const index_type offset_2 = - k * m_policy.m_tile[2] + thr_id2 + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - thr_id2 < m_policy.m_tile[2]) { - for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; - j += numbl1) { - const index_type offset_1 = - j * m_policy.m_tile[1] + thr_id1 + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - thr_id1 < m_policy.m_tile[1]) { - for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; - i += numbl0) { - const index_type offset_0 = - i * m_policy.m_tile[0] + thr_id0 + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - thr_id0 < m_policy.m_tile[0]) { - m_func(Tag(), offset_0, 
offset_1, offset_2, offset_3, - offset_4); - } - } - } - } - } - } - } - } - } - } - } - // LR - else { - index_type temp0 = m_policy.m_tile_end[0]; - index_type temp1 = m_policy.m_tile_end[1]; - const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl0 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl1) - : (temp0 <= max_blocks ? temp0 : max_blocks)); - - const index_type tile_id0 = static_cast<index_type>(blockIdx.x) / numbl1; - const index_type tile_id1 = static_cast<index_type>(blockIdx.x) % numbl1; - const index_type thr_id0 = - static_cast<index_type>(threadIdx.x) / m_policy.m_tile[1]; - const index_type thr_id1 = - static_cast<index_type>(threadIdx.x) % m_policy.m_tile[1]; - - temp0 = m_policy.m_tile_end[2]; - temp1 = m_policy.m_tile_end[3]; - const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl2 = - (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl3) - : (temp0 <= max_blocks ? 
temp0 : max_blocks)); - - const index_type tile_id2 = static_cast<index_type>(blockIdx.y) / numbl3; - const index_type tile_id3 = static_cast<index_type>(blockIdx.y) % numbl3; - const index_type thr_id2 = - static_cast<index_type>(threadIdx.y) / m_policy.m_tile[3]; - const index_type thr_id3 = - static_cast<index_type>(threadIdx.y) % m_policy.m_tile[3]; - - for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { - const index_type offset_0 = - i * m_policy.m_tile[0] + thr_id0 + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { - for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; - j += numbl1) { - const index_type offset_1 = - j * m_policy.m_tile[1] + thr_id1 + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - thr_id1 < m_policy.m_tile[1]) { - for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; - k += numbl2) { - const index_type offset_2 = - k * m_policy.m_tile[2] + thr_id2 + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - thr_id2 < m_policy.m_tile[2]) { - for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; - l += numbl3) { - const index_type offset_3 = - l * m_policy.m_tile[3] + thr_id3 + - static_cast<index_type>(m_policy.m_lower[3]); - if (offset_3 < m_policy.m_upper[3] && - thr_id3 < m_policy.m_tile[3]) { - for (index_type tile_id4 = - static_cast<index_type>(blockIdx.z); - tile_id4 < m_policy.m_tile_end[4]; - tile_id4 += gridDim.z) { - const index_type offset_4 = - tile_id4 * m_policy.m_tile[4] + - static_cast<index_type>(threadIdx.z) + - static_cast<index_type>(m_policy.m_lower[4]); - if (offset_4 < m_policy.m_upper[4] && - static_cast<index_type>(threadIdx.z) < - m_policy.m_tile[4]) { - m_func(Tag(), offset_0, offset_1, offset_2, offset_3, - offset_4); - } - } - } - } - } - } - } - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const 
Functor& m_func; -}; - -// Rank 6 -// Specializations for void tag type -template <typename PolicyType, typename Functor> -struct DeviceIterateTile<6, PolicyType, Functor, void> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_) - : m_policy(policy_), m_func(f_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - index_type temp0 = m_policy.m_tile_end[0]; - index_type temp1 = m_policy.m_tile_end[1]; - const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); - const index_type numbl1 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl0) - : (temp1 <= max_blocks ? temp1 : max_blocks)); - - const index_type tile_id0 = static_cast<index_type>(blockIdx.x) % numbl0; - const index_type tile_id1 = static_cast<index_type>(blockIdx.x) / numbl0; - const index_type thr_id0 = - static_cast<index_type>(threadIdx.x) % m_policy.m_tile[0]; - const index_type thr_id1 = - static_cast<index_type>(threadIdx.x) / m_policy.m_tile[0]; - - temp0 = m_policy.m_tile_end[2]; - temp1 = m_policy.m_tile_end[3]; - const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); - const index_type numbl3 = - (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl2) - : (temp1 <= max_blocks ? temp1 : max_blocks)); - - const index_type tile_id2 = static_cast<index_type>(blockIdx.y) % numbl2; - const index_type tile_id3 = static_cast<index_type>(blockIdx.y) / numbl2; - const index_type thr_id2 = - static_cast<index_type>(threadIdx.y) % m_policy.m_tile[2]; - const index_type thr_id3 = - static_cast<index_type>(threadIdx.y) / m_policy.m_tile[2]; - - temp0 = m_policy.m_tile_end[4]; - temp1 = m_policy.m_tile_end[5]; - const index_type numbl4 = (temp0 <= max_blocks ? 
temp0 : max_blocks); - const index_type numbl5 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl4) - : (temp1 <= max_blocks ? temp1 : max_blocks)); - - const index_type tile_id4 = static_cast<index_type>(blockIdx.z) % numbl4; - const index_type tile_id5 = static_cast<index_type>(blockIdx.z) / numbl4; - const index_type thr_id4 = - static_cast<index_type>(threadIdx.z) % m_policy.m_tile[4]; - const index_type thr_id5 = - static_cast<index_type>(threadIdx.z) / m_policy.m_tile[4]; - - for (index_type n = tile_id5; n < m_policy.m_tile_end[5]; n += numbl5) { - const index_type offset_5 = - n * m_policy.m_tile[5] + thr_id5 + - static_cast<index_type>(m_policy.m_lower[5]); - if (offset_5 < m_policy.m_upper[5] && thr_id5 < m_policy.m_tile[5]) { - for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; - m += numbl4) { - const index_type offset_4 = - m * m_policy.m_tile[4] + thr_id4 + - static_cast<index_type>(m_policy.m_lower[4]); - if (offset_4 < m_policy.m_upper[4] && - thr_id4 < m_policy.m_tile[4]) { - for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; - l += numbl3) { - const index_type offset_3 = - l * m_policy.m_tile[3] + thr_id3 + - static_cast<index_type>(m_policy.m_lower[3]); - if (offset_3 < m_policy.m_upper[3] && - thr_id3 < m_policy.m_tile[3]) { - for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; - k += numbl2) { - const index_type offset_2 = - k * m_policy.m_tile[2] + thr_id2 + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - thr_id2 < m_policy.m_tile[2]) { - for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; - j += numbl1) { - const index_type offset_1 = - j * m_policy.m_tile[1] + thr_id1 + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - thr_id1 < m_policy.m_tile[1]) { - for (index_type i = tile_id0; - i < m_policy.m_tile_end[0]; i += numbl0) { - const index_type offset_0 = - i * m_policy.m_tile[0] + thr_id0 + - 
static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - thr_id0 < m_policy.m_tile[0]) { - m_func(offset_0, offset_1, offset_2, offset_3, - offset_4, offset_5); - } - } - } - } - } - } - } - } - } - } - } - } - } - // LR - else { - index_type temp0 = m_policy.m_tile_end[0]; - index_type temp1 = m_policy.m_tile_end[1]; - const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl0 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl1) - : (temp0 <= max_blocks ? temp0 : max_blocks)); - - const index_type tile_id0 = static_cast<index_type>(blockIdx.x) / numbl1; - const index_type tile_id1 = static_cast<index_type>(blockIdx.x) % numbl1; - const index_type thr_id0 = - static_cast<index_type>(threadIdx.x) / m_policy.m_tile[1]; - const index_type thr_id1 = - static_cast<index_type>(threadIdx.x) % m_policy.m_tile[1]; - - temp0 = m_policy.m_tile_end[2]; - temp1 = m_policy.m_tile_end[3]; - const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl2 = - (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl3) - : (temp0 <= max_blocks ? temp0 : max_blocks)); - - const index_type tile_id2 = static_cast<index_type>(blockIdx.y) / numbl3; - const index_type tile_id3 = static_cast<index_type>(blockIdx.y) % numbl3; - const index_type thr_id2 = - static_cast<index_type>(threadIdx.y) / m_policy.m_tile[3]; - const index_type thr_id3 = - static_cast<index_type>(threadIdx.y) % m_policy.m_tile[3]; - - temp0 = m_policy.m_tile_end[4]; - temp1 = m_policy.m_tile_end[5]; - const index_type numbl5 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl4 = - (temp0 * temp1 > max_blocks - ? index_type(max_blocks / numbl5) - : (temp0 <= max_blocks ? 
temp0 : max_blocks)); - - const index_type tile_id4 = static_cast<index_type>(blockIdx.z) / numbl5; - const index_type tile_id5 = static_cast<index_type>(blockIdx.z) % numbl5; - const index_type thr_id4 = - static_cast<index_type>(threadIdx.z) / m_policy.m_tile[5]; - const index_type thr_id5 = - static_cast<index_type>(threadIdx.z) % m_policy.m_tile[5]; - - for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { - const index_type offset_0 = - i * m_policy.m_tile[0] + thr_id0 + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { - for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; - j += numbl1) { - const index_type offset_1 = - j * m_policy.m_tile[1] + thr_id1 + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - thr_id1 < m_policy.m_tile[1]) { - for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; - k += numbl2) { - const index_type offset_2 = - k * m_policy.m_tile[2] + thr_id2 + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - thr_id2 < m_policy.m_tile[2]) { - for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; - l += numbl3) { - const index_type offset_3 = - l * m_policy.m_tile[3] + thr_id3 + - static_cast<index_type>(m_policy.m_lower[3]); - if (offset_3 < m_policy.m_upper[3] && - thr_id3 < m_policy.m_tile[3]) { - for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; - m += numbl4) { - const index_type offset_4 = - m * m_policy.m_tile[4] + thr_id4 + - static_cast<index_type>(m_policy.m_lower[4]); - if (offset_4 < m_policy.m_upper[4] && - thr_id4 < m_policy.m_tile[4]) { - for (index_type n = tile_id5; - n < m_policy.m_tile_end[5]; n += numbl5) { - const index_type offset_5 = - n * m_policy.m_tile[5] + thr_id5 + - static_cast<index_type>(m_policy.m_lower[5]); - if (offset_5 < m_policy.m_upper[5] && - thr_id5 < m_policy.m_tile[5]) { - m_func(offset_0, offset_1, offset_2, offset_3, 
- offset_4, offset_5); - } - } - } - } - } - } - } - } - } - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; -}; - -// Specializations for tag type -template <typename PolicyType, typename Functor, typename Tag> -struct DeviceIterateTile<6, PolicyType, Functor, Tag> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_) - : m_policy(policy_), m_func(f_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - index_type temp0 = m_policy.m_tile_end[0]; - index_type temp1 = m_policy.m_tile_end[1]; - const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); - const index_type numbl1 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl0) - : (temp1 <= max_blocks ? temp1 : max_blocks)); - - const index_type tile_id0 = static_cast<index_type>(blockIdx.x) % numbl0; - const index_type tile_id1 = static_cast<index_type>(blockIdx.x) / numbl0; - const index_type thr_id0 = - static_cast<index_type>(threadIdx.x) % m_policy.m_tile[0]; - const index_type thr_id1 = - static_cast<index_type>(threadIdx.x) / m_policy.m_tile[0]; - - temp0 = m_policy.m_tile_end[2]; - temp1 = m_policy.m_tile_end[3]; - const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); - const index_type numbl3 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl2) - : (temp1 <= max_blocks ? 
temp1 : max_blocks)); - - const index_type tile_id2 = static_cast<index_type>(blockIdx.y) % numbl2; - const index_type tile_id3 = static_cast<index_type>(blockIdx.y) / numbl2; - const index_type thr_id2 = - static_cast<index_type>(threadIdx.y) % m_policy.m_tile[2]; - const index_type thr_id3 = - static_cast<index_type>(threadIdx.y) / m_policy.m_tile[2]; - - temp0 = m_policy.m_tile_end[4]; - temp1 = m_policy.m_tile_end[5]; - const index_type numbl4 = (temp0 <= max_blocks ? temp0 : max_blocks); - const index_type numbl5 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl4) - : (temp1 <= max_blocks ? temp1 : max_blocks)); - - const index_type tile_id4 = static_cast<index_type>(blockIdx.z) % numbl4; - const index_type tile_id5 = static_cast<index_type>(blockIdx.z) / numbl4; - const index_type thr_id4 = - static_cast<index_type>(threadIdx.z) % m_policy.m_tile[4]; - const index_type thr_id5 = - static_cast<index_type>(threadIdx.z) / m_policy.m_tile[4]; - - for (index_type n = tile_id5; n < m_policy.m_tile_end[5]; n += numbl5) { - const index_type offset_5 = - n * m_policy.m_tile[5] + thr_id5 + - static_cast<index_type>(m_policy.m_lower[5]); - if (offset_5 < m_policy.m_upper[5] && thr_id5 < m_policy.m_tile[5]) { - for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; - m += numbl4) { - const index_type offset_4 = - m * m_policy.m_tile[4] + thr_id4 + - static_cast<index_type>(m_policy.m_lower[4]); - if (offset_4 < m_policy.m_upper[4] && - thr_id4 < m_policy.m_tile[4]) { - for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; - l += numbl3) { - const index_type offset_3 = - l * m_policy.m_tile[3] + thr_id3 + - static_cast<index_type>(m_policy.m_lower[3]); - if (offset_3 < m_policy.m_upper[3] && - thr_id3 < m_policy.m_tile[3]) { - for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; - k += numbl2) { - const index_type offset_2 = - k * m_policy.m_tile[2] + thr_id2 + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < 
m_policy.m_upper[2] && - thr_id2 < m_policy.m_tile[2]) { - for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; - j += numbl1) { - const index_type offset_1 = - j * m_policy.m_tile[1] + thr_id1 + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - thr_id1 < m_policy.m_tile[1]) { - for (index_type i = tile_id0; - i < m_policy.m_tile_end[0]; i += numbl0) { - const index_type offset_0 = - i * m_policy.m_tile[0] + thr_id0 + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && - thr_id0 < m_policy.m_tile[0]) { - m_func(Tag(), offset_0, offset_1, offset_2, - offset_3, offset_4, offset_5); - } - } - } - } - } - } - } - } - } - } - } - } - } - // LR - else { - index_type temp0 = m_policy.m_tile_end[0]; - index_type temp1 = m_policy.m_tile_end[1]; - const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl0 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl1) - : (temp0 <= max_blocks ? temp0 : max_blocks)); - - const index_type tile_id0 = static_cast<index_type>(blockIdx.x) / numbl1; - const index_type tile_id1 = static_cast<index_type>(blockIdx.x) % numbl1; - const index_type thr_id0 = - static_cast<index_type>(threadIdx.x) / m_policy.m_tile[1]; - const index_type thr_id1 = - static_cast<index_type>(threadIdx.x) % m_policy.m_tile[1]; - - temp0 = m_policy.m_tile_end[2]; - temp1 = m_policy.m_tile_end[3]; - const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl2 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl3) - : (temp0 <= max_blocks ? 
temp0 : max_blocks)); - - const index_type tile_id2 = static_cast<index_type>(blockIdx.y) / numbl3; - const index_type tile_id3 = static_cast<index_type>(blockIdx.y) % numbl3; - const index_type thr_id2 = - static_cast<index_type>(threadIdx.y) / m_policy.m_tile[3]; - const index_type thr_id3 = - static_cast<index_type>(threadIdx.y) % m_policy.m_tile[3]; - - temp0 = m_policy.m_tile_end[4]; - temp1 = m_policy.m_tile_end[5]; - const index_type numbl5 = (temp1 <= max_blocks ? temp1 : max_blocks); - const index_type numbl4 = - (temp0 * temp1 > max_blocks - ? static_cast<index_type>(max_blocks / numbl5) - : (temp0 <= max_blocks ? temp0 : max_blocks)); - - const index_type tile_id4 = static_cast<index_type>(blockIdx.z) / numbl5; - const index_type tile_id5 = static_cast<index_type>(blockIdx.z) % numbl5; - const index_type thr_id4 = - static_cast<index_type>(threadIdx.z) / m_policy.m_tile[5]; - const index_type thr_id5 = - static_cast<index_type>(threadIdx.z) % m_policy.m_tile[5]; - - for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { - const index_type offset_0 = - i * m_policy.m_tile[0] + thr_id0 + - static_cast<index_type>(m_policy.m_lower[0]); - if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { - for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; - j += numbl1) { - const index_type offset_1 = - j * m_policy.m_tile[1] + thr_id1 + - static_cast<index_type>(m_policy.m_lower[1]); - if (offset_1 < m_policy.m_upper[1] && - thr_id1 < m_policy.m_tile[1]) { - for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; - k += numbl2) { - const index_type offset_2 = - k * m_policy.m_tile[2] + thr_id2 + - static_cast<index_type>(m_policy.m_lower[2]); - if (offset_2 < m_policy.m_upper[2] && - thr_id2 < m_policy.m_tile[2]) { - for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; - l += numbl3) { - const index_type offset_3 = - l * m_policy.m_tile[3] + thr_id3 + - static_cast<index_type>(m_policy.m_lower[3]); - if (offset_3 < 
m_policy.m_upper[3] && - thr_id3 < m_policy.m_tile[3]) { - for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; - m += numbl4) { - const index_type offset_4 = - m * m_policy.m_tile[4] + thr_id4 + - static_cast<index_type>(m_policy.m_lower[4]); - if (offset_4 < m_policy.m_upper[4] && - thr_id4 < m_policy.m_tile[4]) { - for (index_type n = tile_id5; - n < m_policy.m_tile_end[5]; n += numbl5) { - const index_type offset_5 = - n * m_policy.m_tile[5] + thr_id5 + - static_cast<index_type>(m_policy.m_lower[5]); - if (offset_5 < m_policy.m_upper[5] && - thr_id5 < m_policy.m_tile[5]) { - m_func(Tag(), offset_0, offset_1, offset_2, - offset_3, offset_4, offset_5); - } - } - } - } - } - } - } - } - } - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; -}; - -// ---------------------------------------------------------------------------------- - -namespace Reduce { - -template <typename T> -using is_void = std::is_same<T, void>; - -template <typename T> -struct is_array_type : std::false_type { - using value_type = T; -}; - -template <typename T> -struct is_array_type<T*> : std::true_type { - using value_type = T; -}; - -template <typename T> -struct is_array_type<T[]> : std::true_type { - using value_type = T; -}; - -// ------------------------------------------------------------------ // -template <int N, typename PolicyType, typename Functor, typename Tag, - typename ValueType, typename Enable = void> -struct DeviceIterateTile; - -// ParallelReduce iteration pattern -// Scalar reductions - -// num_blocks = min( num_tiles, max_num_blocks ); //i.e. determined by number of -// tiles and reduction algorithm constraints extract n-dim tile offsets (i.e. 
-// tile's global starting mulit-index) from the tileid = blockid using tile -// dimensions local indices within a tile extracted from (index_type)threadIdx_x -// using tile dims, constrained by blocksize combine tile and local id info for -// multi-dim global ids - -// Pattern: -// Each block+thread is responsible for a tile+local_id combo (additional when -// striding by num_blocks) -// 1. create offset arrays -// 2. loop over number of tiles, striding by griddim (equal to num tiles, or max -// num blocks) -// 3. temps set for tile_idx and thrd_idx, which will be modified -// 4. if LL vs LR: -// determine tile starting point offsets (multidim) -// determine local index offsets (multidim) -// concatentate tile offset + local offset for global multi-dim index -// if offset withinin range bounds AND local offset within tile bounds, call -// functor - -// ValueType = T -// Rank 2 -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 2, PolicyType, Functor, void, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = 
static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - // Deduce this blocks tile_id - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_v); - } - } - } - } - - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Specializations for tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 2, PolicyType, Functor, Tag, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < 
m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = - (thrd_idx % - m_policy.m_tile[i]); // Move this to first computation, - // add to m_offset right away - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Rank 3 -// Specializations for void tag 
type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 3, PolicyType, Functor, void, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local 
indices identified with (index_type)threadIdx_y - m_local_offset[i] = - (thrd_idx % - m_policy.m_tile[i]); // Move this to first computation, - // add to m_offset right away - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 3, PolicyType, Functor, Tag, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= 
m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = - (thrd_idx % - m_policy.m_tile[i]); // Move this to first computation, - // add to m_offset right away - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Rank 4 -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 4, PolicyType, Functor, void, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile 
starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 4, 
PolicyType, Functor, Tag, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with 
threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Rank 5 -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 5, PolicyType, Functor, void, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with 
(index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Specializations for tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 5, PolicyType, Functor, Tag, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile 
starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Rank 6 -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 6, PolicyType, Functor, void, ValueType, - typename 
std::enable_if<!is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_offset[5], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % 
m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_offset[5], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// Specializations for tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 6, PolicyType, Functor, Tag, ValueType, - typename std::enable_if<!is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - ValueType& v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with 
(index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_offset[5], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_offset[5], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - ValueType& m_v; -}; - -// ValueType = T[], T* -// Rank 2 -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 2, PolicyType, Functor, void, ValueType, - typename std::enable_if<is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global 
id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = - (thrd_idx % - m_policy.m_tile[i]); // Move this to first computation, - // add to m_offset right away - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - value_type* m_v; -}; - -// Specializations for tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 2, PolicyType, Functor, Tag, ValueType, - typename 
std::enable_if<is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = 
(thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_v); - } - } - } // end for loop over num_tiles - product of tiles in each direction - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - value_type* m_v; -}; - -// Rank 3 -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 3, PolicyType, Functor, void, ValueType, - typename std::enable_if<is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local 
indices identified with (index_type)threadIdx_y - m_local_offset[i] = - (thrd_idx % - m_policy.m_tile[i]); // Move this to first computation, - // add to m_offset right away - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = - (thrd_idx % - m_policy.m_tile[i]); // Move this to first computation, - // add to m_offset right away - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - value_type* m_v; -}; - -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 3, PolicyType, Functor, Tag, ValueType, - typename std::enable_if<is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION - DeviceIterateTile(const PolicyType& policy_, const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < 
m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - value_type* m_v; -}; - -// Rank 4 -// Specializations for void tag type -template <typename PolicyType, typename Functor, 
typename ValueType> -struct DeviceIterateTile< - 4, PolicyType, Functor, void, ValueType, - typename std::enable_if<is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - 
m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - value_type* m_v; -}; - -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 4, PolicyType, Functor, Tag, ValueType, - typename std::enable_if<is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < 
PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with (index_type)threadIdx_y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - value_type* m_v; -}; - -// Rank 5 -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 5, PolicyType, Functor, void, ValueType, - typename std::enable_if<is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if 
(static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& 
m_func; - value_type* m_v; -}; - -// Specializations for tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 5, PolicyType, Functor, Tag, ValueType, - typename std::enable_if<is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - 
m_offset[4], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_v); - } - } - } - } - } // end exec_range - - private: - const PolicyType& m_policy; - const Functor& m_func; - value_type* m_v; -}; - -// Rank 6 -// Specializations for void tag type -template <typename PolicyType, typename Functor, typename ValueType> -struct DeviceIterateTile< - 6, PolicyType, Functor, void, ValueType, - typename std::enable_if<is_array_type<ValueType>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; - - KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const PolicyType& policy_, - const Functor& f_, - value_type* v_) - : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; - - KOKKOS_IMPL_DEVICE_FUNCTION - void exec_range() const { - if (static_cast<index_type>(blockIdx.x) < m_policy.m_num_tiles && - static_cast<index_type>(threadIdx.y) < m_policy.m_prod_tile_dims) { - index_type m_offset[PolicyType::rank]; // tile starting global id offset - index_type - m_local_offset[PolicyType::rank]; // tile starting global id offset - - for (index_type tileidx = static_cast<index_type>(blockIdx.x); - tileidx < m_policy.m_num_tiles; tileidx += gridDim.x) { - index_type tile_idx = - tileidx; // temp because tile_idx will be modified while - // determining tile starting point offsets - index_type thrd_idx = 
static_cast<index_type>(threadIdx.y); - bool in_bounds = true; - - // LL - if (PolicyType::inner_direction == PolicyType::Left) { - for (int i = 0; i < PolicyType::rank; ++i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; - - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_offset[5], m_v); - } - } - // LR - else { - for (int i = PolicyType::rank - 1; i >= 0; --i) { - m_offset[i] = - (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + - m_policy.m_lower[i]; - tile_idx /= m_policy.m_tile_end[i]; - - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); - thrd_idx /= m_policy.m_tile[i]; +template <typename T> +using value_type_storage_t = + typename std::conditional_t<is_array_type<T>::value, std::decay<T>, + std::add_lvalue_reference<T> >::type; - m_offset[i] += m_local_offset[i]; - if (!(m_offset[i] < m_policy.m_upper[i] && - m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; - } - } - if (in_bounds) { - m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_offset[5], m_v); - } - } - } - } - } // end exec_range +// ParallelReduce iteration pattern +// Scalar reductions - private: - const PolicyType& m_policy; - const Functor& m_func; - value_type* m_v; -}; +// num_blocks = min( num_tiles, max_num_blocks ); //i.e. determined by number of +// tiles and reduction algorithm constraints extract n-dim tile offsets (i.e. 
+// tile's global starting mulit-index) from the tileid = blockid using tile +// dimensions local indices within a tile extracted from (index_type)threadIdx_x +// using tile dims, constrained by blocksize combine tile and local id info for +// multi-dim global ids -// Specializations for tag type -template <typename PolicyType, typename Functor, typename Tag, - typename ValueType> -struct DeviceIterateTile< - 6, PolicyType, Functor, Tag, ValueType, - typename std::enable_if<is_array_type<ValueType>::value && - !is_void<Tag>::value>::type> { - using index_type = typename PolicyType::index_type; - using value_type = typename is_array_type<ValueType>::value_type; +// Pattern: +// Each block+thread is responsible for a tile+local_id combo (additional when +// striding by num_blocks) +// 1. create offset arrays +// 2. loop over number of tiles, striding by griddim (equal to num tiles, or max +// num blocks) +// 3. temps set for tile_idx and thrd_idx, which will be modified +// 4. if LL vs LR: +// determine tile starting point offsets (multidim) +// determine local index offsets (multidim) +// concatentate tile offset + local offset for global multi-dim index +// if offset withinin range bounds AND local offset within tile bounds, call +// functor +template <int N, typename PolicyType, typename Functor, typename Tag, + typename ValueType, typename Enable = void> +struct DeviceIterateTile { + using index_type = typename PolicyType::index_type; + using value_type_storage = value_type_storage_t<ValueType>; + +#ifdef KOKKOS_ENABLE_SYCL + KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile( + const PolicyType& policy_, const Functor& f_, value_type_storage v_, + const EmulateCUDADim3<index_type> gridDim_, + const EmulateCUDADim3<index_type> blockIdx_, + const EmulateCUDADim3<index_type> threadIdx_) + : m_policy(policy_), + m_func(f_), + m_v(v_), + gridDim(gridDim_), + blockIdx(blockIdx_), + threadIdx(threadIdx_) {} +#else KOKKOS_IMPL_DEVICE_FUNCTION DeviceIterateTile(const 
PolicyType& policy_, const Functor& f_, - value_type* v_) + value_type_storage v_) : m_policy(policy_), m_func(f_), m_v(v_) {} - - static constexpr index_type max_blocks = 65535; +#endif KOKKOS_IMPL_DEVICE_FUNCTION void exec_range() const { @@ -3186,26 +974,25 @@ struct DeviceIterateTile< bool in_bounds = true; // LL - if (PolicyType::inner_direction == PolicyType::Left) { + if (PolicyType::inner_direction == Iterate::Left) { for (int i = 0; i < PolicyType::rank; ++i) { m_offset[i] = (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + m_policy.m_lower[i]; tile_idx /= m_policy.m_tile_end[i]; - // tile-local indices identified with threadIdx.y + // tile-local indices identified with (index_type)threadIdx_y m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); thrd_idx /= m_policy.m_tile[i]; m_offset[i] += m_local_offset[i]; if (!(m_offset[i] < m_policy.m_upper[i] && m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; + in_bounds = false; } } if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_offset[5], m_v); + Impl::_tag_invoke_array<Tag>(m_func, m_offset, m_v); } } // LR @@ -3216,19 +1003,21 @@ struct DeviceIterateTile< m_policy.m_lower[i]; tile_idx /= m_policy.m_tile_end[i]; - // tile-local indices identified with threadIdx.y - m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = + (thrd_idx % + m_policy.m_tile[i]); // Move this to first computation, + // add to m_offset right away thrd_idx /= m_policy.m_tile[i]; m_offset[i] += m_local_offset[i]; if (!(m_offset[i] < m_policy.m_upper[i] && m_local_offset[i] < m_policy.m_tile[i])) { - in_bounds &= false; + in_bounds = false; } } if (in_bounds) { - m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], - m_offset[4], m_offset[5], m_v); + Impl::_tag_invoke_array<Tag>(m_func, m_offset, m_v); } } } @@ -3238,7 +1027,12 @@ struct DeviceIterateTile< private: 
const PolicyType& m_policy; const Functor& m_func; - value_type* m_v; + value_type_storage m_v; +#ifdef KOKKOS_ENABLE_SYCL + const EmulateCUDADim3<index_type> gridDim; + const EmulateCUDADim3<index_type> blockIdx; + const EmulateCUDADim3<index_type> threadIdx; +#endif }; } // namespace Reduce diff --git a/packages/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp b/packages/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp index 6905d9e4066ed98d30231232a76b638f43b31634..c513817b5b8cbd74847e180099081bb475020c44 100644 --- a/packages/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp @@ -46,349 +46,91 @@ #define KOKKOS_IMPL_ANALYZE_POLICY_HPP #include <Kokkos_Core_fwd.hpp> -#include <Kokkos_Concepts.hpp> -#include <impl/Kokkos_Tags.hpp> -#include <impl/Kokkos_GraphImpl_fwd.hpp> -#include <impl/Kokkos_Error.hpp> -#include <impl/Kokkos_EBO.hpp> +#include <Kokkos_Concepts.hpp> // IndexType +#include <traits/Kokkos_Traits_fwd.hpp> +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> + +#include <traits/Kokkos_ExecutionSpaceTrait.hpp> +#include <traits/Kokkos_GraphKernelTrait.hpp> +#include <traits/Kokkos_IndexTypeTrait.hpp> +#include <traits/Kokkos_IterationPatternTrait.hpp> +#include <traits/Kokkos_LaunchBoundsTrait.hpp> +#include <traits/Kokkos_OccupancyControlTrait.hpp> +#include <traits/Kokkos_ScheduleTrait.hpp> +#include <traits/Kokkos_WorkItemPropertyTrait.hpp> +#include <traits/Kokkos_WorkTagTrait.hpp> namespace Kokkos { -namespace Experimental { -struct DesiredOccupancy { - int m_occ = 100; - explicit constexpr DesiredOccupancy(int occ) : m_occ(occ) { - KOKKOS_EXPECTS(0 <= occ && occ <= 100); - } - explicit constexpr operator int() const { return m_occ; } - constexpr int value() const { return m_occ; } - explicit DesiredOccupancy() = default; -}; -struct MaximizeOccupancy { - explicit MaximizeOccupancy() = default; -}; -} // namespace Experimental - namespace Impl { -template <typename ExecutionSpace = void, typename 
Schedule = void, - typename WorkTag = void, typename IndexType = void, - typename IterationPattern = void, typename LaunchBounds = void, - typename MyWorkItemProperty = - Kokkos::Experimental::WorkItemProperty::None_t, - typename IsGraphKernel = std::false_type, - typename OccupancyControl = Kokkos::Experimental::MaximizeOccupancy> -struct PolicyTraitsBase { - using type = - PolicyTraitsBase<ExecutionSpace, Schedule, WorkTag, IndexType, - IterationPattern, LaunchBounds, MyWorkItemProperty, - IsGraphKernel, OccupancyControl>; - using execution_space = ExecutionSpace; - using schedule_type = Schedule; - using work_tag = WorkTag; - using index_type = IndexType; - using iteration_pattern = IterationPattern; - using launch_bounds = LaunchBounds; - using work_item_property = MyWorkItemProperty; - using is_graph_kernel = IsGraphKernel; - using occupancy_control = OccupancyControl; +//------------------------------------------------------------------------------ + +using execution_policy_trait_specifications = + type_list<ExecutionSpaceTrait, GraphKernelTrait, IndexTypeTrait, + IterationPatternTrait, LaunchBoundsTrait, OccupancyControlTrait, + ScheduleTrait, WorkItemPropertyTrait, WorkTagTrait>; + +//------------------------------------------------------------------------------ +// Ignore void for backwards compatibility purposes, though hopefully no one is +// using this in application code +template <class... 
Traits> +struct AnalyzeExecPolicy<void, void, Traits...> + : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; }; -template <typename PolicyBase, typename Property> -struct SetWorkItemProperty { - static_assert( - std::is_same<typename PolicyBase::work_item_property, - Kokkos::Experimental::WorkItemProperty::None_t>::value, - "Kokkos Error: More than one work item property given"); - using type = PolicyTraitsBase< - typename PolicyBase::execution_space, typename PolicyBase::schedule_type, - typename PolicyBase::work_tag, typename PolicyBase::index_type, - typename PolicyBase::iteration_pattern, - typename PolicyBase::launch_bounds, Property, - typename PolicyBase::is_graph_kernel, - typename PolicyBase::occupancy_control>; -}; - -template <typename PolicyBase, typename ExecutionSpace> -struct SetExecutionSpace { - static_assert(is_void<typename PolicyBase::execution_space>::value, - "Kokkos Error: More than one execution space given"); - using type = - PolicyTraitsBase<ExecutionSpace, typename PolicyBase::schedule_type, - typename PolicyBase::work_tag, - typename PolicyBase::index_type, - typename PolicyBase::iteration_pattern, - typename PolicyBase::launch_bounds, - typename PolicyBase::work_item_property, - typename PolicyBase::is_graph_kernel, - typename PolicyBase::occupancy_control>; -}; - -template <typename PolicyBase, typename Schedule> -struct SetSchedule { - static_assert(is_void<typename PolicyBase::schedule_type>::value, - "Kokkos Error: More than one schedule type given"); - using type = PolicyTraitsBase<typename PolicyBase::execution_space, Schedule, - typename PolicyBase::work_tag, - typename PolicyBase::index_type, - typename PolicyBase::iteration_pattern, - typename PolicyBase::launch_bounds, - typename PolicyBase::work_item_property, - typename PolicyBase::is_graph_kernel, - typename PolicyBase::occupancy_control>; -}; - -template <typename PolicyBase, typename WorkTag> -struct 
SetWorkTag { - static_assert(is_void<typename PolicyBase::work_tag>::value, - "Kokkos Error: More than one work tag given"); - using type = PolicyTraitsBase<typename PolicyBase::execution_space, - typename PolicyBase::schedule_type, WorkTag, - typename PolicyBase::index_type, - typename PolicyBase::iteration_pattern, - typename PolicyBase::launch_bounds, - typename PolicyBase::work_item_property, - typename PolicyBase::is_graph_kernel, - typename PolicyBase::occupancy_control>; -}; - -template <typename PolicyBase, typename IndexType> -struct SetIndexType { - static_assert(is_void<typename PolicyBase::index_type>::value, - "Kokkos Error: More than one index type given"); - using type = PolicyTraitsBase<typename PolicyBase::execution_space, - typename PolicyBase::schedule_type, - typename PolicyBase::work_tag, IndexType, - typename PolicyBase::iteration_pattern, - typename PolicyBase::launch_bounds, - typename PolicyBase::work_item_property, - typename PolicyBase::is_graph_kernel, - typename PolicyBase::occupancy_control>; -}; - -template <typename PolicyBase, typename IterationPattern> -struct SetIterationPattern { - static_assert(is_void<typename PolicyBase::iteration_pattern>::value, - "Kokkos Error: More than one iteration_pattern given"); - using type = PolicyTraitsBase< - typename PolicyBase::execution_space, typename PolicyBase::schedule_type, - typename PolicyBase::work_tag, typename PolicyBase::index_type, - IterationPattern, typename PolicyBase::launch_bounds, - typename PolicyBase::work_item_property, - typename PolicyBase::is_graph_kernel, - typename PolicyBase::occupancy_control>; -}; - -template <typename PolicyBase, typename LaunchBounds> -struct SetLaunchBounds { - static_assert(is_void<typename PolicyBase::launch_bounds>::value, - "Kokkos Error: More than one launch_bounds given"); - using type = PolicyTraitsBase< - typename PolicyBase::execution_space, typename PolicyBase::schedule_type, - typename PolicyBase::work_tag, typename 
PolicyBase::index_type, - typename PolicyBase::iteration_pattern, LaunchBounds, - typename PolicyBase::work_item_property, - typename PolicyBase::is_graph_kernel, - typename PolicyBase::occupancy_control>; -}; - -template <typename PolicyBase> -struct SetIsGraphKernel { - using type = PolicyTraitsBase< - typename PolicyBase::execution_space, typename PolicyBase::schedule_type, - typename PolicyBase::work_tag, typename PolicyBase::index_type, - typename PolicyBase::iteration_pattern, - typename PolicyBase::launch_bounds, - typename PolicyBase::work_item_property, std::true_type, - typename PolicyBase::occupancy_control>; -}; - -template <typename PolicyBase, typename OccupancyControl> -struct SetOccupancyControl { - using type = PolicyTraitsBase< - typename PolicyBase::execution_space, typename PolicyBase::schedule_type, - typename PolicyBase::work_tag, typename PolicyBase::index_type, - typename PolicyBase::iteration_pattern, - typename PolicyBase::launch_bounds, - typename PolicyBase::work_item_property, - typename PolicyBase::is_graph_kernel, OccupancyControl>; -}; - -template <typename Base, typename... Traits> -struct AnalyzePolicy; - -// TODO DSH rewrite this to be more extensible once we have metaprogramming from -// desul -template <typename Base, typename T, typename... 
Traits> -struct AnalyzePolicy<Base, T, Traits...> - : public AnalyzePolicy< - typename std::conditional_t< - is_execution_space<T>::value, SetExecutionSpace<Base, T>, - std::conditional_t< - is_schedule_type<T>::value, SetSchedule<Base, T>, - std::conditional_t< - is_index_type<T>::value, SetIndexType<Base, T>, - std::conditional_t< - std::is_integral<T>::value, - SetIndexType<Base, IndexType<T>>, - std::conditional_t< - is_iteration_pattern<T>::value, - SetIterationPattern<Base, T>, - std::conditional_t< - is_launch_bounds<T>::value, - SetLaunchBounds<Base, T>, - std::conditional_t< - Kokkos::Experimental:: - is_work_item_property<T>::value, - SetWorkItemProperty<Base, T>, - std::conditional_t< - std::is_same<T, - IsGraphKernelTag>::value, - SetIsGraphKernel<Base>, - std::conditional_t< - std::is_same< - T, Kokkos::Experimental:: - DesiredOccupancy>:: - value || - std::is_same< - T, - Kokkos::Experimental:: - MaximizeOccupancy>:: - value, - SetOccupancyControl<Base, T>, - std::conditional_t< - !std::is_void<T>::value, - SetWorkTag<Base, T>, - Base>>>>>>>>>>::type, - Traits...> {}; - -template <typename Base> -struct AnalyzePolicy<Base> { - static constexpr auto execution_space_is_defaulted = - std::is_void<typename Base::execution_space>::value; - using execution_space = - typename std::conditional<execution_space_is_defaulted, - DefaultExecutionSpace, - typename Base::execution_space>::type; - - using schedule_type = - typename std::conditional<is_void<typename Base::schedule_type>::value, - Schedule<Static>, - typename Base::schedule_type>::type; - - using work_tag = typename Base::work_tag; - - using index_type = - typename std::conditional<is_void<typename Base::index_type>::value, - IndexType<typename execution_space::size_type>, - typename Base::index_type>::type::type; - // nasty hack to make index_type into an integral_type - // instead of the wrapped IndexType<T> for backwards compatibility - - using iteration_pattern = typename std::conditional< - 
is_void<typename Base::iteration_pattern>::value, - void // TODO set default iteration pattern - , - typename Base::iteration_pattern>::type; - - using launch_bounds = - typename std::conditional<is_void<typename Base::launch_bounds>::value, - LaunchBounds<>, - typename Base::launch_bounds>::type; - - using work_item_property = typename Base::work_item_property; - - using is_graph_kernel = typename Base::is_graph_kernel; - - using occupancy_control = typename Base::occupancy_control; - - using type = - PolicyTraitsBase<execution_space, schedule_type, work_tag, index_type, - iteration_pattern, launch_bounds, work_item_property, - is_graph_kernel, occupancy_control>; -}; - -template <class AnalyzedPolicy> -struct PolicyDataStorage : AnalyzedPolicy, - NoUniqueAddressMemberEmulation< - typename AnalyzedPolicy::occupancy_control> { - using occupancy_control_t = typename AnalyzedPolicy::occupancy_control; - - using occupancy_control_storage_base_t = - NoUniqueAddressMemberEmulation<occupancy_control_t>; - - static constexpr bool experimental_contains_desired_occupancy = - std::is_same<occupancy_control_t, - Kokkos::Experimental::DesiredOccupancy>::value; - - PolicyDataStorage() = default; - - // Converting constructors - template < - class Other, - std::enable_if_t< - experimental_contains_desired_occupancy && - PolicyDataStorage<Other>::experimental_contains_desired_occupancy, - int> = 0> - PolicyDataStorage(PolicyDataStorage<Other> const &other) { - this->impl_set_desired_occupancy(other.impl_get_desired_occupancy()); - } - - template <class Other, - std::enable_if_t<!experimental_contains_desired_occupancy || - !PolicyDataStorage<Other>:: - experimental_contains_desired_occupancy, - int> = 0> - PolicyDataStorage(PolicyDataStorage<Other> const &) {} - - // Converting assignment operators - template < - class Other, - std::enable_if_t< - experimental_contains_desired_occupancy && - PolicyDataStorage<Other>::experimental_contains_desired_occupancy, - int> = 0> - 
PolicyDataStorage &operator=(PolicyDataStorage<Other> const &other) { - this->impl_set_desired_occupancy(other.impl_get_desired_occupancy()); - return *this; - } - - template <class Other, - std::enable_if_t<!experimental_contains_desired_occupancy || - !PolicyDataStorage<Other>:: - experimental_contains_desired_occupancy, - int> = 0> - PolicyDataStorage &operator=(PolicyDataStorage<Other> const &) { +//------------------------------------------------------------------------------ +// Mix in the defaults (base_traits) for the traits that aren't yet handled + +template <class TraitSpecList> +struct KOKKOS_IMPL_ENFORCE_EMPTY_BASE_OPTIMIZATION AnalyzeExecPolicyBaseTraits; +template <class... TraitSpecifications> +struct KOKKOS_IMPL_ENFORCE_EMPTY_BASE_OPTIMIZATION + AnalyzeExecPolicyBaseTraits<type_list<TraitSpecifications...>> + : TraitSpecifications::base_traits... {}; + +template <> +struct AnalyzeExecPolicy<void> + : AnalyzeExecPolicyBaseTraits<execution_policy_trait_specifications> { + // Ensure default constructibility since a converting constructor causes it to + // be deleted. 
+ AnalyzeExecPolicy() = default; + + // Base converting constructor and assignment operator: unless an individual + // policy analysis deletes a constructor, assume it's convertible + template <class Other> + AnalyzeExecPolicy(ExecPolicyTraitsWithDefaults<Other> const&) {} + + template <class Other> + AnalyzeExecPolicy& operator=(ExecPolicyTraitsWithDefaults<Other> const&) { return *this; } +}; - // Access to desired occupancy (getter and setter) - template <class Dummy = occupancy_control_t> - std::enable_if_t<std::is_same<Dummy, occupancy_control_t>::value && - experimental_contains_desired_occupancy, - Kokkos::Experimental::DesiredOccupancy> - impl_get_desired_occupancy() const { - return this - ->occupancy_control_storage_base_t::no_unique_address_data_member(); - } - - template <class Dummy = occupancy_control_t> - std::enable_if_t<std::is_same<Dummy, occupancy_control_t>::value && - experimental_contains_desired_occupancy> - impl_set_desired_occupancy(occupancy_control_t desired_occupancy) { - this->occupancy_control_storage_base_t::no_unique_address_data_member() = - desired_occupancy; - } +//------------------------------------------------------------------------------ +// Used for defaults that depend on other analysis results +template <class AnalysisResults> +struct ExecPolicyTraitsWithDefaults : AnalysisResults { + using base_t = AnalysisResults; + using base_t::base_t; + // The old code turned this into an integral type for backwards compatibility, + // so that's what we're doing here. The original comment was: + // nasty hack to make index_type into an integral_type + // instead of the wrapped IndexType<T> for backwards compatibility + using index_type = typename std::conditional_t< + base_t::index_type_is_defaulted, + Kokkos::IndexType<typename base_t::execution_space::size_type>, + typename base_t::index_type>::type; }; +//------------------------------------------------------------------------------ template <typename... 
Traits> struct PolicyTraits - : PolicyDataStorage< - typename AnalyzePolicy<PolicyTraitsBase<>, Traits...>::type> { - using base_t = PolicyDataStorage< - typename AnalyzePolicy<PolicyTraitsBase<>, Traits...>::type>; - template <class... Args> - PolicyTraits(PolicyTraits<Args...> const &p) : base_t(p) {} - PolicyTraits() = default; + : ExecPolicyTraitsWithDefaults<AnalyzeExecPolicy<void, Traits...>> { + using base_t = + ExecPolicyTraitsWithDefaults<AnalyzeExecPolicy<void, Traits...>>; + using base_t::base_t; }; } // namespace Impl diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp index 8ed130d15fcabb443dbb693ce6240ffb2ab7dd58..f2c1c756a910d26de0eb3765e0b90684e564d243 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -191,8 +191,7 @@ inline T atomic_exchange(volatile T* const dest, typename std::enable_if<sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long), const T&>::type val) { - using type = - typename Kokkos::Impl::if_c<sizeof(T) == sizeof(int), int, long>::type; + using type = std::conditional_t<sizeof(T) == sizeof(int), int, long>; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif @@ -285,8 +284,7 @@ inline void atomic_assign(volatile T* const dest, typename std::enable_if<sizeof(T) == sizeof(int) || sizeof(T) == sizeof(long), const T&>::type val) { - using type = - typename Kokkos::Impl::if_c<sizeof(T) == sizeof(int), int, long>::type; + using type = std::conditional_t<sizeof(T) == sizeof(int), int, long>; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp index 6b9e4a4a250518826ed5341ee5f461fc4c021d38..28ac7a3bab9e748f9d315ca479f57db885ed75c4 100644 --- 
a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -345,7 +345,7 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( return return_val; #elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL) // FIXME_SYCL - std::abort(); + Kokkos::abort("Not implemented!"); (void)op; (void)dest; (void)val; diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_MinMax.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_MinMax.hpp index 8a886d0a7757ae085a223511162a0ed956dd6d6a..7338a5c545f25f58662c2e05c4a20bda4992e203 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_MinMax.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_MinMax.hpp @@ -101,6 +101,52 @@ inline __host__ unsigned long long int atomic_fetch_max( #endif +#if (350 > __CUDA_ARCH__) + +// Fallback for atomic{Min,Max} for Kepler + +inline __device__ int atomic_fetch_min(volatile int* const dest, + const int val) { + return Impl::atomic_fetch_oper(Impl::MinOper<const int, const int>(), dest, + val); +} + +inline __device__ unsigned int atomic_fetch_min( + volatile unsigned int* const dest, const unsigned int val) { + return Impl::atomic_fetch_oper( + Impl::MinOper<const unsigned int, const unsigned int>(), dest, val); +} + +inline __device__ unsigned long long int atomic_fetch_min( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + return Impl::atomic_fetch_oper(Impl::MinOper<const unsigned long long int, + const unsigned long long int>(), + dest, val); +} + +inline __device__ int atomic_fetch_max(volatile int* const dest, + const int val) { + return Impl::atomic_fetch_oper(Impl::MaxOper<const int, const int>(), dest, + val); +} + +inline __device__ unsigned int atomic_fetch_max( + volatile unsigned int* const dest, const unsigned int val) { + return Impl::atomic_fetch_oper( + Impl::MaxOper<const unsigned int, const unsigned int>(), dest, val); +} + +inline __device__ unsigned long long int atomic_fetch_max( + 
volatile unsigned long long int* const dest, + const unsigned long long int val) { + return Impl::atomic_fetch_oper(Impl::MaxOper<const unsigned long long int, + const unsigned long long int>(), + dest, val); +} + +#else // Supported by devices of compute capability 3.5 and higher + inline __device__ int atomic_fetch_min(volatile int* const dest, const int val) { return atomicMin((int*)dest, val); @@ -133,6 +179,8 @@ inline __device__ unsigned long long int atomic_fetch_max( return atomicMax((unsigned long long int*)dest, val); } +#endif + // Atomic_{min,max}_fetch #ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND @@ -178,6 +226,52 @@ inline __host__ unsigned long long int atomic_max_fetch( } #endif +#if (350 > __CUDA_ARCH__) + +// Fallback for atomic{Min,Max} for Kepler + +inline __device__ int atomic_min_fetch(volatile int* const dest, + const int val) { + return Impl::atomic_oper_fetch(Impl::MinOper<const int, const int>(), dest, + val); +} + +inline __device__ unsigned int atomic_min_fetch( + volatile unsigned int* const dest, const unsigned int val) { + return Impl::atomic_oper_fetch( + Impl::MinOper<const unsigned int, const unsigned int>(), dest, val); +} + +inline __device__ unsigned long long int atomic_min_fetch( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + return Impl::atomic_oper_fetch(Impl::MinOper<const unsigned long long int, + const unsigned long long int>(), + dest, val); +} + +inline __device__ int atomic_max_fetch(volatile int* const dest, + const int val) { + return Impl::atomic_oper_fetch(Impl::MaxOper<const int, const int>(), dest, + val); +} + +inline __device__ unsigned int atomic_max_fetch( + volatile unsigned int* const dest, const unsigned int val) { + return Impl::atomic_oper_fetch( + Impl::MaxOper<const unsigned int, const unsigned int>(), dest, val); +} + +inline __device__ unsigned long long int atomic_max_fetch( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + 
return Impl::atomic_oper_fetch(Impl::MaxOper<const unsigned long long int, + const unsigned long long int>(), + dest, val); +} + +#else // Supported by devices of compute capability 3.5 and higher + inline __device__ int atomic_min_fetch(volatile int* const dest, const int val) { const int old = atomicMin((int*)dest, val); @@ -216,6 +310,8 @@ inline __device__ unsigned long long int atomic_max_fetch( return old >= val ? old : val; } +#endif + #endif #endif } // namespace Kokkos diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_View.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_View.hpp index 3916a1b03d58ba718ce9492b5270b071bcc4b55b..975318b7dde67a1d1569c3cf657060c3ae18215d 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_View.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_View.hpp @@ -299,14 +299,18 @@ class AtomicDataElement { } KOKKOS_INLINE_FUNCTION - bool operator==(const_value_type& val) const { return *ptr == val; } + bool operator==(const AtomicDataElement& val) const { return *ptr == val; } KOKKOS_INLINE_FUNCTION - bool operator==(volatile const_value_type& val) const { return *ptr == val; } + bool operator==(volatile const AtomicDataElement& val) const { + return *ptr == val; + } KOKKOS_INLINE_FUNCTION - bool operator!=(const_value_type& val) const { return *ptr != val; } + bool operator!=(const AtomicDataElement& val) const { return *ptr != val; } KOKKOS_INLINE_FUNCTION - bool operator!=(volatile const_value_type& val) const { return *ptr != val; } + bool operator!=(volatile const AtomicDataElement& val) const { + return *ptr != val; + } KOKKOS_INLINE_FUNCTION bool operator>=(const_value_type& val) const { return *ptr >= val; } diff --git a/packages/kokkos/core/src/impl/Kokkos_ClockTic.hpp b/packages/kokkos/core/src/impl/Kokkos_ClockTic.hpp index 8c2d53ba14628bfbd075155a846794fef5d13728..4e46b8d157f83129182d4db9b725bcddbe3ed28b 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ClockTic.hpp +++ 
b/packages/kokkos/core/src/impl/Kokkos_ClockTic.hpp @@ -52,6 +52,15 @@ #include <omp.h> #endif +// To use OpenCL(TM) built-in intrinsics inside kernels, we have to +// forward-declare their prototype, also see +// https://github.com/intel/pti-gpu/blob/master/chapters/binary_instrumentation/OpenCLBuiltIn.md +#if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_ARCH_INTEL_GEN) && \ + defined(__SYCL_DEVICE_ONLY__) +extern SYCL_EXTERNAL unsigned long __attribute__((overloadable)) +intel_get_cycle_counter(); +#endif + namespace Kokkos { namespace Impl { @@ -69,13 +78,16 @@ namespace Impl { */ KOKKOS_FORCEINLINE_FUNCTION -uint64_t clock_tic(void) noexcept { +uint64_t clock_tic() noexcept { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) // Return value of 64-bit hi-res clock register. return clock64(); +#elif defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_ARCH_INTEL_GEN) && \ + defined(__SYCL_DEVICE_ONLY__) + return intel_get_cycle_counter(); #elif defined(KOKKOS_ENABLE_OPENMPTARGET) return uint64_t(omp_get_wtime() * 1.e9); #elif defined(__i386__) || defined(__x86_64) diff --git a/packages/kokkos/core/src/impl/Kokkos_Combined_Reducer.hpp b/packages/kokkos/core/src/impl/Kokkos_Combined_Reducer.hpp index 61c1375900ecb88add73b0e8a07156b4d6bb94d4..06681a95ae902c613c701cd78ff572d35da6c0a1 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Combined_Reducer.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Combined_Reducer.hpp @@ -53,6 +53,8 @@ #include <Kokkos_AnonymousSpace.hpp> #include <impl/Kokkos_Utilities.hpp> // comma operator fold emulation +#include <utility> + namespace Kokkos { namespace Impl { @@ -99,7 +101,7 @@ template <class IdxSeq, class... ValueTypes> struct CombinedReducerValueImpl; template <size_t... Idxs, class... ValueTypes> -struct CombinedReducerValueImpl<integer_sequence<size_t, Idxs...>, +struct CombinedReducerValueImpl<std::integer_sequence<size_t, Idxs...>, ValueTypes...> : CombinedReducerValueItemImpl<Idxs, ValueTypes>... 
{ public: @@ -220,14 +222,15 @@ template <class IdxSeq, class Space, class...> struct CombinedReducerImpl; template <size_t... Idxs, class Space, class... Reducers> -struct CombinedReducerImpl<integer_sequence<size_t, Idxs...>, Space, +struct CombinedReducerImpl<std::integer_sequence<size_t, Idxs...>, Space, Reducers...> : private CombinedReducerStorageImpl<Idxs, Reducers>... { public: - using reducer = CombinedReducerImpl<integer_sequence<size_t, Idxs...>, Space, - Reducers...>; - using value_type = CombinedReducerValueImpl<integer_sequence<size_t, Idxs...>, - typename Reducers::value_type...>; + using reducer = CombinedReducerImpl<std::integer_sequence<size_t, Idxs...>, + Space, Reducers...>; + using value_type = + CombinedReducerValueImpl<std::integer_sequence<size_t, Idxs...>, + typename Reducers::value_type...>; using result_view_type = Kokkos::View<value_type, Space, Kokkos::MemoryUnmanaged>; @@ -309,10 +312,11 @@ struct CombinedReducerImpl<integer_sequence<size_t, Idxs...>, Space, // thing. template <class Space, class... Reducers> struct CombinedReducer - : CombinedReducerImpl<make_index_sequence<sizeof...(Reducers)>, Space, + : CombinedReducerImpl<std::make_index_sequence<sizeof...(Reducers)>, Space, Reducers...> { - using base_t = CombinedReducerImpl<make_index_sequence<sizeof...(Reducers)>, - Space, Reducers...>; + using base_t = + CombinedReducerImpl<std::make_index_sequence<sizeof...(Reducers)>, Space, + Reducers...>; using base_t::base_t; using reducer = CombinedReducer<Space, Reducers...>; }; @@ -327,8 +331,8 @@ template <class IdxSeq, class Functor, class Space, class... Reducers> struct CombinedReductionFunctorWrapperImpl; template <size_t... Idxs, class Functor, class Space, class... 
Reducers> -struct CombinedReductionFunctorWrapperImpl<integer_sequence<size_t, Idxs...>, - Functor, Space, Reducers...> { +struct CombinedReductionFunctorWrapperImpl< + std::integer_sequence<size_t, Idxs...>, Functor, Space, Reducers...> { private: Functor m_functor; @@ -425,10 +429,11 @@ struct CombinedReductionFunctorWrapperImpl<integer_sequence<size_t, Idxs...>, template <class Functor, class Space, class... Reducers> struct CombinedReductionFunctorWrapper : CombinedReductionFunctorWrapperImpl< - make_index_sequence<sizeof...(Reducers)>, Functor, Space, + std::make_index_sequence<sizeof...(Reducers)>, Functor, Space, Reducers...> { using base_t = CombinedReductionFunctorWrapperImpl< - make_index_sequence<sizeof...(Reducers)>, Functor, Space, Reducers...>; + std::make_index_sequence<sizeof...(Reducers)>, Functor, Space, + Reducers...>; using base_t::base_t; }; @@ -488,11 +493,8 @@ using _reducer_from_arg_t = //------------------------------------------------------------------------------ template <class Space, class... ReferencesOrViewsOrReducers> -KOKKOS_INLINE_FUNCTION constexpr CombinedReducerValueImpl< - make_index_sequence<sizeof...(ReferencesOrViewsOrReducers)>, - typename _reducer_from_arg_t<Space, - ReferencesOrViewsOrReducers>::value_type...> -make_combined_reducer_value(ReferencesOrViewsOrReducers&&... args) { +KOKKOS_INLINE_FUNCTION constexpr auto make_combined_reducer_value( + ReferencesOrViewsOrReducers&&... args) { //---------------------------------------- // This is a bit round-about and we should make sure it doesn't have // any performance implications. Basically, we make a reducer out of anything @@ -500,7 +502,7 @@ make_combined_reducer_value(ReferencesOrViewsOrReducers&&... args) { // compilers should figure out what's going on, but we should double-check // that. 
return CombinedReducerValueImpl< - make_index_sequence<sizeof...(ReferencesOrViewsOrReducers)>, + std::make_index_sequence<sizeof...(ReferencesOrViewsOrReducers)>, typename _reducer_from_arg_t<Space, ReferencesOrViewsOrReducers>::value_type...>{ // This helper function is now poorly named after refactoring. @@ -510,9 +512,8 @@ make_combined_reducer_value(ReferencesOrViewsOrReducers&&... args) { } template <class Space, class ValueType, class... ReferencesOrViewsOrReducers> -KOKKOS_INLINE_FUNCTION constexpr CombinedReducer< - Space, _reducer_from_arg_t<Space, ReferencesOrViewsOrReducers>...> -make_combined_reducer(ValueType& value, ReferencesOrViewsOrReducers&&... args) { +KOKKOS_INLINE_FUNCTION constexpr auto make_combined_reducer( + ValueType& value, ReferencesOrViewsOrReducers&&... args) { //---------------------------------------- // This is doing more or less the same thing of making every argument into // a reducer, just in a different place than in `make_combined_reducer_value`, @@ -526,10 +527,8 @@ make_combined_reducer(ValueType& value, ReferencesOrViewsOrReducers&&... args) { } template <class Functor, class Space, class... ReferencesOrViewsOrReducers> -KOKKOS_INLINE_FUNCTION constexpr CombinedReductionFunctorWrapper< - Functor, Space, _reducer_from_arg_t<Space, ReferencesOrViewsOrReducers>...> -make_wrapped_combined_functor(Functor const& functor, Space, - ReferencesOrViewsOrReducers&&...) { +KOKKOS_INLINE_FUNCTION constexpr auto make_wrapped_combined_functor( + Functor const& functor, Space, ReferencesOrViewsOrReducers&&...) 
{ //---------------------------------------- return CombinedReductionFunctorWrapper< Functor, Space, diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp index d6dc384f2ff6f98c82736b5c603810706cee1ff7..b4769fbeaa53be8353df315ede634708da1b297d 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp @@ -54,6 +54,7 @@ #include <functional> #include <list> #include <cerrno> +#include <regex> #ifndef _WIN32 #include <unistd.h> #else @@ -80,6 +81,23 @@ std::stack<hook_function_type, std::list<hook_function_type>> finalize_hooks; namespace Kokkos { namespace Impl { +/** + * The category is only used in printing, tools + * get all metadata free of category + */ +using metadata_category_type = std::string; +using metadata_key_type = std::string; +using metadata_value_type = std::string; + +std::map<metadata_category_type, + std::map<metadata_key_type, metadata_value_type>> + metadata_map; + +void declare_configuration_metadata(const std::string& category, + const std::string& key, + const std::string& value) { + metadata_map[category][key] = value; +} ExecSpaceManager& ExecSpaceManager::get_instance() { static ExecSpaceManager space_initializer = {}; @@ -210,8 +228,19 @@ int get_ctest_gpu(const char* local_rank_str) { // function to extract gpu # from args int get_gpu(const InitArguments& args) { - int use_gpu = args.device_id; - const int ndevices = args.ndevices; + int use_gpu = args.device_id; + const int ndevices = [](int num_devices) -> int { + if (num_devices > 0) return num_devices; +#if defined(KOKKOS_ENABLE_CUDA) + return Cuda::detect_device_count(); +#elif defined(KOKKOS_ENABLE_HIP) + return Experimental::HIP::detect_device_count(); +#elif defined(KOKKOS_ENABLE_SYCL) + return sycl::device::get_devices(sycl::info::device_type::gpu).size(); +#else + return num_devices; +#endif + }(args.ndevices); const int skip_device = args.skip_device; // if the exact 
device is not set, but ndevices was given, assign round-robin @@ -232,7 +261,7 @@ int get_gpu(const InitArguments& args) { local_rank_str) { // Use the device assigned by CTest use_gpu = get_ctest_gpu(local_rank_str); - } else if (ndevices >= 0) { + } else if (ndevices > 0) { // Use the device assigned by the rank if (local_rank_str) { auto local_rank = std::stoi(local_rank_str); @@ -270,13 +299,221 @@ void initialize_backends(const InitArguments& args) { Impl::ExecSpaceManager::get_instance().initialize_spaces(args); } -void initialize_profiling(const InitArguments&) { - Kokkos::Profiling::initialize(); +void initialize_profiling(const InitArguments& args) { + Kokkos::Profiling::initialize(args.tool_lib); + if (args.tool_help) { + if (!Kokkos::Tools::printHelp(args.tool_args)) { + std::cerr << "Tool has not provided a help message" << std::endl; + } + g_is_initialized = true; + ::Kokkos::finalize(); + std::exit(EXIT_SUCCESS); + } + Kokkos::Tools::parseArgs(args.tool_args); + for (const auto& category_value : Kokkos::Impl::metadata_map) { + for (const auto& key_value : category_value.second) { + Kokkos::Tools::declareMetadata(key_value.first, key_value.second); + } + } } +std::string version_string_from_int(int version_number) { + std::stringstream str_builder; + str_builder << version_number / 10000 << "." << (version_number % 10000) / 100 + << "." 
<< version_number % 100; + return str_builder.str(); +} void pre_initialize_internal(const InitArguments& args) { if (args.disable_warnings) g_show_warnings = false; if (args.tune_internals) g_tune_internals = true; + declare_configuration_metadata("version_info", "Kokkos Version", + version_string_from_int(KOKKOS_VERSION)); +#ifdef KOKKOS_COMPILER_APPLECC + declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_APPLECC", + std::to_string(KOKKOS_COMPILER_APPLECC)); + declare_configuration_metadata("tools_only", "compiler_family", "apple"); +#endif +#ifdef KOKKOS_COMPILER_CLANG + declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_CLANG", + std::to_string(KOKKOS_COMPILER_CLANG)); + declare_configuration_metadata("tools_only", "compiler_family", "clang"); +#endif +#ifdef KOKKOS_COMPILER_CRAYC + declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_CRAYC", + std::to_string(KOKKOS_COMPILER_CRAYC)); + declare_configuration_metadata("tools_only", "compiler_family", "cray"); +#endif +#ifdef KOKKOS_COMPILER_GNU + declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_GNU", + std::to_string(KOKKOS_COMPILER_GNU)); + declare_configuration_metadata("tools_only", "compiler_family", "gnu"); +#endif +#ifdef KOKKOS_COMPILER_IBM + declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_IBM", + std::to_string(KOKKOS_COMPILER_IBM)); + declare_configuration_metadata("tools_only", "compiler_family", "ibm"); +#endif +#ifdef KOKKOS_COMPILER_INTEL + declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_INTEL", + std::to_string(KOKKOS_COMPILER_INTEL)); + declare_configuration_metadata("tools_only", "compiler_family", "intel"); +#endif +#ifdef KOKKOS_COMPILER_NVCC + declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_NVCC", + std::to_string(KOKKOS_COMPILER_NVCC)); + declare_configuration_metadata("tools_only", "compiler_family", "nvcc"); +#endif +#ifdef KOKKOS_COMPILER_PGI + 
declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_PGI", + std::to_string(KOKKOS_COMPILER_PGI)); + declare_configuration_metadata("tools_only", "compiler_family", "pgi"); +#endif +#ifdef KOKKOS_COMPILER_MSVC + declare_configuration_metadata("compiler_version", "KOKKOS_COMPILER_MSVC", + std::to_string(KOKKOS_COMPILER_MSVC)); + declare_configuration_metadata("tools_only", "compiler_family", "msvc"); +#endif +#ifdef KOKKOS_ENABLE_ISA_KNC + declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_KNC", + "yes"); +#else + declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_KNC", "no"); +#endif +#ifdef KOKKOS_ENABLE_ISA_POWERPCLE + declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_POWERPCLE", + "yes"); +#else + declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_POWERPCLE", + "no"); +#endif +#ifdef KOKKOS_ENABLE_ISA_X86_64 + declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_X86_64", + "yes"); +#else + declare_configuration_metadata("architecture", "KOKKOS_ENABLE_ISA_X86_64", + "no"); +#endif + +#ifdef KOKKOS_ENABLE_GNU_ATOMICS + declare_configuration_metadata("atomics", "KOKKOS_ENABLE_GNU_ATOMICS", "yes"); +#else + declare_configuration_metadata("atomics", "KOKKOS_ENABLE_GNU_ATOMICS", "no"); +#endif +#ifdef KOKKOS_ENABLE_INTEL_ATOMICS + declare_configuration_metadata("atomics", "KOKKOS_ENABLE_INTEL_ATOMICS", + "yes"); +#else + declare_configuration_metadata("atomics", "KOKKOS_ENABLE_INTEL_ATOMICS", + "no"); +#endif +#ifdef KOKKOS_ENABLE_WINDOWS_ATOMICS + declare_configuration_metadata("atomics", "KOKKOS_ENABLE_WINDOWS_ATOMICS", + "yes"); +#else + declare_configuration_metadata("atomics", "KOKKOS_ENABLE_WINDOWS_ATOMICS", + "no"); +#endif + +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP + declare_configuration_metadata("vectorization", "KOKKOS_ENABLE_PRAGMA_IVDEP", + "yes"); +#else + declare_configuration_metadata("vectorization", "KOKKOS_ENABLE_PRAGMA_IVDEP", + "no"); +#endif +#ifdef 
KOKKOS_ENABLE_PRAGMA_LOOPCOUNT + declare_configuration_metadata("vectorization", + "KOKKOS_ENABLE_PRAGMA_LOOPCOUNT", "yes"); +#else + declare_configuration_metadata("vectorization", + "KOKKOS_ENABLE_PRAGMA_LOOPCOUNT", "no"); +#endif +#ifdef KOKKOS_ENABLE_PRAGMA_SIMD + declare_configuration_metadata("vectorization", "KOKKOS_ENABLE_PRAGMA_SIMD", + "yes"); +#else + declare_configuration_metadata("vectorization", "KOKKOS_ENABLE_PRAGMA_SIMD", + "no"); +#endif +#ifdef KOKKOS_ENABLE_PRAGMA_UNROLL + declare_configuration_metadata("vectorization", "KOKKOS_ENABLE_PRAGMA_UNROLL", + "yes"); +#else + declare_configuration_metadata("vectorization", "KOKKOS_ENABLE_PRAGMA_UNROLL", + "no"); +#endif +#ifdef KOKKOS_ENABLE_PRAGMA_VECTOR + declare_configuration_metadata("vectorization", "KOKKOS_ENABLE_PRAGMA_VECTOR", + "yes"); +#else + declare_configuration_metadata("vectorization", "KOKKOS_ENABLE_PRAGMA_VECTOR", + "no"); +#endif + +#ifdef KOKKOS_ENABLE_HBWSPACE + declare_configuration_metadata("memory", "KOKKOS_ENABLE_HBWSPACE", "yes"); +#else + declare_configuration_metadata("memory", "KOKKOS_ENABLE_HBWSPACE", "no"); +#endif +#ifdef KOKKOS_ENABLE_INTEL_MM_ALLOC + declare_configuration_metadata("memory", "KOKKOS_ENABLE_INTEL_MM_ALLOC", + "yes"); +#else + declare_configuration_metadata("memory", "KOKKOS_ENABLE_INTEL_MM_ALLOC", + "no"); +#endif +#ifdef KOKKOS_ENABLE_POSIX_MEMALIGN + declare_configuration_metadata("memory", "KOKKOS_ENABLE_POSIX_MEMALIGN", + "yes"); +#else + declare_configuration_metadata("memory", "KOKKOS_ENABLE_POSIX_MEMALIGN", + "no"); +#endif + +#ifdef KOKKOS_ENABLE_ASM + declare_configuration_metadata("options", "KOKKOS_ENABLE_ASM", "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_ASM", "no"); +#endif +#ifdef KOKKOS_ENABLE_CXX14 + declare_configuration_metadata("options", "KOKKOS_ENABLE_CXX14", "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_CXX14", "no"); +#endif +#ifdef KOKKOS_ENABLE_CXX17 + 
declare_configuration_metadata("options", "KOKKOS_ENABLE_CXX17", "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_CXX17", "no"); +#endif +#ifdef KOKKOS_ENABLE_CXX20 + declare_configuration_metadata("options", "KOKKOS_ENABLE_CXX20", "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_CXX20", "no"); +#endif +#ifdef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + declare_configuration_metadata("options", "KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK", + "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK", + "no"); +#endif +#ifdef KOKKOS_ENABLE_HWLOC + declare_configuration_metadata("options", "KOKKOS_ENABLE_HWLOC", "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_HWLOC", "no"); +#endif +#ifdef KOKKOS_ENABLE_LIBRT + declare_configuration_metadata("options", "KOKKOS_ENABLE_LIBRT", "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_LIBRT", "no"); +#endif +#ifdef KOKKOS_ENABLE_MPI + declare_configuration_metadata("options", "KOKKOS_ENABLE_MPI", "yes"); +#else + declare_configuration_metadata("options", "KOKKOS_ENABLE_MPI", "no"); +#endif + declare_configuration_metadata("architecture", "Default Device", + typeid(Kokkos::DefaultExecutionSpace).name()); } void post_initialize_internal(const InitArguments& args) { @@ -364,6 +601,24 @@ bool check_int_arg(char const* arg, char const* expected, int* value) { return true; } +bool check_str_arg(char const* arg, char const* expected, std::string& value) { + if (!check_arg(arg, expected)) return false; + std::size_t arg_len = std::strlen(arg); + std::size_t exp_len = std::strlen(expected); + bool okay = true; + if (arg_len == exp_len || arg[exp_len] != '=') okay = false; + char const* remain = arg + exp_len + 1; + value = remain; + if (!okay) { + std::ostringstream ss; + ss << "Error: expecting an '=STRING' after command line argument '" + << expected << "'"; + ss << ". 
Raised by Kokkos::initialize(int narg, char* argc[])."; + Impl::throw_runtime_exception(ss.str()); + } + return true; +} + void warn_deprecated_command_line_argument(std::string deprecated, std::string valid) { std::cerr @@ -390,6 +645,9 @@ void parse_command_line_arguments(int& narg, char* arg[], auto& skip_device = arguments.skip_device; auto& disable_warnings = arguments.disable_warnings; auto& tune_internals = arguments.tune_internals; + auto& tool_help = arguments.tool_help; + auto& tool_args = arguments.tool_args; + auto& tool_lib = arguments.tool_lib; bool kokkos_threads_found = false; bool kokkos_numa_found = false; @@ -461,7 +719,7 @@ void parse_command_line_arguments(int& narg, char* arg[], int num1_len = num2 == nullptr ? strlen(num1) : num2 - num1; char* num1_only = new char[num1_len + 1]; strncpy(num1_only, num1, num1_len); - num1_only[num1_len] = 0; + num1_only[num1_len] = '\0'; if (!is_unsigned_int(num1_only) || (strlen(num1_only) == 0)) { throw_runtime_exception( @@ -510,6 +768,37 @@ void parse_command_line_arguments(int& narg, char* arg[], arg[k] = arg[k + 1]; } narg--; + } else if (check_str_arg(arg[iarg], "--kokkos-tools-library", tool_lib)) { + for (int k = iarg; k < narg - 1; k++) { + arg[k] = arg[k + 1]; + } + narg--; + } else if (check_str_arg(arg[iarg], "--kokkos-tools-args", tool_args)) { + for (int k = iarg; k < narg - 1; k++) { + arg[k] = arg[k + 1]; + } + narg--; + // strip any leading and/or trailing quotes if they were retained in the + // string because this will very likely cause parsing issues for tools. 
+ // If the quotes are retained (via bypassing the shell): + // <EXE> --kokkos-tools-args="-c my example" + // would be tokenized as: + // "<EXE>" "\"-c" "my" "example\"" + // instead of: + // "<EXE>" "-c" "my" "example" + if (!tool_args.empty()) { + if (tool_args.front() == '"') tool_args = tool_args.substr(1); + if (tool_args.back() == '"') + tool_args = tool_args.substr(0, tool_args.length() - 1); + } + // add the name of the executable to the beginning + if (narg > 0) tool_args = std::string(arg[0]) + " " + tool_args; + } else if (check_arg(arg[iarg], "--kokkos-tools-help")) { + tool_help = true; + for (int k = iarg; k < narg - 1; k++) { + arg[k] = arg[k + 1]; + } + narg--; } else if (check_arg(arg[iarg], "--kokkos-help") || check_arg(arg[iarg], "--help")) { auto const help_message = R"( @@ -526,7 +815,7 @@ void parse_command_line_arguments(int& narg, char* arg[], --kokkos-disable-warnings : disable kokkos warning messages --kokkos-tune-internals : allow Kokkos to autotune policies and declare tuning features through the tuning system. If - left off, Kokkos uses heuristics + left off, Kokkos uses heuristics --kokkos-threads=INT : specify total number of threads or number of threads per NUMA region if used in conjunction with '--numa' option. @@ -540,6 +829,18 @@ void parse_command_line_arguments(int& narg, char* arg[], to be ignored. This is most useful on workstations with multiple GPUs of which one is used to drive screen output. + --kokkos-tools-library : Equivalent to KOKKOS_PROFILE_LIBRARY environment + variable. Must either be full path to library or + name of library if the path is present in the + runtime library search path (e.g. 
LD_LIBRARY_PATH) + --kokkos-tools-help : Query the (loaded) kokkos-tool for its command-line + option support (which should then be passed via + --kokkos-tools-args="...") + --kokkos-tools-args=STR : A single (quoted) string of options which will be + whitespace delimited and passed to the loaded + kokkos-tool as command-line arguments. E.g. + `<EXE> --kokkos-tools-args="-c input.txt"` will + pass `<EXE> -c input.txt` as argc/argv to tool -------------------------------------------------------------------------------- )"; std::cout << help_message << std::endl; @@ -556,6 +857,7 @@ void parse_command_line_arguments(int& narg, char* arg[], } else iarg++; } + if (tool_args.empty() && narg > 0) tool_args = arg[0]; } void parse_environment_variables(InitArguments& arguments) { @@ -566,6 +868,7 @@ void parse_environment_variables(InitArguments& arguments) { auto& skip_device = arguments.skip_device; auto& disable_warnings = arguments.disable_warnings; auto& tune_internals = arguments.tune_internals; + auto& tool_lib = arguments.tool_lib; char* endptr; auto env_num_threads_str = std::getenv("KOKKOS_NUM_THREADS"); if (env_num_threads_str != nullptr) { @@ -711,7 +1014,9 @@ void parse_environment_variables(InitArguments& arguments) { for (char& c : env_str) { c = toupper(c); } - if ((env_str == "TRUE") || (env_str == "ON") || (env_str == "1")) + const auto _rc = std::regex_constants::icase | std::regex_constants::egrep; + const auto _re = std::regex("^(true|on|yes|[1-9])$", _rc); + if (std::regex_match(env_str, _re)) disable_warnings = true; else if (disable_warnings) Impl::throw_runtime_exception( @@ -733,6 +1038,16 @@ void parse_environment_variables(InitArguments& arguments) { "KOKKOS_TUNE_INTERNALS if both are set. 
Raised by " "Kokkos::initialize(int narg, char* argc[])."); } + auto env_tool_lib = std::getenv("KOKKOS_PROFILE_LIBRARY"); + if (env_tool_lib != nullptr) { + if (!tool_lib.empty() && std::string(env_tool_lib) != tool_lib) + Impl::throw_runtime_exception( + "Error: expecting a match between --kokkos-tools-library and " + "KOKKOS_PROFILE_LIBRARY if both are set. Raised by " + "Kokkos::initialize(int narg, char* argc[])."); + else + tool_lib = env_tool_lib; + } } } // namespace @@ -765,6 +1080,7 @@ void pre_initialize(const InitArguments& args) { void post_initialize(const InitArguments& args) { post_initialize_internal(args); } + } // namespace Impl void push_finalize_hook(std::function<void()> f) { finalize_hooks.push(f); } @@ -778,180 +1094,35 @@ void finalize_all() { void fence() { Impl::fence_internal(); } +void print_helper(std::ostringstream& out, + const std::map<std::string, std::string>& print_me) { + for (const auto& kv : print_me) { + out << kv.first << ": " << kv.second << '\n'; + } +} + void print_configuration(std::ostream& out, const bool detail) { std::ostringstream msg; - msg << "Kokkos Version:" << std::endl; - msg << " " << KOKKOS_VERSION / 10000 << "." << (KOKKOS_VERSION % 10000) / 100 - << "." 
<< KOKKOS_VERSION % 100 << std::endl; + print_helper(msg, Kokkos::Impl::metadata_map["version_info"]); msg << "Compiler:" << std::endl; -#ifdef KOKKOS_COMPILER_APPLECC - msg << " KOKKOS_COMPILER_APPLECC: " << KOKKOS_COMPILER_APPLECC << std::endl; -#endif -#ifdef KOKKOS_COMPILER_CLANG - msg << " KOKKOS_COMPILER_CLANG: " << KOKKOS_COMPILER_CLANG << std::endl; -#endif -#ifdef KOKKOS_COMPILER_CRAYC - msg << " KOKKOS_COMPILER_CRAYC: " << KOKKOS_COMPILER_CRAYC << std::endl; -#endif -#ifdef KOKKOS_COMPILER_GNU - msg << " KOKKOS_COMPILER_GNU: " << KOKKOS_COMPILER_GNU << std::endl; -#endif -#ifdef KOKKOS_COMPILER_IBM - msg << " KOKKOS_COMPILER_IBM: " << KOKKOS_COMPILER_IBM << std::endl; -#endif -#ifdef KOKKOS_COMPILER_INTEL - msg << " KOKKOS_COMPILER_INTEL: " << KOKKOS_COMPILER_INTEL << std::endl; -#endif -#ifdef KOKKOS_COMPILER_NVCC - msg << " KOKKOS_COMPILER_NVCC: " << KOKKOS_COMPILER_NVCC << std::endl; -#endif -#ifdef KOKKOS_COMPILER_PGI - msg << " KOKKOS_COMPILER_PGI: " << KOKKOS_COMPILER_PGI << std::endl; -#endif + print_helper(msg, Kokkos::Impl::metadata_map["compiler_version"]); msg << "Architecture:" << std::endl; -#ifdef KOKKOS_ENABLE_ISA_KNC - msg << " KOKKOS_ENABLE_ISA_KNC: yes" << std::endl; -#else - msg << " KOKKOS_ENABLE_ISA_KNC: no" << std::endl; -#endif -#ifdef KOKKOS_ENABLE_ISA_POWERPCLE - msg << " KOKKOS_ENABLE_ISA_POWERPCLE: yes" << std::endl; -#else - msg << " KOKKOS_ENABLE_ISA_POWERPCLE: no" << std::endl; -#endif -#ifdef KOKKOS_ENABLE_ISA_X86_64 - msg << " KOKKOS_ENABLE_ISA_X86_64: yes" << std::endl; -#else - msg << " KOKKOS_ENABLE_ISA_X86_64: no" << std::endl; -#endif - - msg << "Default Device:" << typeid(Kokkos::DefaultExecutionSpace).name() - << std::endl; + print_helper(msg, Kokkos::Impl::metadata_map["architecture"]); msg << "Atomics:" << std::endl; - msg << " KOKKOS_ENABLE_GNU_ATOMICS: "; -#ifdef KOKKOS_ENABLE_GNU_ATOMICS - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_INTEL_ATOMICS: "; -#ifdef 
KOKKOS_ENABLE_INTEL_ATOMICS - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_WINDOWS_ATOMICS: "; -#ifdef KOKKOS_ENABLE_WINDOWS_ATOMICS - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif + print_helper(msg, Kokkos::Impl::metadata_map["atomics"]); msg << "Vectorization:" << std::endl; - msg << " KOKKOS_ENABLE_PRAGMA_IVDEP: "; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_PRAGMA_LOOPCOUNT: "; -#ifdef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_PRAGMA_SIMD: "; -#ifdef KOKKOS_ENABLE_PRAGMA_SIMD - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_PRAGMA_UNROLL: "; -#ifdef KOKKOS_ENABLE_PRAGMA_UNROLL - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_PRAGMA_VECTOR: "; -#ifdef KOKKOS_ENABLE_PRAGMA_VECTOR - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif + print_helper(msg, Kokkos::Impl::metadata_map["vectorization"]); msg << "Memory:" << std::endl; - msg << " KOKKOS_ENABLE_HBWSPACE: "; -#ifdef KOKKOS_ENABLE_HBWSPACE - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_INTEL_MM_ALLOC: "; -#ifdef KOKKOS_ENABLE_INTEL_MM_ALLOC - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_POSIX_MEMALIGN: "; -#ifdef KOKKOS_ENABLE_POSIX_MEMALIGN - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif + print_helper(msg, Kokkos::Impl::metadata_map["memory"]); msg << "Options:" << std::endl; - msg << " KOKKOS_ENABLE_ASM: "; -#ifdef KOKKOS_ENABLE_ASM - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_CXX14: "; -#ifdef KOKKOS_ENABLE_CXX14 - msg << "yes" << std::endl; -#else - msg << "no" << 
std::endl; -#endif - msg << " KOKKOS_ENABLE_CXX17: "; -#ifdef KOKKOS_ENABLE_CXX17 - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_CXX20: "; -#ifdef KOKKOS_ENABLE_CXX20 - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK: "; -#ifdef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_HWLOC: "; -#ifdef KOKKOS_ENABLE_HWLOC - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_LIBRT: "; -#ifdef KOKKOS_ENABLE_LIBRT - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_MPI: "; -#ifdef KOKKOS_ENABLE_MPI - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif + print_helper(msg, Kokkos::Impl::metadata_map["options"]); Impl::ExecSpaceManager::get_instance().print_configuration(msg, detail); diff --git a/packages/kokkos/core/src/impl/Kokkos_Error.cpp b/packages/kokkos/core/src/impl/Kokkos_Error.cpp index 6362487ed7eab223c8f2dff445d79c7eaada5646..dfb9f3a51cdbd9aa7e189e21f5956806d53823b5 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Error.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Error.cpp @@ -132,8 +132,11 @@ void Experimental::RawMemoryAllocationFailure::print_error_message( case AllocationMechanism::CudaHostAlloc: o << "cudaHostAlloc()."; break; case AllocationMechanism::HIPMalloc: o << "hipMalloc()."; break; case AllocationMechanism::HIPHostMalloc: o << "hipHostMalloc()."; break; - case AllocationMechanism::SYCLMalloc: - o << "cl::sycl::malloc_device()."; + case AllocationMechanism::SYCLMallocDevice: + o << "sycl::malloc_device()."; + break; + case AllocationMechanism::SYCLMallocShared: + o << "sycl::malloc_shared()."; break; } append_additional_error_information(o); diff --git a/packages/kokkos/core/src/impl/Kokkos_Error.hpp b/packages/kokkos/core/src/impl/Kokkos_Error.hpp 
index ab966a4d4a8bc8806ea0fb1c4c0f75b9a2143d2b..5db459734631ddff5d0a29963a9ec04b9ec549ea 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Error.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Error.hpp @@ -54,6 +54,9 @@ #ifdef KOKKOS_ENABLE_HIP #include <HIP/Kokkos_HIP_Abort.hpp> #endif +#ifdef KOKKOS_ENABLE_SYCL +#include <SYCL/Kokkos_SYCL_Abort.hpp> +#endif #ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE #define KOKKOS_ABORT_MESSAGE_BUFFER_SIZE 2048 @@ -93,7 +96,8 @@ class RawMemoryAllocationFailure : public std::bad_alloc { CudaHostAlloc, HIPMalloc, HIPHostMalloc, - SYCLMalloc + SYCLMallocDevice, + SYCLMallocShared }; private: @@ -180,7 +184,10 @@ class RawMemoryAllocationFailure : public std::bad_alloc { #elif defined(KOKKOS_ENABLE_HIP) && defined(__HIP_DEVICE_COMPILE__) // HIP aborts #define KOKKOS_IMPL_ABORT_NORETURN [[noreturn]] -#elif !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(__SYCL_DEVICE_ONLY__) +#elif defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__) +// FIXME_SYCL SYCL doesn't abort +#define KOKKOS_IMPL_ABORT_NORETURN +#elif !defined(KOKKOS_ENABLE_OPENMPTARGET) // Host aborts #define KOKKOS_IMPL_ABORT_NORETURN [[noreturn]] #else @@ -195,10 +202,12 @@ KOKKOS_IMPL_ABORT_NORETURN KOKKOS_INLINE_FUNCTION void abort( Kokkos::Impl::cuda_abort(message); #elif defined(KOKKOS_ENABLE_HIP) && defined(__HIP_DEVICE_COMPILE__) Kokkos::Impl::hip_abort(message); -#elif !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(__SYCL_DEVICE_ONLY__) +#elif defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + Kokkos::Impl::sycl_abort(message); +#elif !defined(KOKKOS_ENABLE_OPENMPTARGET) Kokkos::Impl::host_abort(message); #else - (void)message; // FIXME_OPENMPTARGET, FIXME_SYCL + (void)message; // FIXME_OPENMPTARGET #endif } diff --git a/packages/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp b/packages/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp index 2651229a706038fe3b8cfe8033bd4d521675003e..3068ef3db0389d48149d2d9ce28efac3112f1c27 100644 --- 
a/packages/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp @@ -104,7 +104,7 @@ class FixedBlockSizeMemoryPool m_first_block = (Block*)block_record->data(); auto idx_record = - record_type::allocate(mem_space, "FixedBlockSizeMemPool_blocks", + record_type::allocate(mem_space, "Kokkos::FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type)); KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0); m_tracker.assign_allocated_record_to_uninitialized(idx_record); diff --git a/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp index 3bcb60f8561d15431b2079824d6d4f4ae5d86df9..22e88ebc4fc57d4e7132bca0be2aa55f5bfc5f69 100644 --- a/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp @@ -84,16 +84,6 @@ struct ReduceFunctorHasInit< typename std::enable_if<0 < sizeof(&FunctorType::init)>::type> { enum : bool { value = true }; }; -// FIXME_SYCL not all compilers distinguish between the FunctorType::init and -// the FunctorType::template init<> specialization -#ifdef KOKKOS_ENABLE_SYCL -template <class FunctorType> -struct ReduceFunctorHasInit< - FunctorType, - typename std::enable_if<0 < sizeof(&FunctorType::template init<>)>::type> { - enum : bool { value = true }; -}; -#endif #endif template <class FunctorType, class Enable = void> @@ -117,16 +107,6 @@ struct ReduceFunctorHasJoin< typename std::enable_if<0 < sizeof(&FunctorType::join)>::type> { enum : bool { value = true }; }; -// FIXME_SYCL not all compilers distinguish between the FunctorType::join and -// the FunctorType::template join<> specialization -#ifdef KOKKOS_ENABLE_SYCL -template <class FunctorType> -struct ReduceFunctorHasJoin< - FunctorType, - typename std::enable_if<0 < sizeof(&FunctorType::template join<>)>::type> { - enum : bool { value = true }; -}; -#endif #endif template <class 
FunctorType, class Enable = void> @@ -150,16 +130,6 @@ struct ReduceFunctorHasFinal< typename std::enable_if<0 < sizeof(&FunctorType::final)>::type> { enum : bool { value = true }; }; -// FIXME_SYCL not all compilers distinguish between the FunctorType::final and -// the FunctorType::template final<> specialization -#ifdef KOKKOS_ENABLE_SYCL -template <class FunctorType> -struct ReduceFunctorHasFinal< - FunctorType, - typename std::enable_if<0 < sizeof(&FunctorType::template final<>)>::type> { - enum : bool { value = true }; -}; -#endif #endif template <class FunctorType, class Enable = void> @@ -183,27 +153,14 @@ struct ReduceFunctorHasShmemSize< typename std::enable_if<0 < sizeof(&FunctorType::team_shmem_size)>::type> { enum : bool { value = true }; }; -// FIXME_SYCL not all compilers distinguish between the -// FunctorType::team_shmem_size and the FunctorType::template team_shmem_size<> -// specialization -#ifdef KOKKOS_ENABLE_SYCL -template <class FunctorType> -struct ReduceFunctorHasShmemSize< - FunctorType, - typename std::enable_if< - 0 < sizeof(&FunctorType::template team_shmem_size<>)>::type> { - enum : bool { value = true }; -}; -#endif #endif template <class FunctorType, class ArgTag, class Enable = void> struct FunctorDeclaresValueType : public std::false_type {}; template <class FunctorType, class ArgTag> -struct FunctorDeclaresValueType< - FunctorType, ArgTag, - typename Impl::enable_if_type<typename FunctorType::value_type>::type> +struct FunctorDeclaresValueType<FunctorType, ArgTag, + void_t<typename FunctorType::value_type>> : public std::true_type {}; template <class FunctorType, @@ -290,8 +247,7 @@ struct FunctorValueTraits<FunctorType, ArgTag, // The reference_type for an array is 'value_type *' // The reference_type for a single value is 'value_type &' - using reference_type = - typename Impl::if_c<IsArray, value_type*, value_type&>::type; + using reference_type = std::conditional_t<IsArray, value_type*, value_type&>; // Number of values if 
single value template <class F> @@ -329,8 +285,8 @@ struct FunctorValueTraits<FunctorType, ArgTag, struct REJECTTAG { }; // Reject tagged operator() when using non-tagged execution policy. - using tag_type = typename Impl::if_c<std::is_same<ArgTag, void>::value, - VOIDTAG, ArgTag>::type; + using tag_type = + std::conditional_t<std::is_same<ArgTag, void>::value, VOIDTAG, ArgTag>; //---------------------------------------- // parallel_for operator without a tag: @@ -1371,12 +1327,11 @@ struct FunctorValueTraits<FunctorType, ArgTag, enum { IS_REJECT = std::is_same<REJECTTAG, ValueType>::value }; public: - using value_type = - typename Impl::if_c<IS_VOID || IS_REJECT, void, ValueType>::type; + using value_type = std::conditional_t<IS_VOID || IS_REJECT, void, ValueType>; using pointer_type = - typename Impl::if_c<IS_VOID || IS_REJECT, void, ValueType*>::type; + std::conditional_t<IS_VOID || IS_REJECT, void, ValueType*>; using reference_type = - typename Impl::if_c<IS_VOID || IS_REJECT, void, ValueType&>::type; + std::conditional_t<IS_VOID || IS_REJECT, void, ValueType&>; using functor_type = FunctorType; static_assert( @@ -2080,43 +2035,71 @@ struct FunctorFinal { KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType&, void*) {} }; -/* 'final' function provided */ +/* 'final' function provided for single value but no tag*/ template <class FunctorType, class ArgTag, class T> -struct FunctorFinal<FunctorType, ArgTag, - T& - // First substitution failure when FunctorType::final does - // not exist. Second substitution failure when enable_if( & - // Functor::final ) does not exist - , - decltype( - FunctorFinalFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::final))> { +struct FunctorFinal< + FunctorType, ArgTag, + T& + // First substitution failure when FunctorType::final does not exist. + // Second substitution failure when FunctorType::final is not compatible. 
+ , + typename std::enable_if< + std::is_same<ArgTag, void>::value, + decltype(FunctorFinalFunction<FunctorType, ArgTag>::enable_if( + &FunctorType::final))>::type> { KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType& f, void* p) { f.final(*((T*)p)); } +}; - KOKKOS_FORCEINLINE_FUNCTION static void final(FunctorType& f, void* p) { - f.final(*((T*)p)); +/* 'final' function provided for array value but no tag*/ +template <class FunctorType, class ArgTag, class T> +struct FunctorFinal< + FunctorType, ArgTag, + T* + // First substitution failure when FunctorType::final does not exist. + // Second substitution failure when FunctorType::final is not compatible. + , + typename std::enable_if< + std::is_same<ArgTag, void>::value, + decltype(FunctorFinalFunction<FunctorType, ArgTag>::enable_if( + &FunctorType::final))>::type> { + KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType& f, void* p) { + f.final((T*)p); } }; -/* 'final' function provided for array value */ +/* 'final' function provided for single value and with tag */ template <class FunctorType, class ArgTag, class T> -struct FunctorFinal<FunctorType, ArgTag, - T* - // First substitution failure when FunctorType::final does - // not exist. Second substitution failure when enable_if( & - // Functor::final ) does not exist - , - decltype( - FunctorFinalFunction<FunctorType, ArgTag>::enable_if( - &FunctorType::final))> { +struct FunctorFinal< + FunctorType, ArgTag, + T& + // First substitution failure when FunctorType::final does not exist. + // Second substitution failure when FunctorType::final is not compatible. 
+ , + typename std::enable_if< + !std::is_same<ArgTag, void>::value, + decltype(FunctorFinalFunction<FunctorType, ArgTag>::enable_if( + &FunctorType::final))>::type> { KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType& f, void* p) { - f.final((T*)p); + f.final(ArgTag(), *((T*)p)); } +}; - KOKKOS_FORCEINLINE_FUNCTION static void final(FunctorType& f, void* p) { - f.final((T*)p); +/* 'final' function provided for array value and with tag */ +template <class FunctorType, class ArgTag, class T> +struct FunctorFinal< + FunctorType, ArgTag, + T* + // First substitution failure when FunctorType::final does not exist. + // Second substitution failure when FunctorType::final is not compatible. + , + typename std::enable_if< + !std::is_same<ArgTag, void>::value, + decltype(FunctorFinalFunction<FunctorType, ArgTag>::enable_if( + &FunctorType::final))>::type> { + KOKKOS_FORCEINLINE_FUNCTION static void final(const FunctorType& f, void* p) { + f.final(ArgTag(), (T*)p); } }; diff --git a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index aeebcb64ee42b14f40465f611ec1982917904084..5c0eaa0a1ef80fa02e2f745f1d7e53d6fc45b8d3 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -250,10 +250,10 @@ SharedAllocationRecord<Kokkos::Experimental::HBWSpace, void>:: static_cast<SharedAllocationRecord<void, void> *>(this); strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); + SharedAllocationHeader::maximum_label_length - 1); // Set last element zero, in case c_str is too long RecordBase::m_alloc_ptr - ->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; + ->m_label[SharedAllocationHeader::maximum_label_length - 1] = '\0'; } //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp 
b/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp index 55d70985dcd1f921d4082b507cf84d1044ad8fbb..79ee7e80db3115f1c9c14366e2c237c042ab0bdb 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostBarrier.cpp @@ -49,10 +49,8 @@ #include <impl/Kokkos_HostBarrier.hpp> -#if !defined(_WIN32) -#include <sched.h> -#include <time.h> -#else +#include <thread> +#if defined(_WIN32) #include <process.h> #include <winsock2.h> #include <windows.h> @@ -63,18 +61,15 @@ namespace Impl { void HostBarrier::impl_backoff_wait_until_equal( int* ptr, const int v, const bool active_wait) noexcept { -#if !defined(_WIN32) - timespec req; - req.tv_sec = 0; unsigned count = 0u; while (!test_equal(ptr, v)) { const int c = ::Kokkos::log2(++count); if (!active_wait || c > log2_iterations_till_sleep) { - req.tv_nsec = c < 16 ? 256 * c : 4096; - nanosleep(&req, nullptr); + std::this_thread::sleep_for( + std::chrono::nanoseconds(c < 16 ? 256 * c : 4096)); } else if (c > log2_iterations_till_yield) { - sched_yield(); + std::this_thread::yield(); } #if defined(KOKKOS_ENABLE_ASM) #if defined(__PPC64__) @@ -91,18 +86,6 @@ void HostBarrier::impl_backoff_wait_until_equal( #endif #endif } -#else // _WIN32 - while (!test_equal(ptr, v)) { -#if defined(KOKKOS_ENABLE_ASM) - for (int j = 0; j < num_nops; ++j) { - __asm__ __volatile__("nop\n"); - } - __asm__ __volatile__("pause\n" ::: "memory"); -#endif - } -#endif - // printf("W: %d\n", count); } - } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSharedPtr.hpp b/packages/kokkos/core/src/impl/Kokkos_HostSharedPtr.hpp new file mode 100644 index 0000000000000000000000000000000000000000..97286dd07f4ea2ee94f3070768f425e2ef5b7896 --- /dev/null +++ b/packages/kokkos/core/src/impl/Kokkos_HostSharedPtr.hpp @@ -0,0 +1,178 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_HOST_SHARED_PTR_HPP +#define KOKKOS_IMPL_HOST_SHARED_PTR_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_Atomic.hpp> + +#include <functional> + +namespace Kokkos { +namespace Impl { + +template <typename T> +class HostSharedPtr { + public: + using element_type = T; + + KOKKOS_DEFAULTED_FUNCTION constexpr HostSharedPtr() = default; + KOKKOS_FUNCTION constexpr HostSharedPtr(std::nullptr_t) {} + + explicit HostSharedPtr(T* element_ptr) + : HostSharedPtr(element_ptr, [](T* const t) { delete t; }) {} + + template <class Deleter> + HostSharedPtr(T* element_ptr, const Deleter& deleter) + : m_element_ptr(element_ptr) { +#ifdef KOKKOS_ENABLE_CXX17 + static_assert(std::is_invocable_v<Deleter, T*> && + std::is_copy_constructible_v<Deleter>); +#endif + if (element_ptr) { + try { + m_control = new Control{deleter, 1}; + } catch (...) { + deleter(element_ptr); + throw; + } + } + } + + KOKKOS_FUNCTION HostSharedPtr(HostSharedPtr&& other) noexcept + : m_element_ptr(other.m_element_ptr), m_control(other.m_control) { + other.m_element_ptr = nullptr; + other.m_control = nullptr; + } + + KOKKOS_FUNCTION HostSharedPtr(const HostSharedPtr& other) noexcept + : m_element_ptr(other.m_element_ptr), m_control(other.m_control) { + // FIXME_OPENMPTARGET requires something like KOKKOS_IMPL_IF_ON_HOST +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + if (m_control) Kokkos::atomic_add(&(m_control->m_counter), 1); +#endif + } + + KOKKOS_FUNCTION HostSharedPtr& operator=(HostSharedPtr&& other) noexcept { + if (&other != this) { + cleanup(); + m_element_ptr = other.m_element_ptr; + other.m_element_ptr = nullptr; + m_control = other.m_control; + other.m_control = nullptr; + } + return *this; + } + + KOKKOS_FUNCTION HostSharedPtr& operator=( + const HostSharedPtr& other) noexcept { + if (&other != this) { + cleanup(); + m_element_ptr = 
other.m_element_ptr; + m_control = other.m_control; + // FIXME_OPENMPTARGET +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + if (m_control) Kokkos::atomic_add(&(m_control->m_counter), 1); +#endif + } + return *this; + } + + KOKKOS_FUNCTION ~HostSharedPtr() { cleanup(); } + + // returns the stored pointer + KOKKOS_FUNCTION T* get() const noexcept { return m_element_ptr; } + // dereferences the stored pointer + KOKKOS_FUNCTION T& operator*() const noexcept { + KOKKOS_EXPECTS(bool(*this)); + return *get(); + } + // dereferences the stored pointer + KOKKOS_FUNCTION T* operator->() const noexcept { + KOKKOS_EXPECTS(bool(*this)); + return get(); + } + + // checks if the stored pointer is not null + KOKKOS_FUNCTION explicit operator bool() const noexcept { + return get() != nullptr; + } + + // returns the number of HostSharedPtr instances managing the current object or + // 0 if there is no managed object. + int use_count() const noexcept { + return m_control ? m_control->m_counter : 0; + } + + private: + KOKKOS_FUNCTION void cleanup() noexcept { + // FIXME_OPENMPTARGET +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + // If m_control is set, then this instance is responsible for managing the + // objects pointed to by m_control and m_element_ptr.
+ if (m_control) { + int const count = Kokkos::atomic_fetch_sub(&(m_control->m_counter), 1); + if (count == 1) { + (m_control->m_deleter)(m_element_ptr); + m_element_ptr = nullptr; + delete m_control; + m_control = nullptr; + } + } +#endif + } + + struct Control { + std::function<void(T*)> m_deleter; + int m_counter; + }; + + T* m_element_ptr = nullptr; + Control* m_control = nullptr; +}; +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp index 130e5cce13a3cd04e1bda20339d56f9b57764c3e..ed46d170e53ebb58e118c8d020073ed12d3c1064 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -42,9 +42,8 @@ //@HEADER */ -#include <cstdio> -#include <algorithm> #include <Kokkos_Macros.hpp> + #include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_MemorySpace.hpp> #include <impl/Kokkos_Tools.hpp> @@ -352,18 +351,12 @@ SharedAllocationRecord<void, void> SharedAllocationRecord<Kokkos::HostSpace, void>::s_root_record; #endif -void SharedAllocationRecord<Kokkos::HostSpace, void>::deallocate( - SharedAllocationRecord<void, void> *arg_rec) { - delete static_cast<SharedAllocationRecord *>(arg_rec); -} - SharedAllocationRecord<Kokkos::HostSpace, void>::~SharedAllocationRecord() #if defined( \ KOKKOS_IMPL_INTEL_WORKAROUND_NOEXCEPT_SPECIFICATION_VIRTUAL_FUNCTION) noexcept #endif { - m_space.deallocate(RecordBase::m_alloc_ptr->m_label, SharedAllocationRecord<void, void>::m_alloc_ptr, SharedAllocationRecord<void, void>::m_alloc_size, @@ -399,7 +392,7 @@ SharedAllocationRecord<Kokkos::HostSpace, void>::SharedAllocationRecord( const SharedAllocationRecord<void, void>::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through deallocation function - : SharedAllocationRecord<void, void>( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG 
&SharedAllocationRecord<Kokkos::HostSpace, void>::s_root_record, #endif @@ -407,91 +400,10 @@ SharedAllocationRecord<Kokkos::HostSpace, void>::SharedAllocationRecord( arg_alloc_size), sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc), m_space(arg_space) { - // Fill in the Header information - RecordBase::m_alloc_ptr->m_record = - static_cast<SharedAllocationRecord<void, void> *>(this); - - strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); - // Set last element zero, in case c_str is too long - RecordBase::m_alloc_ptr - ->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; -} - -//---------------------------------------------------------------------------- - -void *SharedAllocationRecord<Kokkos::HostSpace, void>::allocate_tracked( - const Kokkos::HostSpace &arg_space, const std::string &arg_alloc_label, - const size_t arg_alloc_size) { - if (!arg_alloc_size) return nullptr; - - SharedAllocationRecord *const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - - RecordBase::increment(r); - - return r->data(); -} - -void SharedAllocationRecord<Kokkos::HostSpace, void>::deallocate_tracked( - void *const arg_alloc_ptr) { - if (arg_alloc_ptr != nullptr) { - SharedAllocationRecord *const r = get_record(arg_alloc_ptr); - - RecordBase::decrement(r); - } -} - -void *SharedAllocationRecord<Kokkos::HostSpace, void>::reallocate_tracked( - void *const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord *const r_new = - allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - Kokkos::Impl::DeepCopy<HostSpace, HostSpace>( - r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); - - RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); + this->base_t::_fill_host_accessible_header_info(*RecordBase::m_alloc_ptr, + arg_label); } 
-SharedAllocationRecord<Kokkos::HostSpace, void> * -SharedAllocationRecord<Kokkos::HostSpace, void>::get_record(void *alloc_ptr) { - using Header = SharedAllocationHeader; - using RecordHost = SharedAllocationRecord<Kokkos::HostSpace, void>; - - SharedAllocationHeader const *const head = - alloc_ptr ? Header::get_header(alloc_ptr) : nullptr; - RecordHost *const record = - head ? static_cast<RecordHost *>(head->m_record) : nullptr; - - if (!alloc_ptr || record->m_alloc_ptr != head) { - Kokkos::Impl::throw_runtime_exception( - std::string("Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , " - "void >::get_record ERROR")); - } - - return record; -} - -// Iterate records to print orphaned memory ... -#ifdef KOKKOS_ENABLE_DEBUG -void SharedAllocationRecord<Kokkos::HostSpace, void>::print_records( - std::ostream &s, const Kokkos::HostSpace &, bool detail) { - SharedAllocationRecord<void, void>::print_host_accessible_records( - s, "HostSpace", &s_root_record, detail); -} -#else -void SharedAllocationRecord<Kokkos::HostSpace, void>::print_records( - std::ostream &, const Kokkos::HostSpace &, bool) { - throw_runtime_exception( - "SharedAllocationRecord<HostSpace>::print_records only works with " - "KOKKOS_ENABLE_DEBUG enabled"); -} -#endif - } // namespace Impl } // namespace Kokkos @@ -568,3 +480,22 @@ void unlock_address_host_space(void *ptr) { } // namespace Impl } // namespace Kokkos + +//============================================================================== +// <editor-fold desc="Explicit instantiations of CRTP Base classes"> {{{1 + +#include <impl/Kokkos_SharedAlloc_timpl.hpp> + +namespace Kokkos { +namespace Impl { + +// To avoid additional compilation cost for something that's (mostly?) not +// performance sensitive, we explicitly instantiate these CRTP base classes here, +// where we have access to the associated *_timpl.hpp header files.
+template class SharedAllocationRecordCommon<Kokkos::HostSpace>; + +} // end namespace Impl +} // end namespace Kokkos + +// </editor-fold> end Explicit instantiations of CRTP Base classes }}}1 +//============================================================================== diff --git a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index 2480967ebd18b477c5193857e73040f707b7b435..d4cae7f122ed182cf88522d5d60729a0906cce5b 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -813,14 +813,16 @@ ThreadVectorRange( return Impl::ThreadVectorRangeBoundariesStruct<iType, Member>(member, count); } -template <typename iType, typename Member> -KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct<iType, Member> +template <typename iType1, typename iType2, typename Member> +KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< + typename std::common_type<iType1, iType2>::type, Member> ThreadVectorRange( - Member const& member, iType arg_begin, iType arg_end, + Member const& member, iType1 arg_begin, iType2 arg_end, typename std::enable_if< Impl::is_thread_team_member<Member>::value>::type const** = nullptr) { + using iType = typename std::common_type<iType1, iType2>::type; return Impl::ThreadVectorRangeBoundariesStruct<iType, Member>( - member, arg_begin, arg_end); + member, iType(arg_begin), iType(arg_end)); } //---------------------------------------------------------------------------- @@ -1010,6 +1012,25 @@ parallel_scan(Impl::ThreadVectorRangeBoundariesStruct<iType, Member> const& } } +template <typename iType, class Lambda, typename ReducerType, typename Member> +KOKKOS_INLINE_FUNCTION typename std::enable_if< + Kokkos::is_reducer<ReducerType>::value && + Impl::is_host_thread_team_member<Member>::value>::type +parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType, Member>& + loop_boundaries, + 
const Lambda& lambda, const ReducerType& reducer) { + typename ReducerType::value_type scan_val; + reducer.init(scan_val); + +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, true); + } +} + //---------------------------------------------------------------------------- template <class Member> diff --git a/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp index 262aa9e3ea1d6e3f32993f0ebc7b101f982633b7..76d553601923fd7282132fbff05ce69a4e576e97 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp @@ -57,6 +57,9 @@ void memory_fence() { #pragma omp flush #elif defined(__HIP_DEVICE_COMPILE__) __threadfence(); +#elif defined(KOKKOS_ENABLE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + sycl::ONEAPI::atomic_fence(sycl::ONEAPI::memory_order::acq_rel, + sycl::ONEAPI::memory_scope::device); #elif defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) asm volatile("mfence" ::: "memory"); #elif defined(KOKKOS_ENABLE_GNU_ATOMICS) || \ diff --git a/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp b/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e53afe436daff997726be8cb0c880887c32de1a4 --- /dev/null +++ b/packages/kokkos/core/src/impl/Kokkos_NumericTraits.cpp @@ -0,0 +1,73 @@ +#include <Kokkos_NumericTraits.hpp> + +// NOTE These out-of class definitions are only required with C++14. Since +// C++17, a static data member declared constrexpr is impllictly inline. 
+ +#if !defined(KOKKOS_ENABLE_CXX17) +namespace Kokkos { +namespace Experimental { +namespace Impl { +#define OUT_OF_CLASS_DEFINTION_FLOATING_POINT(TRAIT) \ + constexpr float TRAIT##_helper<float>::value; \ + constexpr double TRAIT##_helper<double>::value; \ + constexpr long double TRAIT##_helper<long double>::value + +#define OUT_OF_CLASS_DEFINTION_INTEGRAL(TRAIT) \ + constexpr bool TRAIT##_helper<bool>::value; \ + constexpr char TRAIT##_helper<char>::value; \ + constexpr signed char TRAIT##_helper<signed char>::value; \ + constexpr unsigned char TRAIT##_helper<unsigned char>::value; \ + constexpr short TRAIT##_helper<short>::value; \ + constexpr unsigned short TRAIT##_helper<unsigned short>::value; \ + constexpr int TRAIT##_helper<int>::value; \ + constexpr unsigned int TRAIT##_helper<unsigned int>::value; \ + constexpr long int TRAIT##_helper<long int>::value; \ + constexpr unsigned long int TRAIT##_helper<unsigned long int>::value; \ + constexpr long long int TRAIT##_helper<long long int>::value; \ + constexpr unsigned long long int TRAIT##_helper<unsigned long long int>::value + +#define OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(TRAIT) \ + constexpr int TRAIT##_helper<float>::value; \ + constexpr int TRAIT##_helper<double>::value; \ + constexpr int TRAIT##_helper<long double>::value + +#define OUT_OF_CLASS_DEFINTION_INTEGRAL_2(TRAIT) \ + constexpr int TRAIT##_helper<bool>::value; \ + constexpr int TRAIT##_helper<char>::value; \ + constexpr int TRAIT##_helper<signed char>::value; \ + constexpr int TRAIT##_helper<unsigned char>::value; \ + constexpr int TRAIT##_helper<short>::value; \ + constexpr int TRAIT##_helper<unsigned short>::value; \ + constexpr int TRAIT##_helper<int>::value; \ + constexpr int TRAIT##_helper<unsigned int>::value; \ + constexpr int TRAIT##_helper<long int>::value; \ + constexpr int TRAIT##_helper<unsigned long int>::value; \ + constexpr int TRAIT##_helper<long long int>::value; \ + constexpr int TRAIT##_helper<unsigned long long int>::value 
+ +OUT_OF_CLASS_DEFINTION_FLOATING_POINT(infinity); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT(epsilon); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT(round_error); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT(norm_min); + +OUT_OF_CLASS_DEFINTION_INTEGRAL(finite_min); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT(finite_min); +OUT_OF_CLASS_DEFINTION_INTEGRAL(finite_max); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT(finite_max); + +OUT_OF_CLASS_DEFINTION_INTEGRAL_2(digits); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(digits); +OUT_OF_CLASS_DEFINTION_INTEGRAL_2(digits10); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(digits10); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(max_digits10); +OUT_OF_CLASS_DEFINTION_INTEGRAL_2(radix); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(radix); + +OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(min_exponent); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(min_exponent10); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(max_exponent); +OUT_OF_CLASS_DEFINTION_FLOATING_POINT_2(max_exponent10); +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp b/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp index 4b7e02bbb2a0d96e5e1a852feccfefd2bf603274..94ea6e1a2b10c33a81e4f2c6b7a932577ce6144b 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling.cpp @@ -49,19 +49,30 @@ #include <dlfcn.h> #endif +#include <algorithm> +#include <array> #include <cstring> +#include <iostream> +#include <stack> #include <unordered_map> #include <unordered_set> -#include <algorithm> #include <vector> -#include <array> -#include <stack> -#include <iostream> namespace Kokkos { namespace Tools { namespace Experimental { + +namespace Impl { +void tool_invoked_fence(const uint32_t /* devID */) { + /** + * Currently the function ignores the device ID, + * Eventually we want to support fencing only + * a given stream/resource + */ + Kokkos::fence(); +} +} // namespace Impl #ifdef 
KOKKOS_ENABLE_TUNING static size_t kernel_name_context_variable_id; static size_t kernel_type_context_variable_id; @@ -74,9 +85,10 @@ static std::unordered_map<size_t, VariableInfo> variable_metadata; static EventSet current_callbacks; static EventSet backup_callbacks; static EventSet no_profiling; - +static Kokkos::Tools::Experimental::ToolSettings tool_requirements; bool eventSetsEqual(const EventSet& l, const EventSet& r) { return l.init == r.init && l.finalize == r.finalize && + l.parse_args == r.parse_args && l.print_help == r.print_help && l.begin_parallel_for == r.begin_parallel_for && l.end_parallel_for == r.end_parallel_for && l.begin_parallel_reduce == r.begin_parallel_reduce && @@ -95,6 +107,10 @@ bool eventSetsEqual(const EventSet& l, const EventSet& r) { l.end_deep_copy == r.end_deep_copy && l.begin_fence == r.begin_fence && l.end_fence == r.end_fence && l.sync_dual_view == r.sync_dual_view && l.modify_dual_view == r.modify_dual_view && + l.declare_metadata == r.declare_metadata && + l.request_tool_settings == r.request_tool_settings && + l.provide_tool_programming_interface == + r.provide_tool_programming_interface && l.declare_input_type == r.declare_input_type && l.declare_output_type == r.declare_output_type && l.end_tuning_context == r.end_tuning_context && @@ -102,6 +118,24 @@ bool eventSetsEqual(const EventSet& l, const EventSet& r) { l.request_output_values == r.request_output_values && l.declare_optimization_goal == r.declare_optimization_goal; } +enum class MayRequireGlobalFencing : bool { No, Yes }; +template <typename Callback, typename... Args> +inline void invoke_kokkosp_callback( + MayRequireGlobalFencing may_require_global_fencing, + const Callback& callback, Args&&... 
args) { + if (callback != nullptr) { + // two clause if statement + // may_require_global_fencing: "if this callback ever needs a fence", AND + // if the tool requires global fencing (default true, but tools can + // overwrite) + if (may_require_global_fencing == MayRequireGlobalFencing::Yes && + (Kokkos::Tools::Experimental::tool_requirements + .requires_global_fencing)) { + Kokkos::fence(); + } + (*callback)(std::forward<Args>(args)...); + } +} } // namespace Experimental bool profileLibraryLoaded() { return !Experimental::eventSetsEqual(Experimental::current_callbacks, @@ -110,11 +144,10 @@ bool profileLibraryLoaded() { void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if (Experimental::current_callbacks.begin_parallel_for != nullptr) { - Kokkos::fence(); - (*Experimental::current_callbacks.begin_parallel_for)(kernelPrefix.c_str(), - devID, kernelID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::Yes, + Experimental::current_callbacks.begin_parallel_for, kernelPrefix.c_str(), + devID, kernelID); #ifdef KOKKOS_ENABLE_TUNING if (Kokkos::tune_internals()) { auto context_id = Experimental::get_new_context_id(); @@ -130,10 +163,9 @@ void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, } void endParallelFor(const uint64_t kernelID) { - if (Experimental::current_callbacks.end_parallel_for != nullptr) { - Kokkos::fence(); - (*Experimental::current_callbacks.end_parallel_for)(kernelID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::Yes, + Experimental::current_callbacks.end_parallel_for, kernelID); #ifdef KOKKOS_ENABLE_TUNING if (Kokkos::tune_internals()) { Experimental::end_context(Experimental::get_current_context_id()); @@ -143,11 +175,10 @@ void endParallelFor(const uint64_t kernelID) { void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if 
(Experimental::current_callbacks.begin_parallel_scan != nullptr) { - Kokkos::fence(); - (*Experimental::current_callbacks.begin_parallel_scan)(kernelPrefix.c_str(), - devID, kernelID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::Yes, + Experimental::current_callbacks.begin_parallel_scan, kernelPrefix.c_str(), + devID, kernelID); #ifdef KOKKOS_ENABLE_TUNING if (Kokkos::tune_internals()) { auto context_id = Experimental::get_new_context_id(); @@ -163,10 +194,9 @@ void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, } void endParallelScan(const uint64_t kernelID) { - if (Experimental::current_callbacks.end_parallel_scan != nullptr) { - Kokkos::fence(); - (*Experimental::current_callbacks.end_parallel_scan)(kernelID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::Yes, + Experimental::current_callbacks.end_parallel_scan, kernelID); #ifdef KOKKOS_ENABLE_TUNING if (Kokkos::tune_internals()) { Experimental::end_context(Experimental::get_current_context_id()); @@ -176,11 +206,10 @@ void endParallelScan(const uint64_t kernelID) { void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if (Experimental::current_callbacks.begin_parallel_reduce != nullptr) { - Kokkos::fence(); - (*Experimental::current_callbacks.begin_parallel_reduce)( - kernelPrefix.c_str(), devID, kernelID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::Yes, + Experimental::current_callbacks.begin_parallel_reduce, + kernelPrefix.c_str(), devID, kernelID); #ifdef KOKKOS_ENABLE_TUNING if (Kokkos::tune_internals()) { auto context_id = Experimental::get_new_context_id(); @@ -196,10 +225,9 @@ void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, } void endParallelReduce(const uint64_t kernelID) { - if (Experimental::current_callbacks.end_parallel_reduce != nullptr) { - Kokkos::fence(); - 
(*Experimental::current_callbacks.end_parallel_reduce)(kernelID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::Yes, + Experimental::current_callbacks.end_parallel_reduce, kernelID); #ifdef KOKKOS_ENABLE_TUNING if (Kokkos::tune_internals()) { Experimental::end_context(Experimental::get_current_context_id()); @@ -208,44 +236,43 @@ void endParallelReduce(const uint64_t kernelID) { } void pushRegion(const std::string& kName) { - if (Experimental::current_callbacks.push_region != nullptr) { - Kokkos::fence(); - (*Experimental::current_callbacks.push_region)(kName.c_str()); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::Yes, + Experimental::current_callbacks.push_region, kName.c_str()); } void popRegion() { - if (Experimental::current_callbacks.pop_region != nullptr) { - Kokkos::fence(); - (*Experimental::current_callbacks.pop_region)(); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::Yes, + Experimental::current_callbacks.pop_region); } void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { - if (Experimental::current_callbacks.allocate_data != nullptr) { - (*Experimental::current_callbacks.allocate_data)(space, label.c_str(), ptr, - size); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.allocate_data, space, label.c_str(), ptr, + size); } void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { - if (Experimental::current_callbacks.deallocate_data != nullptr) { - (*Experimental::current_callbacks.deallocate_data)(space, label.c_str(), - ptr, size); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.deallocate_data, space, label.c_str(), + ptr, size); } void beginDeepCopy(const SpaceHandle dst_space, 
const std::string dst_label, const void* dst_ptr, const SpaceHandle src_space, const std::string src_label, const void* src_ptr, const uint64_t size) { - if (Experimental::current_callbacks.begin_deep_copy != nullptr) { - (*Experimental::current_callbacks.begin_deep_copy)( - dst_space, dst_label.c_str(), dst_ptr, src_space, src_label.c_str(), - src_ptr, size); + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.begin_deep_copy, dst_space, + dst_label.c_str(), dst_ptr, src_space, src_label.c_str(), src_ptr, size); #ifdef KOKKOS_ENABLE_TUNING + if (Experimental::current_callbacks.begin_deep_copy != nullptr) { if (Kokkos::tune_internals()) { auto context_id = Experimental::get_new_context_id(); Experimental::begin_context(context_id); @@ -257,64 +284,128 @@ void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, Experimental::kernel_type_context_variable_id, "deep_copy")}; Experimental::set_input_values(context_id, 2, contextValues); } -#endif } +#endif } void endDeepCopy() { - if (Experimental::current_callbacks.end_deep_copy != nullptr) { - (*Experimental::current_callbacks.end_deep_copy)(); + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.end_deep_copy); #ifdef KOKKOS_ENABLE_TUNING + if (Experimental::current_callbacks.end_deep_copy != nullptr) { if (Kokkos::tune_internals()) { Experimental::end_context(Experimental::get_current_context_id()); } -#endif } +#endif } void beginFence(const std::string name, const uint32_t deviceId, uint64_t* handle) { - if (Experimental::current_callbacks.begin_fence != nullptr) { - (*Experimental::current_callbacks.begin_fence)(name.c_str(), deviceId, - handle); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.begin_fence, name.c_str(), deviceId, + handle); } void endFence(const uint64_t handle) { - if 
(Experimental::current_callbacks.end_fence != nullptr) { - (*Experimental::current_callbacks.end_fence)(handle); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.end_fence, handle); } void createProfileSection(const std::string& sectionName, uint32_t* secID) { - if (Experimental::current_callbacks.create_profile_section != nullptr) { - (*Experimental::current_callbacks.create_profile_section)( - sectionName.c_str(), secID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.create_profile_section, + sectionName.c_str(), secID); } void startSection(const uint32_t secID) { - if (Experimental::current_callbacks.start_profile_section != nullptr) { - (*Experimental::current_callbacks.start_profile_section)(secID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.start_profile_section, secID); } void stopSection(const uint32_t secID) { - if (Experimental::current_callbacks.stop_profile_section != nullptr) { - (*Experimental::current_callbacks.stop_profile_section)(secID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.stop_profile_section, secID); } void destroyProfileSection(const uint32_t secID) { - if (Experimental::current_callbacks.destroy_profile_section != nullptr) { - (*Experimental::current_callbacks.destroy_profile_section)(secID); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.destroy_profile_section, secID); } void markEvent(const std::string& eventName) { - if (Experimental::current_callbacks.profile_event != nullptr) { - (*Experimental::current_callbacks.profile_event)(eventName.c_str()); + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + 
Experimental::current_callbacks.profile_event, eventName.c_str()); +} + +bool printHelp(const std::string& args) { + if (Experimental::current_callbacks.print_help == nullptr) { + return false; } + std::string arg0 = args.substr(0, args.find_first_of(' ')); + const char* carg0 = arg0.c_str(); + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.print_help, const_cast<char*>(carg0)); + return true; +} + +void parseArgs(int _argc, char** _argv) { + if (Experimental::current_callbacks.parse_args != nullptr && _argc > 0) { + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.parse_args, _argc, _argv); + } +} + +void parseArgs(const std::string& args) { + if (Experimental::current_callbacks.parse_args == nullptr) { + return; + } + using strvec_t = std::vector<std::string>; + auto tokenize = [](const std::string& line, const std::string& delimiters) { + strvec_t _result{}; + std::size_t _bidx = 0; // position that is the beginning of the new string + std::size_t _didx = 0; // position of the delimiter in the string + while (_bidx < line.length() && _didx < line.length()) { + // find the first character (starting at _didx) that is not a delimiter + _bidx = line.find_first_not_of(delimiters, _didx); + // if no more non-delimiter chars, done + if (_bidx == std::string::npos) break; + // starting at the position of the new string, find the next delimiter + _didx = line.find_first_of(delimiters, _bidx); + // starting at the position of the new string, get the characters + // between this position and the next delimiter + std::string _tmp = line.substr(_bidx, _didx - _bidx); + // don't add empty strings + if (!_tmp.empty()) _result.emplace_back(_tmp); + } + return _result; + }; + auto vargs = tokenize(args, " \t"); + if (vargs.size() == 0) return; + auto _argc = static_cast<int>(vargs.size()); + char** _argv = new char*[_argc + 1]; + 
_argv[vargs.size()] = nullptr; + for (int i = 0; i < _argc; ++i) { + auto& _str = vargs.at(i); + _argv[i] = new char[_str.length() + 1]; + std::memcpy(_argv[i], _str.c_str(), _str.length() * sizeof(char)); + _argv[i][_str.length()] = '\0'; + } + parseArgs(_argc, _argv); + for (int i = 0; i < _argc; ++i) { + delete[] _argv[i]; + } + delete[] _argv; } SpaceHandle make_space_handle(const char* space_name) { @@ -323,7 +414,19 @@ SpaceHandle make_space_handle(const char* space_name) { return handle; } -void initialize() { +template <typename Callback> +void lookup_function(void* dlopen_handle, const std::string& basename, + Callback& callback) { +#ifdef KOKKOS_ENABLE_LIBDL + // dlsym returns a pointer to an object, while we want to assign to + // pointer to function A direct cast will give warnings hence, we have to + // workaround the issue by casting pointer to pointers. + void* p = dlsym(dlopen_handle, basename.c_str()); + callback = *reinterpret_cast<Callback*>(&p); +#endif +} + +void initialize(const std::string& profileLibrary) { // Make sure initialize calls happens only once static int is_initialized = 0; if (is_initialized) return; @@ -332,13 +435,9 @@ void initialize() { #ifdef KOKKOS_ENABLE_LIBDL void* firstProfileLibrary = nullptr; - char* envProfileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); + if (profileLibrary.empty()) return; - // If we do not find a profiling library in the environment then exit - // early. - if (envProfileLibrary == nullptr) { - return; - } + char* envProfileLibrary = const_cast<char*>(profileLibrary.c_str()); char* envProfileCopy = (char*)malloc(sizeof(char) * (strlen(envProfileLibrary) + 1)); @@ -361,118 +460,139 @@ void initialize() { std::cout << "KokkosP: Library Loaded: " << profileLibraryName << std::endl; #endif - // dlsym returns a pointer to an object, while we want to assign to - // pointer to function A direct cast will give warnings hence, we have to - // workaround the issue by casting pointer to pointers. 
- auto p1 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_for"); - Experimental::set_begin_parallel_for_callback( - *reinterpret_cast<beginFunction*>(&p1)); - auto p2 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_scan"); - Experimental::set_begin_parallel_scan_callback( - *reinterpret_cast<beginFunction*>(&p2)); - auto p3 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_reduce"); - Experimental::set_begin_parallel_reduce_callback( - *reinterpret_cast<beginFunction*>(&p3)); - - auto p4 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_scan"); - Experimental::set_end_parallel_scan_callback( - *reinterpret_cast<endFunction*>(&p4)); - auto p5 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_for"); - Experimental::set_end_parallel_for_callback( - *reinterpret_cast<endFunction*>(&p5)); - auto p6 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_reduce"); - Experimental::set_end_parallel_reduce_callback( - *reinterpret_cast<endFunction*>(&p6)); - - auto p7 = dlsym(firstProfileLibrary, "kokkosp_init_library"); - Experimental::set_init_callback(*reinterpret_cast<initFunction*>(&p7)); - auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library"); - Experimental::set_finalize_callback( - *reinterpret_cast<finalizeFunction*>(&p8)); - - auto p9 = dlsym(firstProfileLibrary, "kokkosp_push_profile_region"); - Experimental::set_push_region_callback( - *reinterpret_cast<pushFunction*>(&p9)); - auto p10 = dlsym(firstProfileLibrary, "kokkosp_pop_profile_region"); - Experimental::set_pop_region_callback( - *reinterpret_cast<popFunction*>(&p10)); - - auto p11 = dlsym(firstProfileLibrary, "kokkosp_allocate_data"); - Experimental::set_allocate_data_callback( - *reinterpret_cast<allocateDataFunction*>(&p11)); - auto p12 = dlsym(firstProfileLibrary, "kokkosp_deallocate_data"); - Experimental::set_deallocate_data_callback( - *reinterpret_cast<deallocateDataFunction*>(&p12)); - - auto p13 = dlsym(firstProfileLibrary, "kokkosp_begin_deep_copy"); - 
Experimental::set_begin_deep_copy_callback( - *reinterpret_cast<beginDeepCopyFunction*>(&p13)); - auto p14 = dlsym(firstProfileLibrary, "kokkosp_end_deep_copy"); - Experimental::set_end_deep_copy_callback( - *reinterpret_cast<endDeepCopyFunction*>(&p14)); - - auto p15 = dlsym(firstProfileLibrary, "kokkosp_begin_fence"); - Experimental::set_begin_fence_callback( - *reinterpret_cast<beginFenceFunction*>(&p15)); - auto p16 = dlsym(firstProfileLibrary, "kokkosp_end_fence"); - Experimental::set_end_fence_callback( - *reinterpret_cast<endFenceFunction*>(&p16)); - - auto p17 = dlsym(firstProfileLibrary, "kokkosp_dual_view_sync"); - Experimental::set_dual_view_sync_callback( - *reinterpret_cast<dualViewSyncFunction*>(&p17)); - auto p18 = dlsym(firstProfileLibrary, "kokkosp_dual_view_modify"); - Experimental::set_dual_view_modify_callback( - *reinterpret_cast<dualViewModifyFunction*>(&p18)); - - auto p19 = dlsym(firstProfileLibrary, "kokkosp_create_profile_section"); - Experimental::set_create_profile_section_callback( - *(reinterpret_cast<createProfileSectionFunction*>(&p19))); - auto p20 = dlsym(firstProfileLibrary, "kokkosp_start_profile_section"); - Experimental::set_start_profile_section_callback( - *reinterpret_cast<startProfileSectionFunction*>(&p20)); - auto p21 = dlsym(firstProfileLibrary, "kokkosp_stop_profile_section"); - Experimental::set_stop_profile_section_callback( - *reinterpret_cast<stopProfileSectionFunction*>(&p21)); - auto p22 = dlsym(firstProfileLibrary, "kokkosp_destroy_profile_section"); - Experimental::set_destroy_profile_section_callback( - *(reinterpret_cast<destroyProfileSectionFunction*>(&p22))); - - auto p23 = dlsym(firstProfileLibrary, "kokkosp_profile_event"); - Experimental::set_profile_event_callback( - *reinterpret_cast<profileEventFunction*>(&p23)); - + lookup_function( + firstProfileLibrary, "kokkosp_begin_parallel_scan", + Kokkos::Tools::Experimental::current_callbacks.begin_parallel_scan); + lookup_function( + firstProfileLibrary, 
"kokkosp_begin_parallel_for", + Kokkos::Tools::Experimental::current_callbacks.begin_parallel_for); + lookup_function( + firstProfileLibrary, "kokkosp_begin_parallel_reduce", + Kokkos::Tools::Experimental::current_callbacks.begin_parallel_reduce); + lookup_function( + firstProfileLibrary, "kokkosp_end_parallel_scan", + Kokkos::Tools::Experimental::current_callbacks.end_parallel_scan); + lookup_function( + firstProfileLibrary, "kokkosp_end_parallel_for", + Kokkos::Tools::Experimental::current_callbacks.end_parallel_for); + lookup_function( + firstProfileLibrary, "kokkosp_end_parallel_reduce", + Kokkos::Tools::Experimental::current_callbacks.end_parallel_reduce); + + lookup_function(firstProfileLibrary, "kokkosp_init_library", + Kokkos::Tools::Experimental::current_callbacks.init); + lookup_function(firstProfileLibrary, "kokkosp_finalize_library", + Kokkos::Tools::Experimental::current_callbacks.finalize); + + lookup_function( + firstProfileLibrary, "kokkosp_push_profile_region", + Kokkos::Tools::Experimental::current_callbacks.push_region); + lookup_function( + firstProfileLibrary, "kokkosp_pop_profile_region", + Kokkos::Tools::Experimental::current_callbacks.pop_region); + lookup_function( + firstProfileLibrary, "kokkosp_allocate_data", + Kokkos::Tools::Experimental::current_callbacks.allocate_data); + lookup_function( + firstProfileLibrary, "kokkosp_deallocate_data", + Kokkos::Tools::Experimental::current_callbacks.deallocate_data); + + lookup_function( + firstProfileLibrary, "kokkosp_begin_deep_copy", + Kokkos::Tools::Experimental::current_callbacks.begin_deep_copy); + lookup_function( + firstProfileLibrary, "kokkosp_end_deep_copy", + Kokkos::Tools::Experimental::current_callbacks.end_deep_copy); + lookup_function( + firstProfileLibrary, "kokkosp_begin_fence", + Kokkos::Tools::Experimental::current_callbacks.begin_fence); + lookup_function(firstProfileLibrary, "kokkosp_end_fence", + Kokkos::Tools::Experimental::current_callbacks.end_fence); + lookup_function( + 
firstProfileLibrary, "kokkosp_dual_view_sync", + Kokkos::Tools::Experimental::current_callbacks.sync_dual_view); + lookup_function( + firstProfileLibrary, "kokkosp_dual_view_modify", + Kokkos::Tools::Experimental::current_callbacks.modify_dual_view); + + lookup_function( + firstProfileLibrary, "kokkosp_declare_metadata", + Kokkos::Tools::Experimental::current_callbacks.declare_metadata); + lookup_function(firstProfileLibrary, "kokkosp_create_profile_section", + Kokkos::Tools::Experimental::current_callbacks + .create_profile_section); + lookup_function( + firstProfileLibrary, "kokkosp_start_profile_section", + Kokkos::Tools::Experimental::current_callbacks.start_profile_section); + lookup_function( + firstProfileLibrary, "kokkosp_stop_profile_section", + Kokkos::Tools::Experimental::current_callbacks.stop_profile_section); + lookup_function(firstProfileLibrary, "kokkosp_destroy_profile_section", + Kokkos::Tools::Experimental::current_callbacks + .destroy_profile_section); + + lookup_function( + firstProfileLibrary, "kokkosp_profile_event", + Kokkos::Tools::Experimental::current_callbacks.profile_event); #ifdef KOKKOS_ENABLE_TUNING - auto p24 = dlsym(firstProfileLibrary, "kokkosp_declare_output_type"); - Experimental::set_declare_output_type_callback( - *reinterpret_cast<Experimental::outputTypeDeclarationFunction*>( - &p24)); - - auto p25 = dlsym(firstProfileLibrary, "kokkosp_declare_input_type"); - Experimental::set_declare_input_type_callback( - *reinterpret_cast<Experimental::inputTypeDeclarationFunction*>(&p25)); - auto p26 = dlsym(firstProfileLibrary, "kokkosp_request_values"); - Experimental::set_request_output_values_callback( - *reinterpret_cast<Experimental::requestValueFunction*>(&p26)); - auto p27 = dlsym(firstProfileLibrary, "kokkosp_end_context"); - Experimental::set_end_context_callback( - *reinterpret_cast<Experimental::contextEndFunction*>(&p27)); - auto p28 = dlsym(firstProfileLibrary, "kokkosp_begin_context"); - 
Experimental::set_begin_context_callback( - *reinterpret_cast<Experimental::contextBeginFunction*>(&p28)); - auto p29 = - dlsym(firstProfileLibrary, "kokkosp_declare_optimization_goal"); - Experimental::set_declare_optimization_goal_callback( - *reinterpret_cast<Experimental::optimizationGoalDeclarationFunction*>( - &p29)); + lookup_function( + firstProfileLibrary, "kokkosp_declare_output_type", + Kokkos::Tools::Experimental::current_callbacks.declare_output_type); + + lookup_function( + firstProfileLibrary, "kokkosp_declare_input_type", + Kokkos::Tools::Experimental::current_callbacks.declare_input_type); + lookup_function( + firstProfileLibrary, "kokkosp_request_values", + Kokkos::Tools::Experimental::current_callbacks.request_output_values); + lookup_function( + firstProfileLibrary, "kokkosp_end_context", + Kokkos::Tools::Experimental::current_callbacks.end_tuning_context); + lookup_function( + firstProfileLibrary, "kokkosp_begin_context", + Kokkos::Tools::Experimental::current_callbacks.begin_tuning_context); + lookup_function(firstProfileLibrary, "kokkosp_declare_optimization_goal", + Kokkos::Tools::Experimental::current_callbacks + .declare_optimization_goal); #endif // KOKKOS_ENABLE_TUNING + + lookup_function( + firstProfileLibrary, "kokkosp_print_help", + Kokkos::Tools::Experimental::current_callbacks.print_help); + lookup_function( + firstProfileLibrary, "kokkosp_parse_args", + Kokkos::Tools::Experimental::current_callbacks.parse_args); + lookup_function(firstProfileLibrary, + "kokkosp_provide_tool_programming_interface", + Kokkos::Tools::Experimental::current_callbacks + .provide_tool_programming_interface); + lookup_function( + firstProfileLibrary, "kokkosp_request_tool_settings", + Kokkos::Tools::Experimental::current_callbacks.request_tool_settings); } } +#else + (void)profileLibrary; #endif // KOKKOS_ENABLE_LIBDL - if (Experimental::current_callbacks.init != nullptr) { - (*Experimental::current_callbacks.init)( - 0, 
(uint64_t)KOKKOSP_INTERFACE_VERSION, (uint32_t)0, nullptr); - } + Experimental::invoke_kokkosp_callback( + Kokkos::Tools::Experimental::MayRequireGlobalFencing::No, + Kokkos::Tools::Experimental::current_callbacks.init, 0, + (uint64_t)KOKKOSP_INTERFACE_VERSION, (uint32_t)0, nullptr); + + Experimental::tool_requirements.requires_global_fencing = true; + + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.request_tool_settings, 1, + &Experimental::tool_requirements); + + Experimental::ToolProgrammingInterface actions; + actions.fence = &Experimental::Impl::tool_invoked_fence; + + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.provide_tool_programming_interface, 1, + actions); #ifdef KOKKOS_ENABLE_TUNING Experimental::VariableInfo kernel_name; @@ -548,7 +668,9 @@ void finalize() { is_finalized = 1; if (Experimental::current_callbacks.finalize != nullptr) { - (*Experimental::current_callbacks.finalize)(); + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.finalize); Experimental::pause_tools(); } @@ -568,17 +690,24 @@ void finalize() { void syncDualView(const std::string& label, const void* const ptr, bool to_device) { - if (Experimental::current_callbacks.sync_dual_view != nullptr) { - (*Experimental::current_callbacks.sync_dual_view)(label.c_str(), ptr, - to_device); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.sync_dual_view, label.c_str(), ptr, + to_device); } void modifyDualView(const std::string& label, const void* const ptr, bool on_device) { - if (Experimental::current_callbacks.modify_dual_view != nullptr) { - (*Experimental::current_callbacks.modify_dual_view)(label.c_str(), ptr, - on_device); - } + Experimental::invoke_kokkosp_callback( + 
Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.modify_dual_view, label.c_str(), ptr, + on_device); +} + +void declareMetadata(const std::string& key, const std::string& value) { + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.declare_metadata, key.c_str(), + value.c_str()); } } // namespace Tools @@ -591,6 +720,12 @@ void set_init_callback(initFunction callback) { void set_finalize_callback(finalizeFunction callback) { current_callbacks.finalize = callback; } +void set_parse_args_callback(parseArgsFunction callback) { + current_callbacks.parse_args = callback; +} +void set_print_help_callback(printHelpFunction callback) { + current_callbacks.print_help = callback; +} void set_begin_parallel_for_callback(beginFunction callback) { current_callbacks.begin_parallel_for = callback; } @@ -657,6 +792,9 @@ void set_dual_view_sync_callback(dualViewSyncFunction callback) { void set_dual_view_modify_callback(dualViewModifyFunction callback) { current_callbacks.modify_dual_view = callback; } +void set_declare_metadata_callback(declareMetadataFunction callback) { + current_callbacks.declare_metadata = callback; +} void set_declare_output_type_callback(outputTypeDeclarationFunction callback) { current_callbacks.declare_output_type = callback; @@ -751,7 +889,17 @@ void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, void endDeepCopy() { Kokkos::Tools::endDeepCopy(); } void finalize() { Kokkos::Tools::finalize(); } -void initialize() { Kokkos::Tools::initialize(); } +void initialize(const std::string& profileLibrary) { + Kokkos::Tools::initialize(profileLibrary); +} + +bool printHelp(const std::string& args) { + return Kokkos::Tools::printHelp(args); +} +void parseArgs(const std::string& args) { Kokkos::Tools::parseArgs(args); } +void parseArgs(int _argc, char** _argv) { + Kokkos::Tools::parseArgs(_argc, _argv); +} SpaceHandle make_space_handle(const char* 
space_name) { return Kokkos::Tools::make_space_handle(space_name); @@ -782,10 +930,10 @@ size_t get_new_variable_id() { return get_variable_counter(); } size_t declare_output_type(const std::string& variableName, VariableInfo info) { size_t variableId = get_new_variable_id(); #ifdef KOKKOS_ENABLE_TUNING - if (Experimental::current_callbacks.declare_output_type != nullptr) { - (*Experimental::current_callbacks.declare_output_type)(variableName.c_str(), - variableId, &info); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.declare_output_type, variableName.c_str(), + variableId, &info); variable_metadata[variableId] = info; #else (void)variableName; @@ -797,10 +945,10 @@ size_t declare_output_type(const std::string& variableName, VariableInfo info) { size_t declare_input_type(const std::string& variableName, VariableInfo info) { size_t variableId = get_new_variable_id(); #ifdef KOKKOS_ENABLE_TUNING - if (Experimental::current_callbacks.declare_input_type != nullptr) { - (*Experimental::current_callbacks.declare_input_type)(variableName.c_str(), - variableId, &info); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.declare_input_type, variableName.c_str(), + variableId, &info); variable_metadata[variableId] = info; #else (void)variableName; @@ -839,8 +987,10 @@ void request_output_values(size_t contextId, size_t count, for (size_t x = 0; x < count; ++x) { values[x].metadata = &variable_metadata[values[x].type_id]; } - (*Experimental::current_callbacks.request_output_values)( - contextId, context_values.size(), context_values.data(), count, values); + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.request_output_values, contextId, + context_values.size(), context_values.data(), count, values); } #else (void)contextId; @@ -854,19 +1004,19 @@ static 
std::unordered_map<size_t, size_t> optimization_goals; #endif void begin_context(size_t contextId) { - if (Experimental::current_callbacks.begin_tuning_context != nullptr) { - (*Experimental::current_callbacks.begin_tuning_context)(contextId); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.begin_tuning_context, contextId); } void end_context(size_t contextId) { #ifdef KOKKOS_ENABLE_TUNING for (auto id : features_per_context[contextId]) { active_features.erase(id); } - if (Experimental::current_callbacks.end_tuning_context != nullptr) { - (*Experimental::current_callbacks.end_tuning_context)( - contextId, feature_values[optimization_goals[contextId]]); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.end_tuning_context, contextId, + feature_values[optimization_goals[contextId]]); optimization_goals.erase(contextId); decrement_current_context_id(); #else @@ -954,9 +1104,9 @@ size_t get_new_variable_id(); void declare_optimization_goal(const size_t context, const OptimizationGoal& goal) { #ifdef KOKKOS_ENABLE_TUNING - if (Experimental::current_callbacks.declare_optimization_goal != nullptr) { - (*Experimental::current_callbacks.declare_optimization_goal)(context, goal); - } + Experimental::invoke_kokkosp_callback( + Experimental::MayRequireGlobalFencing::No, + Experimental::current_callbacks.declare_optimization_goal, context, goal); optimization_goals[context] = goal.type_id; #else (void)context; diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp b/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp index 688937623761f72b7d4eacfad9cc27e5a10c57eb..1ff6a36c3bc3c934e787af30c5bd6568046f15f1 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling.hpp @@ -45,13 +45,13 @@ #ifndef KOKKOS_IMPL_KOKKOS_PROFILING_HPP #define KOKKOS_IMPL_KOKKOS_PROFILING_HPP 
-#include <impl/Kokkos_Profiling_Interface.hpp> -#include <Kokkos_Macros.hpp> #include <Kokkos_Core_fwd.hpp> #include <Kokkos_ExecPolicy.hpp> +#include <Kokkos_Macros.hpp> #include <Kokkos_Tuners.hpp> -#include <string> +#include <impl/Kokkos_Profiling_Interface.hpp> #include <map> +#include <string> #include <type_traits> namespace Kokkos { @@ -125,8 +125,11 @@ void syncDualView(const std::string& label, const void* const ptr, void modifyDualView(const std::string& label, const void* const ptr, bool on_device); -void initialize(); +void declareMetadata(const std::string& key, const std::string& value); +void initialize(const std::string& = {}); void finalize(); +bool printHelp(const std::string&); +void parseArgs(const std::string&); Kokkos_Profiling_SpaceHandle make_space_handle(const char* space_name); @@ -134,6 +137,8 @@ namespace Experimental { void set_init_callback(initFunction callback); void set_finalize_callback(finalizeFunction callback); +void set_parse_args_callback(parseArgsFunction callback); +void set_print_help_callback(printHelpFunction callback); void set_begin_parallel_for_callback(beginFunction callback); void set_end_parallel_for_callback(endFunction callback); void set_begin_parallel_reduce_callback(beginFunction callback); @@ -156,6 +161,7 @@ void set_begin_fence_callback(beginFenceFunction callback); void set_end_fence_callback(endFenceFunction callback); void set_dual_view_sync_callback(dualViewSyncFunction callback); void set_dual_view_modify_callback(dualViewModifyFunction callback); +void set_declare_metadata_callback(declareMetadataFunction callback); void set_declare_output_type_callback(outputTypeDeclarationFunction callback); void set_declare_input_type_callback(inputTypeDeclarationFunction callback); @@ -183,10 +189,19 @@ namespace Impl { static std::map<std::string, Kokkos::Tools::Experimental::TeamSizeTuner> team_tuners; +template <int Rank> +using MDRangeTuningMap = + std::map<std::string, 
Kokkos::Tools::Experimental::MDRangeTuner<Rank>>; + +template <int Rank> +static MDRangeTuningMap<Rank> mdrange_tuners; + +// For any policies without a tuning implementation, with a reducer template <class ReducerType, class ExecPolicy, class Functor, typename TagType> void tune_policy(const size_t, const std::string&, ExecPolicy&, const Functor&, TagType) {} +// For any policies without a tuning implementation, without a reducer template <class ExecPolicy, class Functor, typename TagType> void tune_policy(const size_t, const std::string&, ExecPolicy&, const Functor&, const TagType&) {} @@ -225,6 +240,24 @@ struct SimpleTeamSizeCalculator { auto max = policy.team_size_recommended(functor, tag); return max; } + template <typename Policy, typename Functor> + int get_mdrange_max_tile_size_product(const Policy& policy, + const Functor& functor, + const Kokkos::ParallelForTag&) { + using exec_space = typename Policy::execution_space; + using driver = Kokkos::Impl::ParallelFor<Functor, Policy, exec_space>; + return driver::max_tile_size_product(policy, functor); + } + template <typename Policy, typename Functor> + int get_mdrange_max_tile_size_product(const Policy& policy, + const Functor& functor, + const Kokkos::ParallelReduceTag&) { + using exec_space = typename Policy::execution_space; + using driver = + Kokkos::Impl::ParallelReduce<Functor, Policy, Kokkos::InvalidType, + exec_space>; + return driver::max_tile_size_product(policy, functor); + } }; // when we have a complex reducer, we need to pass an @@ -251,15 +284,25 @@ struct ComplexReducerSizeCalculator { ReducerType reducer_example = ReducerType(value); return policy.team_size_recommended(functor, reducer_example, tag); } + template <typename Policy, typename Functor> + int get_mdrange_max_tile_size_product(const Policy& policy, + const Functor& functor, + const Kokkos::ParallelReduceTag&) { + using exec_space = typename Policy::execution_space; + using driver = + Kokkos::Impl::ParallelReduce<Functor, Policy, 
ReducerType, exec_space>; + return driver::max_tile_size_product(policy, functor); + } }; } // namespace Impl -template <class Functor, class TagType, class... Properties> -void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, - Kokkos::TeamPolicy<Properties...>& policy, - const Functor& functor, const TagType& tag) { - if (policy.impl_auto_team_size() || policy.impl_auto_vector_length()) { +template <class Tuner, class Functor, class TagType, + class TuningPermissionFunctor, class Map, class Policy> +void generic_tune_policy(const std::string& label_in, Map& map, Policy& policy, + const Functor& functor, const TagType& tag, + const TuningPermissionFunctor& should_tune) { + if (should_tune(policy)) { std::string label = label_in; if (label_in.empty()) { using policy_type = @@ -269,12 +312,10 @@ void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, label = name.get(); } auto tuner_iter = [&]() { - auto my_tuner = team_tuners.find(label); - if (my_tuner == team_tuners.end()) { - return (team_tuners - .emplace(label, Kokkos::Tools::Experimental::TeamSizeTuner( - label, policy, functor, tag, - Impl::SimpleTeamSizeCalculator{})) + auto my_tuner = map.find(label); + if (my_tuner == map.end()) { + return (map.emplace(label, Tuner(label, policy, functor, tag, + Impl::SimpleTeamSizeCalculator{})) .first); } return my_tuner; @@ -282,12 +323,12 @@ void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, tuner_iter->second.tune(policy); } } - -template <class ReducerType, class Functor, class TagType, class... 
Properties> -void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, - Kokkos::TeamPolicy<Properties...>& policy, - const Functor& functor, const TagType& tag) { - if (policy.impl_auto_team_size() || policy.impl_auto_vector_length()) { +template <class Tuner, class ReducerType, class Functor, class TagType, + class TuningPermissionFunctor, class Map, class Policy> +void generic_tune_policy(const std::string& label_in, Map& map, Policy& policy, + const Functor& functor, const TagType& tag, + const TuningPermissionFunctor& should_tune) { + if (should_tune(policy)) { std::string label = label_in; if (label_in.empty()) { using policy_type = @@ -297,15 +338,13 @@ void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, label = name.get(); } auto tuner_iter = [&]() { - auto my_tuner = team_tuners.find(label); - if (my_tuner == team_tuners.end()) { - return ( - team_tuners - .emplace(label, - Kokkos::Tools::Experimental::TeamSizeTuner( - label, policy, functor, tag, + auto my_tuner = map.find(label); + if (my_tuner == map.end()) { + return (map.emplace( + label, + Tuner(label, policy, functor, tag, Impl::ComplexReducerSizeCalculator<ReducerType>{})) - .first); + .first); } return my_tuner; }(); @@ -313,6 +352,60 @@ void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, } } +// tune a TeamPolicy, without reducer +template <class Functor, class TagType, class... 
Properties> +void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, + Kokkos::TeamPolicy<Properties...>& policy, + const Functor& functor, const TagType& tag) { + generic_tune_policy<Experimental::TeamSizeTuner>( + label_in, team_tuners, policy, functor, tag, + [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) { + return (candidate_policy.impl_auto_team_size() || + candidate_policy.impl_auto_vector_length()); + }); +} + +// tune a TeamPolicy, with reducer +template <class ReducerType, class Functor, class TagType, class... Properties> +void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, + Kokkos::TeamPolicy<Properties...>& policy, + const Functor& functor, const TagType& tag) { + generic_tune_policy<Experimental::TeamSizeTuner, ReducerType>( + label_in, team_tuners, policy, functor, tag, + [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) { + return (candidate_policy.impl_auto_team_size() || + candidate_policy.impl_auto_vector_length()); + }); +} + +// tune a MDRangePolicy, without reducer +template <class Functor, class TagType, class... Properties> +void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, + Kokkos::MDRangePolicy<Properties...>& policy, + const Functor& functor, const TagType& tag) { + using Policy = Kokkos::MDRangePolicy<Properties...>; + static constexpr int rank = Policy::rank; + generic_tune_policy<Experimental::MDRangeTuner<rank>>( + label_in, mdrange_tuners<rank>, policy, functor, tag, + [](const Policy& candidate_policy) { + return candidate_policy.impl_tune_tile_size(); + }); +} + +// tune a MDRangePolicy, with reducer +template <class ReducerType, class Functor, class TagType, class... 
Properties> +void tune_policy(const size_t /**tuning_context*/, const std::string& label_in, + Kokkos::MDRangePolicy<Properties...>& policy, + const Functor& functor, const TagType& tag) { + using Policy = Kokkos::MDRangePolicy<Properties...>; + static constexpr int rank = Policy::rank; + generic_tune_policy<Experimental::MDRangeTuner<rank>, ReducerType>( + label_in, mdrange_tuners<rank>, policy, functor, tag, + [](const Policy& candidate_policy) { + return candidate_policy.impl_tune_tile_size(); + }); +} + template <class ReducerType> struct ReductionSwitcher { template <class Functor, class TagType, class ExecPolicy> @@ -337,16 +430,12 @@ struct ReductionSwitcher<Kokkos::InvalidType> { } }; -template <class ExecPolicy, class Functor, typename TagType> -void report_policy_results(const size_t, const std::string&, ExecPolicy&, - const Functor&, const TagType&) {} - -template <class Functor, class TagType, class... Properties> -void report_policy_results(const size_t /**tuning_context*/, - const std::string& label_in, - Kokkos::TeamPolicy<Properties...> policy, - const Functor&, const TagType&) { - if (policy.impl_auto_team_size() || policy.impl_auto_vector_length()) { +template <class Tuner, class Functor, class TagType, + class TuningPermissionFunctor, class Map, class Policy> +void generic_report_results(const std::string& label_in, Map& map, + Policy& policy, const Functor&, const TagType&, + const TuningPermissionFunctor& should_tune) { + if (should_tune(policy)) { std::string label = label_in; if (label_in.empty()) { using policy_type = @@ -355,11 +444,45 @@ void report_policy_results(const size_t /**tuning_context*/, Kokkos::Impl::ParallelConstructName<Functor, work_tag> name(label); label = name.get(); } - auto& tuner = team_tuners[label]; - tuner.end(); + auto tuner_iter = map[label]; + tuner_iter.end(); } } +// report results for a policy type we don't tune (do nothing) +template <class ExecPolicy, class Functor, typename TagType> +void 
report_policy_results(const size_t, const std::string&, ExecPolicy&, + const Functor&, const TagType&) {} + +// report results for a TeamPolicy +template <class Functor, class TagType, class... Properties> +void report_policy_results(const size_t /**tuning_context*/, + const std::string& label_in, + Kokkos::TeamPolicy<Properties...>& policy, + const Functor& functor, const TagType& tag) { + generic_report_results<Experimental::TeamSizeTuner>( + label_in, team_tuners, policy, functor, tag, + [](const Kokkos::TeamPolicy<Properties...>& candidate_policy) { + return (candidate_policy.impl_auto_team_size() || + candidate_policy.impl_auto_vector_length()); + }); +} + +// report results for an MDRangePolicy +template <class Functor, class TagType, class... Properties> +void report_policy_results(const size_t /**tuning_context*/, + const std::string& label_in, + Kokkos::MDRangePolicy<Properties...>& policy, + const Functor& functor, const TagType& tag) { + using Policy = Kokkos::MDRangePolicy<Properties...>; + static constexpr int rank = Policy::rank; + generic_report_results<Experimental::MDRangeTuner<rank>>( + label_in, mdrange_tuners<rank>, policy, functor, tag, + [](const Policy& candidate_policy) { + return candidate_policy.impl_tune_tile_size(); + }); +} + template <class ExecPolicy, class FunctorType> void begin_parallel_for(ExecPolicy& policy, FunctorType& functor, const std::string& label, uint64_t& kpID) { @@ -515,7 +638,8 @@ void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, const uint64_t size); void endDeepCopy(); void finalize(); -void initialize(); +void initialize(const std::string& = {}); + SpaceHandle make_space_handle(const char* space_name); namespace Experimental { @@ -533,7 +657,9 @@ using Kokkos::Tools::Experimental::set_end_parallel_reduce_callback; using Kokkos::Tools::Experimental::set_end_parallel_scan_callback; using Kokkos::Tools::Experimental::set_finalize_callback; using 
Kokkos::Tools::Experimental::set_init_callback; +using Kokkos::Tools::Experimental::set_parse_args_callback; using Kokkos::Tools::Experimental::set_pop_region_callback; +using Kokkos::Tools::Experimental::set_print_help_callback; using Kokkos::Tools::Experimental::set_profile_event_callback; using Kokkos::Tools::Experimental::set_push_region_callback; using Kokkos::Tools::Experimental::set_start_profile_section_callback; diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h b/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h index de771d330142e8313f2afc16811da0dcc77a04d4..ed8751c50cc04d915b7b3c371a6ec05756ff6087 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h @@ -54,7 +54,7 @@ #include <stdbool.h> #endif -#define KOKKOSP_INTERFACE_VERSION 20200625 +#define KOKKOSP_INTERFACE_VERSION 20210225 // Profiling @@ -73,6 +73,10 @@ typedef void (*Kokkos_Profiling_initFunction)( // NOLINTNEXTLINE(modernize-use-using): C compatibility typedef void (*Kokkos_Profiling_finalizeFunction)(); // NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_parseArgsFunction)(int, char**); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_printHelpFunction)(char*); +// NOLINTNEXTLINE(modernize-use-using): C compatibility typedef void (*Kokkos_Profiling_beginFunction)(const char*, const uint32_t, uint64_t*); // NOLINTNEXTLINE(modernize-use-using): C compatibility @@ -123,6 +127,33 @@ typedef void (*Kokkos_Profiling_dualViewModifyFunction)(const char*, const void* const, bool); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Profiling_declareMetadataFunction)(const char*, + const char*); + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_toolInvokedFenceFunction)(const uint32_t); + +// NOLINTNEXTLINE(modernize-use-using): C 
compatibility +typedef void (*Kokkos_Tools_functionPointer)(); +struct Kokkos_Tools_ToolProgrammingInterface { + Kokkos_Tools_toolInvokedFenceFunction fence; + // allow addition of more actions + Kokkos_Tools_functionPointer padding[31]; +}; + +struct Kokkos_Tools_ToolSettings { + bool requires_global_fencing; + bool padding[255]; +}; + +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_provideToolProgrammingInterfaceFunction)( + const uint32_t, struct Kokkos_Tools_ToolProgrammingInterface); +// NOLINTNEXTLINE(modernize-use-using): C compatibility +typedef void (*Kokkos_Tools_requestToolSettingsFunction)( + const uint32_t, struct Kokkos_Tools_ToolSettings*); + // Tuning #define KOKKOS_TOOLS_TUNING_STRING_LENGTH 64 @@ -217,11 +248,11 @@ typedef void (*Kokkos_Tools_contextEndFunction)( typedef void (*Kokkos_Tools_optimizationGoalDeclarationFunction)( const size_t, const struct Kokkos_Tools_OptimzationGoal goal); -typedef void (*function_pointer)(); - struct Kokkos_Profiling_EventSet { Kokkos_Profiling_initFunction init; Kokkos_Profiling_finalizeFunction finalize; + Kokkos_Profiling_parseArgsFunction parse_args; + Kokkos_Profiling_printHelpFunction print_help; Kokkos_Profiling_beginFunction begin_parallel_for; Kokkos_Profiling_endFunction end_parallel_for; Kokkos_Profiling_beginFunction begin_parallel_reduce; @@ -243,17 +274,23 @@ struct Kokkos_Profiling_EventSet { Kokkos_Profiling_endFenceFunction end_fence; Kokkos_Profiling_dualViewSyncFunction sync_dual_view; Kokkos_Profiling_dualViewModifyFunction modify_dual_view; - char profiling_padding[12 * sizeof(function_pointer)]; + Kokkos_Profiling_declareMetadataFunction declare_metadata; + Kokkos_Tools_provideToolProgrammingInterfaceFunction + provide_tool_programming_interface; + Kokkos_Tools_requestToolSettingsFunction request_tool_settings; + char profiling_padding[9 * sizeof(Kokkos_Tools_functionPointer)]; Kokkos_Tools_outputTypeDeclarationFunction declare_output_type; 
Kokkos_Tools_inputTypeDeclarationFunction declare_input_type; Kokkos_Tools_requestValueFunction request_output_values; Kokkos_Tools_contextBeginFunction begin_tuning_context; Kokkos_Tools_contextEndFunction end_tuning_context; Kokkos_Tools_optimizationGoalDeclarationFunction declare_optimization_goal; - char padding[234 * - sizeof(function_pointer)]; // allows us to add another 256 - // events to the Tools interface - // without changing struct layout + char padding[232 * + sizeof( + Kokkos_Tools_functionPointer)]; // allows us to add another + // 256 events to the Tools + // interface without + // changing struct layout }; #endif // KOKKOS_PROFILING_C_INTERFACE_HPP diff --git a/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index fad7a78e393d1dcf48468e735b5bd8b90f00c459..7809632f78ddf33d8429b353723736b68e3b7536 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -101,12 +101,29 @@ namespace Tools { namespace Experimental { using EventSet = Kokkos_Profiling_EventSet; -static_assert(sizeof(EventSet) / sizeof(function_pointer) == 275, +static_assert(sizeof(EventSet) / sizeof(Kokkos_Tools_functionPointer) == 275, "sizeof EventSet has changed, this is an error on the part of a " "Kokkos developer"); +static_assert(sizeof(Kokkos_Tools_ToolSettings) / sizeof(bool) == 256, + "sizeof EventSet has changed, this is an error on the part of a " + "Kokkos developer"); +static_assert(sizeof(Kokkos_Tools_ToolProgrammingInterface) / + sizeof(Kokkos_Tools_functionPointer) == + 32, + "sizeof EventSet has changed, this is an error on the part of a " + "Kokkos developer"); + +using toolInvokedFenceFunction = Kokkos_Tools_toolInvokedFenceFunction; +using provideToolProgrammingInterfaceFunction = + Kokkos_Tools_provideToolProgrammingInterfaceFunction; +using requestToolSettingsFunction = 
Kokkos_Tools_requestToolSettingsFunction; +using ToolSettings = Kokkos_Tools_ToolSettings; +using ToolProgrammingInterface = Kokkos_Tools_ToolProgrammingInterface; } // namespace Experimental using initFunction = Kokkos_Profiling_initFunction; using finalizeFunction = Kokkos_Profiling_finalizeFunction; +using parseArgsFunction = Kokkos_Profiling_parseArgsFunction; +using printHelpFunction = Kokkos_Profiling_printHelpFunction; using beginFunction = Kokkos_Profiling_beginFunction; using endFunction = Kokkos_Profiling_endFunction; using pushFunction = Kokkos_Profiling_pushFunction; @@ -120,13 +137,14 @@ using startProfileSectionFunction = using stopProfileSectionFunction = Kokkos_Profiling_stopProfileSectionFunction; using destroyProfileSectionFunction = Kokkos_Profiling_destroyProfileSectionFunction; -using profileEventFunction = Kokkos_Profiling_profileEventFunction; -using beginDeepCopyFunction = Kokkos_Profiling_beginDeepCopyFunction; -using endDeepCopyFunction = Kokkos_Profiling_endDeepCopyFunction; -using beginFenceFunction = Kokkos_Profiling_beginFenceFunction; -using endFenceFunction = Kokkos_Profiling_endFenceFunction; -using dualViewSyncFunction = Kokkos_Profiling_dualViewSyncFunction; -using dualViewModifyFunction = Kokkos_Profiling_dualViewModifyFunction; +using profileEventFunction = Kokkos_Profiling_profileEventFunction; +using beginDeepCopyFunction = Kokkos_Profiling_beginDeepCopyFunction; +using endDeepCopyFunction = Kokkos_Profiling_endDeepCopyFunction; +using beginFenceFunction = Kokkos_Profiling_beginFenceFunction; +using endFenceFunction = Kokkos_Profiling_endFenceFunction; +using dualViewSyncFunction = Kokkos_Profiling_dualViewSyncFunction; +using dualViewModifyFunction = Kokkos_Profiling_dualViewModifyFunction; +using declareMetadataFunction = Kokkos_Profiling_declareMetadataFunction; } // namespace Tools @@ -161,7 +179,9 @@ using Kokkos::Tools::endDeepCopyFunction; using Kokkos::Tools::endFunction; using Kokkos::Tools::finalizeFunction; using 
Kokkos::Tools::initFunction; +using Kokkos::Tools::parseArgsFunction; using Kokkos::Tools::popFunction; +using Kokkos::Tools::printHelpFunction; using Kokkos::Tools::profileEventFunction; using Kokkos::Tools::pushFunction; using Kokkos::Tools::SpaceHandle; diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp index 646280faee1e62e59046e5e9bcfa090cf19e21de..64dfd5d33fb8576b1cb5446843edefaaf6d67422 100644 --- a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp @@ -45,16 +45,29 @@ #ifndef KOKKOS_SHARED_ALLOC_HPP #define KOKKOS_SHARED_ALLOC_HPP +#include <Kokkos_Macros.hpp> +#include <Kokkos_Core_fwd.hpp> +#include <impl/Kokkos_Error.hpp> // Impl::throw_runtime_exception + #include <cstdint> #include <string> -// undefined at end of file #if defined(KOKKOS_ENABLE_OPENMPTARGET) +// Base function. +static constexpr bool kokkos_omp_on_host() { return true; } #if defined(KOKKOS_COMPILER_PGI) #define KOKKOS_IMPL_IF_ON_HOST if (!__builtin_is_device_code()) #else // Note: OpenMPTarget enforces C++17 at configure time -#define KOKKOS_IMPL_IF_ON_HOST if constexpr (omp_is_initial_device()) +#pragma omp begin declare variant match(device = {kind(host)}) +static constexpr bool kokkos_omp_on_host() { return true; } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(nohost)}) +static constexpr bool kokkos_omp_on_host() { return false; } +#pragma omp end declare variant + +#define KOKKOS_IMPL_IF_ON_HOST if constexpr (kokkos_omp_on_host()) #endif #else #define KOKKOS_IMPL_IF_ON_HOST if (true) @@ -66,6 +79,9 @@ namespace Impl { template <class MemorySpace = void, class DestroyFunctor = void> class SharedAllocationRecord; +template <class MemorySpace> +class SharedAllocationRecordCommon; + class SharedAllocationHeader { private: using Record = SharedAllocationRecord<void, void>; @@ -75,6 +91,10 @@ class 
SharedAllocationHeader { template <class, class> friend class SharedAllocationRecord; + template <class> + friend class SharedAllocationRecordCommon; + template <class> + friend class HostInaccessibleSharedAllocationRecordCommon; Record* m_record; char m_label[maximum_label_length]; @@ -99,6 +119,10 @@ class SharedAllocationRecord<void, void> { template <class, class> friend class SharedAllocationRecord; + template <class> + friend class SharedAllocationRecordCommon; + template <class> + friend class HostInaccessibleSharedAllocationRecordCommon; using function_type = void (*)(SharedAllocationRecord<void, void>*); @@ -229,6 +253,58 @@ class SharedAllocationRecord<void, void> { const SharedAllocationRecord* const root, const bool detail); }; +template <class MemorySpace> +class SharedAllocationRecordCommon : public SharedAllocationRecord<void, void> { + private: + using derived_t = SharedAllocationRecord<MemorySpace, void>; + using record_base_t = SharedAllocationRecord<void, void>; + derived_t& self() { return *static_cast<derived_t*>(this); } + derived_t const& self() const { return *static_cast<derived_t const*>(this); } + + protected: + using record_base_t::record_base_t; + + void _fill_host_accessible_header_info(SharedAllocationHeader& arg_header, + std::string const& arg_label); + + static void deallocate(record_base_t* arg_rec); + + public: + static auto allocate(MemorySpace const& arg_space, + std::string const& arg_label, size_t arg_alloc_size) + -> derived_t*; + /**\brief Allocate tracked memory in the space */ + static void* allocate_tracked(MemorySpace const& arg_space, + std::string const& arg_alloc_label, + size_t arg_alloc_size); + /**\brief Reallocate tracked memory in the space */ + static void deallocate_tracked(void* arg_alloc_ptr); + /**\brief Deallocate tracked memory in the space */ + static void* reallocate_tracked(void* arg_alloc_ptr, size_t arg_alloc_size); + static auto get_record(void* alloc_ptr) -> derived_t*; + std::string get_label() 
const; + static void print_records(std::ostream& s, MemorySpace const&, + bool detail = false); +}; + +template <class MemorySpace> +class HostInaccessibleSharedAllocationRecordCommon + : public SharedAllocationRecordCommon<MemorySpace> { + private: + using base_t = SharedAllocationRecordCommon<MemorySpace>; + using derived_t = SharedAllocationRecord<MemorySpace, void>; + using record_base_t = SharedAllocationRecord<void, void>; + + protected: + using base_t::base_t; + + public: + static void print_records(std::ostream& s, MemorySpace const&, + bool detail = false); + static auto get_record(void* alloc_ptr) -> derived_t*; + std::string get_label() const; +}; + namespace { /* Taking the address of this function so make sure it is unique */ @@ -508,5 +584,4 @@ union SharedAllocationTracker { } /* namespace Impl */ } /* namespace Kokkos */ -#undef KOKKOS_IMPL_IF_ON_HOST #endif diff --git a/packages/kokkos/core/src/impl/Kokkos_SharedAlloc_timpl.hpp b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc_timpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a6ee1b3f9eb11ddfbfd2c1ce5dd7a213bd25dda9 --- /dev/null +++ b/packages/kokkos/core/src/impl/Kokkos_SharedAlloc_timpl.hpp @@ -0,0 +1,287 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (12/8/20) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_SHAREDALLOC_TIMPL_HPP +#define KOKKOS_IMPL_SHAREDALLOC_TIMPL_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_Core_fwd.hpp> + +#include <impl/Kokkos_SharedAlloc.hpp> + +#include <Kokkos_HostSpace.hpp> // used with HostInaccessible specializations + +#include <string> // std::string +#include <cstring> // strncpy +#include <iostream> // ostream + +namespace Kokkos { +namespace Impl { + +template <class MemorySpace> +auto SharedAllocationRecordCommon<MemorySpace>::allocate( + MemorySpace const& arg_space, std::string const& arg_label, + size_t arg_alloc_size) -> derived_t* { + return new derived_t(arg_space, arg_label, arg_alloc_size); +} + +template <class MemorySpace> +void* SharedAllocationRecordCommon<MemorySpace>::allocate_tracked( + const MemorySpace& arg_space, const std::string& arg_alloc_label, + size_t arg_alloc_size) { + if (!arg_alloc_size) return nullptr; + + SharedAllocationRecord* const r = + allocate(arg_space, arg_alloc_label, arg_alloc_size); + + record_base_t::increment(r); + + return r->data(); +} + +template <class MemorySpace> +void SharedAllocationRecordCommon<MemorySpace>::deallocate( + SharedAllocationRecordCommon::record_base_t* arg_rec) { + delete static_cast<derived_t*>(arg_rec); +} + +template <class MemorySpace> +void SharedAllocationRecordCommon<MemorySpace>::deallocate_tracked( + void* arg_alloc_ptr) { + if (arg_alloc_ptr != nullptr) { + SharedAllocationRecord* const r = derived_t::get_record(arg_alloc_ptr); + record_base_t::decrement(r); + } +} + +template <class MemorySpace> +void* SharedAllocationRecordCommon<MemorySpace>::reallocate_tracked( + void* arg_alloc_ptr, size_t arg_alloc_size) { + derived_t* const r_old = derived_t::get_record(arg_alloc_ptr); + derived_t* const r_new = + allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); + + Kokkos::Impl::DeepCopy<MemorySpace, 
MemorySpace>( + r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); + + record_base_t::increment(r_new); + record_base_t::decrement(r_old); + + return r_new->data(); +} + +template <class MemorySpace> +auto SharedAllocationRecordCommon<MemorySpace>::get_record(void* alloc_ptr) + -> derived_t* { + using Header = SharedAllocationHeader; + + Header const* const h = alloc_ptr ? Header::get_header(alloc_ptr) : nullptr; + + if (!alloc_ptr || h->m_record->m_alloc_ptr != h) { + Kokkos::Impl::throw_runtime_exception( + std::string("Kokkos::Impl::SharedAllocationRecordCommon<") + + std::string(MemorySpace::name()) + + std::string(">::get_record() ERROR")); + } + + return static_cast<derived_t*>(h->m_record); +} + +template <class MemorySpace> +std::string SharedAllocationRecordCommon<MemorySpace>::get_label() const { + return std::string(record_base_t::head()->m_label); +} + +template <class MemorySpace> +void SharedAllocationRecordCommon<MemorySpace>:: + _fill_host_accessible_header_info(SharedAllocationHeader& arg_header, + std::string const& arg_label) { + // Fill in the Header information, directly accessible on the host + + arg_header.m_record = &self(); + + strncpy(arg_header.m_label, arg_label.c_str(), + SharedAllocationHeader::maximum_label_length); + // Set last element zero, in case c_str is too long + arg_header.m_label[SharedAllocationHeader::maximum_label_length - 1] = '\0'; +} + +template <class MemorySpace> +void SharedAllocationRecordCommon<MemorySpace>::print_records( + std::ostream& s, const MemorySpace&, bool detail) { + (void)s; + (void)detail; +#ifdef KOKKOS_ENABLE_DEBUG + SharedAllocationRecord<void, void>::print_host_accessible_records( + s, MemorySpace::name(), &derived_t::s_root_record, detail); +#else + Kokkos::Impl::throw_runtime_exception( + std::string("SharedAllocationHeader<") + + std::string(MemorySpace::name()) + + std::string( + ">::print_records only works with KOKKOS_ENABLE_DEBUG enabled")); +#endif +} + +template <class 
MemorySpace> +void HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::print_records( + std::ostream& s, const MemorySpace&, bool detail) { + (void)s; + (void)detail; +#ifdef KOKKOS_ENABLE_DEBUG + SharedAllocationRecord<void, void>* r = &derived_t::s_root_record; + + char buffer[256]; + + SharedAllocationHeader head; + + if (detail) { + do { + if (r->m_alloc_ptr) { + Kokkos::Impl::DeepCopy<HostSpace, MemorySpace>( + &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); + } else { + head.m_label[0] = 0; + } + + // Formatting dependent on sizeof(uintptr_t) + const char* format_string; + + if (sizeof(uintptr_t) == sizeof(unsigned long)) { + format_string = + "%s addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx " + "+ %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"; + } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { + format_string = + "%s addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ " + "0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n"; + } + + snprintf(buffer, 256, format_string, MemorySpace::execution_space::name(), + reinterpret_cast<uintptr_t>(r), + reinterpret_cast<uintptr_t>(r->m_prev), + reinterpret_cast<uintptr_t>(r->m_next), + reinterpret_cast<uintptr_t>(r->m_alloc_ptr), r->m_alloc_size, + r->m_count, reinterpret_cast<uintptr_t>(r->m_dealloc), + head.m_label); + s << buffer; + r = r->m_next; + } while (r != &derived_t::s_root_record); + } else { + do { + if (r->m_alloc_ptr) { + Kokkos::Impl::DeepCopy<HostSpace, MemorySpace>( + &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); + + // Formatting dependent on sizeof(uintptr_t) + const char* format_string; + + if (sizeof(uintptr_t) == sizeof(unsigned long)) { + format_string = "%s [ 0x%.12lx + %ld ] %s\n"; + } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { + format_string = "%s [ 0x%.12llx + %ld ] %s\n"; + } + + snprintf( + buffer, 256, format_string, MemorySpace::execution_space::name(), + reinterpret_cast<uintptr_t>(r->data()), r->size(), 
head.m_label); + } else { + snprintf(buffer, 256, "%s [ 0 + 0 ]\n", + MemorySpace::execution_space::name()); + } + s << buffer; + r = r->m_next; + } while (r != &derived_t::s_root_record); + } +#else + Kokkos::Impl::throw_runtime_exception( + std::string("SharedAllocationHeader<") + + std::string(MemorySpace::name()) + + std::string( + ">::print_records only works with KOKKOS_ENABLE_DEBUG enabled")); +#endif +} + +template <class MemorySpace> +auto HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::get_record( + void* alloc_ptr) -> derived_t* { + // Copy the header from the allocation + SharedAllocationHeader head; + + SharedAllocationHeader const* const head_cuda = + alloc_ptr ? SharedAllocationHeader::get_header(alloc_ptr) : nullptr; + + if (alloc_ptr) { + Kokkos::Impl::DeepCopy<HostSpace, MemorySpace>( + &head, head_cuda, sizeof(SharedAllocationHeader)); + } + + derived_t* const record = + alloc_ptr ? static_cast<derived_t*>(head.m_record) : nullptr; + + if (!alloc_ptr || record->m_alloc_ptr != head_cuda) { + Kokkos::Impl::throw_runtime_exception( + std::string("Kokkos::Impl::SharedAllocationRecord<") + + std::string(MemorySpace::name()) + + std::string(", void>::get_record ERROR")); + } + + return record; +} + +template <class MemorySpace> +std::string +HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::get_label() const { + SharedAllocationHeader header; + + Kokkos::Impl::DeepCopy<Kokkos::HostSpace, MemorySpace>( + &header, this->record_base_t::head(), sizeof(SharedAllocationHeader)); + + return std::string(header.m_label); +} + +} // end namespace Impl +} // end namespace Kokkos + +#endif // KOKKOS_IMPL_SHAREDALLOC_TIMPL_HPP diff --git a/packages/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp b/packages/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp index 85de7c977c99d2e5d8867ea8dd7d945ae3482c9d..0773a0914befe4e9db3b3b79ae3c446bcb0f3ad1 100644 --- a/packages/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp +++ 
b/packages/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp @@ -225,9 +225,9 @@ class SimpleTaskScheduler Impl::DefaultDestroy<task_queue_type> >; // Allocate space for the task queue - auto* record = - record_type::allocate(memory_space(), "TaskQueue", allocation_size); - m_queue = new (record->data()) + auto* record = record_type::allocate(memory_space(), "Kokkos::TaskQueue", + allocation_size); + m_queue = new (record->data()) task_queue_type(arg_execution_space, arg_memory_space, arg_memory_pool); record->m_destroy.managed_object = m_queue; m_track.assign_allocated_record_to_uninitialized(record); diff --git a/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp b/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp index 2ae5c7863c4d25ee70769cf51e8df83e50a74861..8ac034e249f1c1d1a4309003ee77c0cbe38682de 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Spinwait.cpp @@ -49,12 +49,8 @@ #include <impl/Kokkos_Spinwait.hpp> #include <impl/Kokkos_BitOps.hpp> -#if defined(KOKKOS_ENABLE_STDTHREAD) || defined(_WIN32) #include <thread> -#elif !defined(_WIN32) -#include <sched.h> -#include <time.h> -#else +#if defined(_WIN32) #include <process.h> #include <winsock2.h> #include <windows.h> @@ -73,28 +69,14 @@ void host_thread_yield(const uint32_t i, const WaitMode mode) { if (WaitMode::ROOT != mode) { if (sleep_limit < i) { - // Attempt to put the thread to sleep for 'c' milliseconds - -#if defined(KOKKOS_ENABLE_STDTHREAD) || defined(_WIN32) - auto start = std::chrono::high_resolution_clock::now(); + // Attempt to put the thread to sleep for 'c' microseconds std::this_thread::yield(); - std::this_thread::sleep_until(start + std::chrono::nanoseconds(c * 1000)); -#else - timespec req; - req.tv_sec = 0; - req.tv_nsec = 1000 * c; - nanosleep(&req, nullptr); -#endif + std::this_thread::sleep_for(std::chrono::microseconds(c)); } else if (mode == WaitMode::PASSIVE || yield_limit < i) { // Attempt to yield thread resources to 
runtime - -#if defined(KOKKOS_ENABLE_STDTHREAD) || defined(_WIN32) std::this_thread::yield(); -#else - sched_yield(); -#endif } #if defined(KOKKOS_ENABLE_ASM) diff --git a/packages/kokkos/core/src/impl/Kokkos_Traits.hpp b/packages/kokkos/core/src/impl/Kokkos_Traits.hpp index ed0ede86e0bc3638e5e9458caa5f10d83578ee93..d88230f5b247829dbf6e8ee79b111cb2d1309118 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Traits.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Traits.hpp @@ -147,13 +147,6 @@ struct are_integral<T, Args...> { namespace Kokkos { namespace Impl { -//---------------------------------------------------------------------------- - -template <class, class T = void> -struct enable_if_type { - using type = T; -}; - //---------------------------------------------------------------------------- // if_ diff --git a/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp b/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp index facc8ba5b08d779ebd5053d5a952796e069ef9ef..cb8cf281ae06fe0a71862b47428a2ffa12f4bd67 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Utilities.hpp @@ -49,6 +49,7 @@ #include <cstdint> #include <type_traits> #include <initializer_list> // in-order comma operator fold emulation +#include <utility> // integer_sequence and friends //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -56,338 +57,6 @@ namespace Kokkos { namespace Impl { -//---------------------------------------- -// C++14 integer sequence -template <typename T, T... 
Ints> -struct integer_sequence { - using value_type = T; - static constexpr std::size_t size() noexcept { return sizeof...(Ints); } -}; - -template <typename T, std::size_t N> -struct make_integer_sequence_helper; - -template <typename T, T N> -using make_integer_sequence = typename make_integer_sequence_helper<T, N>::type; - -template <typename T> -struct make_integer_sequence_helper<T, 0> { - using type = integer_sequence<T>; -}; - -template <typename T> -struct make_integer_sequence_helper<T, 1> { - using type = integer_sequence<T, 0>; -}; - -template <typename T> -struct make_integer_sequence_helper<T, 2> { - using type = integer_sequence<T, 0, 1>; -}; - -template <typename T> -struct make_integer_sequence_helper<T, 3> { - using type = integer_sequence<T, 0, 1, 2>; -}; - -template <typename T> -struct make_integer_sequence_helper<T, 4> { - using type = integer_sequence<T, 0, 1, 2, 3>; -}; - -template <typename T> -struct make_integer_sequence_helper<T, 5> { - using type = integer_sequence<T, 0, 1, 2, 3, 4>; -}; - -template <typename T> -struct make_integer_sequence_helper<T, 6> { - using type = integer_sequence<T, 0, 1, 2, 3, 4, 5>; -}; - -template <typename T> -struct make_integer_sequence_helper<T, 7> { - using type = integer_sequence<T, 0, 1, 2, 3, 4, 5, 6>; -}; - -template <typename T> -struct make_integer_sequence_helper<T, 8> { - using type = integer_sequence<T, 0, 1, 2, 3, 4, 5, 6, 7>; -}; - -template <typename X, typename Y> -struct make_integer_sequence_concat; - -template <typename T, T... x, T... 
y> -struct make_integer_sequence_concat<integer_sequence<T, x...>, - integer_sequence<T, y...>> { - using type = integer_sequence<T, x..., (sizeof...(x) + y)...>; -}; - -template <typename T, std::size_t N> -struct make_integer_sequence_helper { - using type = typename make_integer_sequence_concat< - typename make_integer_sequence_helper<T, N / 2>::type, - typename make_integer_sequence_helper<T, N - N / 2>::type>::type; -}; - -//---------------------------------------- - -template <std::size_t... Indices> -using index_sequence = integer_sequence<std::size_t, Indices...>; - -template <std::size_t N> -using make_index_sequence = make_integer_sequence<std::size_t, N>; - -//---------------------------------------- - -template <unsigned I, typename IntegerSequence> -struct integer_sequence_at; - -template <unsigned I, typename T, T h0, T... tail> -struct integer_sequence_at<I, integer_sequence<T, h0, tail...>> - : public integer_sequence_at<I - 1u, integer_sequence<T, tail...>> { - static_assert(8 <= I, "Reasoning Error"); - static_assert(I < integer_sequence<T, h0, tail...>::size(), - "Error: Index out of bounds"); -}; - -template <typename T, T h0, T... tail> -struct integer_sequence_at<0u, integer_sequence<T, h0, tail...>> { - using type = T; - static constexpr T value = h0; -}; - -template <typename T, T h0, T h1, T... tail> -struct integer_sequence_at<1u, integer_sequence<T, h0, h1, tail...>> { - using type = T; - static constexpr T value = h1; -}; - -template <typename T, T h0, T h1, T h2, T... tail> -struct integer_sequence_at<2u, integer_sequence<T, h0, h1, h2, tail...>> { - using type = T; - static constexpr T value = h2; -}; - -template <typename T, T h0, T h1, T h2, T h3, T... tail> -struct integer_sequence_at<3u, integer_sequence<T, h0, h1, h2, h3, tail...>> { - using type = T; - static constexpr T value = h3; -}; - -template <typename T, T h0, T h1, T h2, T h3, T h4, T... 
tail> -struct integer_sequence_at<4u, - integer_sequence<T, h0, h1, h2, h3, h4, tail...>> { - using type = T; - static constexpr T value = h4; -}; - -template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T... tail> -struct integer_sequence_at< - 5u, integer_sequence<T, h0, h1, h2, h3, h4, h5, tail...>> { - using type = T; - static constexpr T value = h5; -}; - -template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T... tail> -struct integer_sequence_at< - 6u, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, tail...>> { - using type = T; - static constexpr T value = h6; -}; - -template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T h7, T... tail> -struct integer_sequence_at< - 7u, integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, h7, tail...>> { - using type = T; - static constexpr T value = h7; -}; - -//---------------------------------------- - -template <typename T> -constexpr T at(const unsigned, integer_sequence<T>) noexcept { - return ~static_cast<T>(0); -} - -template <typename T, T h0, T... tail> -constexpr T at(const unsigned i, integer_sequence<T, h0>) noexcept { - return i == 0u ? h0 : ~static_cast<T>(0); -} - -template <typename T, T h0, T h1> -constexpr T at(const unsigned i, integer_sequence<T, h0, h1>) noexcept { - return i == 0u ? h0 : i == 1u ? h1 : ~static_cast<T>(0); -} - -template <typename T, T h0, T h1, T h2> -constexpr T at(const unsigned i, integer_sequence<T, h0, h1, h2>) noexcept { - return i == 0u ? h0 : i == 1u ? h1 : i == 2u ? h2 : ~static_cast<T>(0); -} - -template <typename T, T h0, T h1, T h2, T h3> -constexpr T at(const unsigned i, integer_sequence<T, h0, h1, h2, h3>) noexcept { - return i == 0u - ? h0 - : i == 1u ? h1 : i == 2u ? h2 : i == 3u ? h3 : ~static_cast<T>(0); -} - -template <typename T, T h0, T h1, T h2, T h3, T h4> -constexpr T at(const unsigned i, - integer_sequence<T, h0, h1, h2, h3, h4>) noexcept { - return i == 0u - ? h0 - : i == 1u - ? h1 - : i == 2u ? h2 - : i == 3u ? h3 : i == 4u ? 
h4 : ~static_cast<T>(0); -} - -template <typename T, T h0, T h1, T h2, T h3, T h4, T h5> -constexpr T at(const unsigned i, - integer_sequence<T, h0, h1, h2, h3, h4, h5>) noexcept { - return i == 0u - ? h0 - : i == 1u - ? h1 - : i == 2u ? h2 - : i == 3u ? h3 - : i == 4u ? h4 - : i == 5u ? h5 - : ~static_cast<T>(0); -} - -template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6> -constexpr T at(const unsigned i, - integer_sequence<T, h0, h1, h2, h3, h4, h5, h6>) noexcept { - return i == 0u - ? h0 - : i == 1u - ? h1 - : i == 2u - ? h2 - : i == 3u - ? h3 - : i == 4u - ? h4 - : i == 5u - ? h5 - : i == 6u ? h6 : ~static_cast<T>(0); -} - -template <typename T, T h0, T h1, T h2, T h3, T h4, T h5, T h6, T h7, T... tail> -constexpr T at( - const unsigned i, - integer_sequence<T, h0, h1, h2, h3, h4, h5, h6, h7, tail...>) noexcept { - return i == 0u - ? h0 - : i == 1u - ? h1 - : i == 2u - ? h2 - : i == 3u - ? h3 - : i == 4u - ? h4 - : i == 5u - ? h5 - : i == 6u - ? h6 - : i == 7u - ? h7 - : at(i - 8u, - integer_sequence< - T, tail...>{}); -} - -//---------------------------------------- - -template <typename IntegerSequence, - typename ResultSequence = - integer_sequence<typename IntegerSequence::value_type>> -struct reverse_integer_sequence_helper; - -template <typename T, T h0, T... tail, T... results> -struct reverse_integer_sequence_helper<integer_sequence<T, h0, tail...>, - integer_sequence<T, results...>> - : public reverse_integer_sequence_helper< - integer_sequence<T, tail...>, integer_sequence<T, h0, results...>> {}; - -template <typename T, T... 
results> -struct reverse_integer_sequence_helper<integer_sequence<T>, - integer_sequence<T, results...>> { - using type = integer_sequence<T, results...>; -}; - -template <typename IntegerSequence> -using reverse_integer_sequence = - typename reverse_integer_sequence_helper<IntegerSequence>::type; - -//---------------------------------------- - -template <typename IntegerSequence, typename Result, - typename ResultSequence = - integer_sequence<typename IntegerSequence::value_type>> -struct exclusive_scan_integer_sequence_helper; - -template <typename T, T h0, T... tail, typename Result, T... results> -struct exclusive_scan_integer_sequence_helper< - integer_sequence<T, h0, tail...>, Result, integer_sequence<T, results...>> - : public exclusive_scan_integer_sequence_helper< - integer_sequence<T, tail...>, - std::integral_constant<T, Result::value + h0>, - integer_sequence<T, 0, (results + h0)...>> {}; - -template <typename T, typename Result, T... results> -struct exclusive_scan_integer_sequence_helper<integer_sequence<T>, Result, - integer_sequence<T, results...>> { - using type = integer_sequence<T, results...>; - static constexpr T value = Result::value; -}; - -template <typename IntegerSequence> -struct exclusive_scan_integer_sequence { - using value_type = typename IntegerSequence::value_type; - using helper = exclusive_scan_integer_sequence_helper< - reverse_integer_sequence<IntegerSequence>, - std::integral_constant<value_type, 0>>; - using type = typename helper::type; - static constexpr value_type value = helper::value; -}; - -//---------------------------------------- - -template <typename IntegerSequence, typename Result, - typename ResultSequence = - integer_sequence<typename IntegerSequence::value_type>> -struct inclusive_scan_integer_sequence_helper; - -template <typename T, T h0, T... tail, typename Result, T... 
results> -struct inclusive_scan_integer_sequence_helper< - integer_sequence<T, h0, tail...>, Result, integer_sequence<T, results...>> - : public inclusive_scan_integer_sequence_helper< - integer_sequence<T, tail...>, - std::integral_constant<T, Result::value + h0>, - integer_sequence<T, h0, (results + h0)...>> {}; - -template <typename T, typename Result, T... results> -struct inclusive_scan_integer_sequence_helper<integer_sequence<T>, Result, - integer_sequence<T, results...>> { - using type = integer_sequence<T, results...>; - static constexpr T value = Result::value; -}; - -template <typename IntegerSequence> -struct inclusive_scan_integer_sequence { - using value_type = typename IntegerSequence::value_type; - using helper = inclusive_scan_integer_sequence_helper< - reverse_integer_sequence<IntegerSequence>, - std::integral_constant<value_type, 0>>; - using type = typename helper::type; - static constexpr value_type value = helper::value; -}; - template <typename T> struct identity { using type = T; @@ -396,6 +65,21 @@ struct identity { template <typename T> using identity_t = typename identity<T>::type; +struct not_a_type { + not_a_type() = delete; + ~not_a_type() = delete; + not_a_type(not_a_type const&) = delete; + void operator=(not_a_type const&) = delete; +}; + +#if defined(__cpp_lib_void_t) +// since C++17 +using std::void_t; +#else +template <class...> +using void_t = void; +#endif + //============================================================================== // <editor-fold desc="remove_cvref_t"> {{{1 @@ -467,6 +151,16 @@ struct destruct_delete { }; //============================================================================== +//============================================================================== +// <editor-fold desc="type_list"> {{{1 + +// An intentionally uninstantiable type_list for metaprogramming purposes +template <class...> +struct type_list; + +// </editor-fold> end type_list }}}1 
+//============================================================================== + } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp index 267b35f66584c96b4115486aa455bea6e2fb7ae2..6915622352e47d25efa34ae687f3e4f190150974 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp @@ -122,47 +122,47 @@ struct ViewOffset< is_array_layout<Layout>::value && is_array_layout_tiled<Layout>::value)>::type> { public: - // enum { outer_pattern = Layout::outer_pattern }; - // enum { inner_pattern = Layout::inner_pattern }; static constexpr Kokkos::Iterate outer_pattern = Layout::outer_pattern; static constexpr Kokkos::Iterate inner_pattern = Layout::inner_pattern; - enum { VORank = Dimension::rank }; - - enum : unsigned { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) }; - enum : unsigned { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) }; - enum : unsigned { SHIFT_2 = Kokkos::Impl::integral_power_of_two(Layout::N2) }; - enum : unsigned { SHIFT_3 = Kokkos::Impl::integral_power_of_two(Layout::N3) }; - enum : unsigned { SHIFT_4 = Kokkos::Impl::integral_power_of_two(Layout::N4) }; - enum : unsigned { SHIFT_5 = Kokkos::Impl::integral_power_of_two(Layout::N5) }; - enum : unsigned { SHIFT_6 = Kokkos::Impl::integral_power_of_two(Layout::N6) }; - enum : unsigned { SHIFT_7 = Kokkos::Impl::integral_power_of_two(Layout::N7) }; - enum { MASK_0 = Layout::N0 - 1 }; - enum { MASK_1 = Layout::N1 - 1 }; - enum { MASK_2 = Layout::N2 - 1 }; - enum { MASK_3 = Layout::N3 - 1 }; - enum { MASK_4 = Layout::N4 - 1 }; - enum { MASK_5 = Layout::N5 - 1 }; - enum { MASK_6 = Layout::N6 - 1 }; - enum { MASK_7 = Layout::N7 - 1 }; - - enum : unsigned { SHIFT_2T = SHIFT_0 + SHIFT_1 }; - enum : unsigned { SHIFT_3T = SHIFT_0 + SHIFT_1 + SHIFT_2 }; - enum : unsigned { SHIFT_4T = SHIFT_0 + SHIFT_1 + 
SHIFT_2 + SHIFT_3 }; - enum : unsigned { - SHIFT_5T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 - }; - enum : unsigned { - SHIFT_6T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 - }; - enum : unsigned { - SHIFT_7T = - SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6 - }; - enum : unsigned { - SHIFT_8T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + - SHIFT_6 + SHIFT_7 - }; + static constexpr int VORank = Dimension::rank; + + static constexpr unsigned SHIFT_0 = + Kokkos::Impl::integral_power_of_two(Layout::N0); + static constexpr unsigned SHIFT_1 = + Kokkos::Impl::integral_power_of_two(Layout::N1); + static constexpr unsigned SHIFT_2 = + Kokkos::Impl::integral_power_of_two(Layout::N2); + static constexpr unsigned SHIFT_3 = + Kokkos::Impl::integral_power_of_two(Layout::N3); + static constexpr unsigned SHIFT_4 = + Kokkos::Impl::integral_power_of_two(Layout::N4); + static constexpr unsigned SHIFT_5 = + Kokkos::Impl::integral_power_of_two(Layout::N5); + static constexpr unsigned SHIFT_6 = + Kokkos::Impl::integral_power_of_two(Layout::N6); + static constexpr unsigned SHIFT_7 = + Kokkos::Impl::integral_power_of_two(Layout::N7); + static constexpr int MASK_0 = Layout::N0 - 1; + static constexpr int MASK_1 = Layout::N1 - 1; + static constexpr int MASK_2 = Layout::N2 - 1; + static constexpr int MASK_3 = Layout::N3 - 1; + static constexpr int MASK_4 = Layout::N4 - 1; + static constexpr int MASK_5 = Layout::N5 - 1; + static constexpr int MASK_6 = Layout::N6 - 1; + static constexpr int MASK_7 = Layout::N7 - 1; + + static constexpr unsigned SHIFT_2T = SHIFT_0 + SHIFT_1; + static constexpr unsigned SHIFT_3T = SHIFT_0 + SHIFT_1 + SHIFT_2; + static constexpr unsigned SHIFT_4T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3; + static constexpr unsigned SHIFT_5T = + SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4; + static constexpr unsigned SHIFT_6T = + SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5; + static constexpr unsigned 
SHIFT_7T = + SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6; + static constexpr unsigned SHIFT_8T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + + SHIFT_4 + SHIFT_5 + SHIFT_6 + SHIFT_7; // Is an irregular layout that does not have uniform striding for each index. using is_mapping_plugin = std::true_type; @@ -659,6 +659,91 @@ struct ViewOffset< : 0) {} }; +// FIXME Remove the out-of-class definitions when we require C++17 +#define KOKKOS_ITERATE_VIEW_OFFSET_ENABLE \ + typename std::enable_if<((Dimension::rank <= 8) && (Dimension::rank >= 2) && \ + is_array_layout<Layout>::value && \ + is_array_layout_tiled<Layout>::value)>::type +template <class Dimension, class Layout> +constexpr Kokkos::Iterate ViewOffset< + Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::outer_pattern; +template <class Dimension, class Layout> +constexpr Kokkos::Iterate ViewOffset< + Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::inner_pattern; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::VORank; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_0; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_1; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_2; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_3; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_4; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_5; +template <class Dimension, class Layout> +constexpr unsigned + 
ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_6; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_7; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::MASK_0; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::MASK_1; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::MASK_2; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::MASK_3; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::MASK_4; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::MASK_5; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::MASK_6; +template <class Dimension, class Layout> +constexpr int + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::MASK_7; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_2T; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_3T; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_4T; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_5T; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_6T; +template <class 
Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_7T; +template <class Dimension, class Layout> +constexpr unsigned + ViewOffset<Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::SHIFT_8T; +#undef KOKKOS_ITERATE_VIEW_OFFSET_ENABLE + //---------------------------------------- // ViewMapping assign method needed in order to return a 'subview' tile as a @@ -687,8 +772,8 @@ class ViewMapping< N6, N7, true>; using src_traits = Kokkos::ViewTraits<T**, src_layout, P...>; - enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; - enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); + static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); using array_layout = typename std::conditional<is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; @@ -739,8 +824,8 @@ class ViewMapping<typename std::enable_if<(N3 == 0 && N4 == 0 && N5 == 0 && N6, N7, true>; using src_traits = Kokkos::ViewTraits<T***, src_layout, P...>; - enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; - enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); + static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); using array_layout = typename std::conditional<is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; @@ -797,8 +882,8 @@ class ViewMapping<typename std::enable_if<(N4 == 0 && N5 == 0 && N6 == 0 && N6, N7, true>; using src_traits = Kokkos::ViewTraits<T****, src_layout, P...>; - enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; - enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); + static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); using array_layout = typename std::conditional<is_inner_left, 
Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; @@ -860,8 +945,8 @@ class ViewMapping< N6, N7, true>; using src_traits = Kokkos::ViewTraits<T*****, src_layout, P...>; - enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; - enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); + static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); using array_layout = typename std::conditional<is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; @@ -928,8 +1013,8 @@ class ViewMapping<typename std::enable_if<(N6 == 0 && N7 == 0)>::type // void N6, N7, true>; using src_traits = Kokkos::ViewTraits<T******, src_layout, P...>; - enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; - enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); + static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); using array_layout = typename std::conditional<is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; @@ -1002,8 +1087,8 @@ class ViewMapping<typename std::enable_if<(N7 == 0)>::type // void N6, N7, true>; using src_traits = Kokkos::ViewTraits<T*******, src_layout, P...>; - enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; - enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); + static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); using array_layout = typename std::conditional<is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; @@ -1085,8 +1170,8 @@ class ViewMapping<typename std::enable_if<(N0 != 0 && N1 != 0 && N2 != 0 && N6, N7, true>; using src_traits = Kokkos::ViewTraits<T********, src_layout, P...>; - enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; - enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + static constexpr bool 
is_outer_left = (OuterP == Kokkos::Iterate::Left); + static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); using array_layout = typename std::conditional<is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight>::type; diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index d72b802511d8147cd4caff158e3c335b49aaaaf2..a380a306931f4150e95b6f433c8bb076b091c456 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -2684,7 +2684,7 @@ struct ViewDataHandle< template <class SrcHandleType> KOKKOS_INLINE_FUNCTION static handle_type assign( const SrcHandleType& arg_handle, size_t offset) { - return handle_type(arg_handle.ptr + offset); + return handle_type(arg_handle + offset); } }; @@ -3846,8 +3846,8 @@ template <class T, class Enable = void> struct has_printable_label_typedef : public std::false_type {}; template <class T> -struct has_printable_label_typedef< - T, typename enable_if_type<typename T::printable_label_typedef>::type> +struct has_printable_label_typedef<T, + void_t<typename T::printable_label_typedef>> : public std::true_type {}; template <class MapType> diff --git a/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp b/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a5f5406746befc984f17f815e04bac63f0fadff4 --- /dev/null +++ b/packages/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp @@ -0,0 +1,73 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SETUP_SYCL_HPP_ +#define KOKKOS_SETUP_SYCL_HPP_ + +#include <CL/sycl.hpp> + +#ifdef __SYCL_DEVICE_ONLY__ +#ifdef KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF +namespace Kokkos { +namespace ImplSYCL { +template <typename... Args> +void sink(Args&&... 
args) { + (void)(sizeof...(args)); +} +} // namespace ImplSYCL +} // namespace Kokkos +#define KOKKOS_IMPL_DO_NOT_USE_PRINTF(...) \ + do { \ + Kokkos::ImplSYCL::sink(__VA_ARGS__); \ + } while (0) +#else +#define KOKKOS_IMPL_DO_NOT_USE_PRINTF(format, ...) \ + do { \ + static const __attribute__((opencl_constant)) char fmt[] = (format); \ + sycl::ONEAPI::experimental::printf(fmt, ##__VA_ARGS__); \ + } while (0) +#endif +#endif + +#endif diff --git a/packages/kokkos/core/src/traits/Kokkos_ExecutionSpaceTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_ExecutionSpaceTrait.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4467b2e03c486d07d80c3fee66e6c3b50c42256e --- /dev/null +++ b/packages/kokkos/core/src/traits/Kokkos_ExecutionSpaceTrait.hpp @@ -0,0 +1,95 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_EXECUTIONSPACETRAIT_HPP +#define KOKKOS_KOKKOS_EXECUTIONSPACETRAIT_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_Concepts.hpp> // is_execution_space +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> +#include <traits/Kokkos_Traits_fwd.hpp> + +namespace Kokkos { +namespace Impl { + +//============================================================================== +// <editor-fold desc="trait specification"> {{{1 + +struct ExecutionSpaceTrait : TraitSpecificationBase<ExecutionSpaceTrait> { + struct base_traits { + static constexpr bool execution_space_is_defaulted = true; + + using execution_space = Kokkos::DefaultExecutionSpace; + }; + template <class T> + using trait_matches_specification = is_execution_space<T>; +}; + +// </editor-fold> end trait specification }}}1 +//============================================================================== + +//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 + +template <class ExecutionSpace, 
class... Traits> +struct AnalyzeExecPolicy< + std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>, + ExecutionSpace, Traits...> : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + + static_assert(base_t::execution_space_is_defaulted, + "Kokkos Error: More than one execution space given"); + + static constexpr bool execution_space_is_defaulted = false; + + using execution_space = ExecutionSpace; +}; + +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== +} // end namespace Impl +} // end namespace Kokkos + +#endif // KOKKOS_KOKKOS_EXECUTIONSPACETRAIT_HPP diff --git a/packages/kokkos/algorithms/unit_tests/TestHPX.cpp b/packages/kokkos/core/src/traits/Kokkos_GraphKernelTrait.hpp similarity index 58% rename from packages/kokkos/algorithms/unit_tests/TestHPX.cpp rename to packages/kokkos/core/src/traits/Kokkos_GraphKernelTrait.hpp index 2981e97945cb45065452a8f5330b0b35a9f4c65c..eb649dc0887a2aab8c88feae8156676b70a7cdf7 100644 --- a/packages/kokkos/algorithms/unit_tests/TestHPX.cpp +++ b/packages/kokkos/core/src/traits/Kokkos_GraphKernelTrait.hpp @@ -42,46 +42,46 @@ //@HEADER */ +#ifndef KOKKOS_KOKKOS_GRAPHKERNELTRAIT_HPP +#define KOKKOS_KOKKOS_GRAPHKERNELTRAIT_HPP + #include <Kokkos_Macros.hpp> -#ifdef KOKKOS_ENABLE_HPX +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> +#include <impl/Kokkos_GraphImpl_fwd.hpp> // IsGraphKernelTag +#include <traits/Kokkos_Traits_fwd.hpp> +#include <impl/Kokkos_Utilities.hpp> -#include <gtest/gtest.h> -#include <Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { -//---------------------------------------------------------------------------- -#include <TestRandom.hpp> -#include <TestSort.hpp> -#include <iomanip> +//============================================================================== +// <editor-fold desc="trait specification"> {{{1 -namespace Test { +struct 
GraphKernelTrait : TraitSpecificationBase<GraphKernelTrait> { + struct base_traits { + using is_graph_kernel = std::false_type; + }; + template <class T> + using trait_matches_specification = std::is_same<T, IsGraphKernelTag>; +}; -#define HPX_RANDOM_XORSHIFT64(num_draws) \ - TEST(hpx, Random_XorShift64) { \ - Impl::test_random< \ - Kokkos::Random_XorShift64_Pool<Kokkos::Experimental::HPX> >( \ - num_draws); \ - } +// </editor-fold> end trait specification }}}1 +//============================================================================== -#define HPX_RANDOM_XORSHIFT1024(num_draws) \ - TEST(hpx, Random_XorShift1024) { \ - Impl::test_random< \ - Kokkos::Random_XorShift1024_Pool<Kokkos::Experimental::HPX> >( \ - num_draws); \ - } +//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 -#define HPX_SORT_UNSIGNED(size) \ - TEST(hpx, SortUnsigned) { \ - Impl::test_sort<Kokkos::Experimental::HPX, unsigned>(size); \ - } +template <class... 
Traits> +struct AnalyzeExecPolicy<void, Impl::IsGraphKernelTag, Traits...> + : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + using is_graph_kernel = std::true_type; +}; -HPX_RANDOM_XORSHIFT64(10240000) -HPX_RANDOM_XORSHIFT1024(10130144) -HPX_SORT_UNSIGNED(171) +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== +} // end namespace Impl +} // end namespace Kokkos -#undef HPX_RANDOM_XORSHIFT64 -#undef HPX_RANDOM_XORSHIFT1024 -#undef HPX_SORT_UNSIGNED -} // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTHPX_PREVENT_LINK_ERROR() {} -#endif +#endif // KOKKOS_KOKKOS_GRAPHKERNELTRAIT_HPP diff --git a/packages/kokkos/core/src/traits/Kokkos_IndexTypeTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_IndexTypeTrait.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e15adc17116cb66481f90acc0b9ba5a83ec1ab52 --- /dev/null +++ b/packages/kokkos/core/src/traits/Kokkos_IndexTypeTrait.hpp @@ -0,0 +1,107 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_INDEXTYPETRAIT_HPP +#define KOKKOS_KOKKOS_INDEXTYPETRAIT_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_Concepts.hpp> // IndexType, is_index_type +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> +#include <traits/Kokkos_Traits_fwd.hpp> + +namespace Kokkos { +namespace Impl { + +//============================================================================== +// <editor-fold desc="trait specification"> {{{1 + +struct IndexTypeTrait : TraitSpecificationBase<IndexTypeTrait> { + struct base_traits { + static constexpr bool index_type_is_defaulted = true; + using index_type = dependent_policy_trait_default; + }; + template <class T> + using trait_matches_specification = + std::integral_constant<bool, std::is_integral<T>::value || + is_index_type<T>::value>; +}; + +// </editor-fold> end trait specification }}}1 
+//============================================================================== + +//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 + +// Index type given as IndexType template +template <class IntegralIndexType, class... Traits> +struct AnalyzeExecPolicy<void, Kokkos::IndexType<IntegralIndexType>, Traits...> + : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + static_assert(base_t::index_type_is_defaulted, + "Kokkos Error: More than one index type given"); + static constexpr bool index_type_is_defaulted = false; + using index_type = Kokkos::IndexType<IntegralIndexType>; +}; + +// IndexType given as an integral type directly +template <class IntegralIndexType, class... Traits> +struct AnalyzeExecPolicy< + std::enable_if_t<std::is_integral<IntegralIndexType>::value>, + IntegralIndexType, Traits...> : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + static_assert(base_t::index_type_is_defaulted, + "Kokkos Error: More than one index type given"); + static constexpr bool index_type_is_defaulted = false; + using index_type = Kokkos::IndexType<IntegralIndexType>; +}; + +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== + +} // end namespace Impl +} // end namespace Kokkos + +#endif // KOKKOS_KOKKOS_INDEXTYPETRAIT_HPP diff --git a/packages/kokkos/algorithms/unit_tests/TestSerial.cpp b/packages/kokkos/core/src/traits/Kokkos_IterationPatternTrait.hpp similarity index 54% rename from packages/kokkos/algorithms/unit_tests/TestSerial.cpp rename to packages/kokkos/core/src/traits/Kokkos_IterationPatternTrait.hpp index 2eacdc2677184988c226bbaca13827a4b55cccff..30e07039a405d61f2c78217284f9036a0a533f06 100644 --- 
a/packages/kokkos/algorithms/unit_tests/TestSerial.cpp +++ b/packages/kokkos/core/src/traits/Kokkos_IterationPatternTrait.hpp @@ -42,47 +42,49 @@ //@HEADER */ -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_ENABLE_SERIAL +#ifndef KOKKOS_KOKKOS_ITERATIONPATTERNTRAIT_HPP +#define KOKKOS_KOKKOS_ITERATIONPATTERNTRAIT_HPP -#include <gtest/gtest.h> +#include <Kokkos_Concepts.hpp> // is_iteration_pattern +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> +#include <traits/Kokkos_Traits_fwd.hpp> +#include <type_traits> // is_void -#include <Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { -#include <TestRandom.hpp> -#include <TestSort.hpp> -#include <iomanip> +//============================================================================== +// <editor-fold desc="trait specification"> {{{1 -//---------------------------------------------------------------------------- +struct IterationPatternTrait : TraitSpecificationBase<IterationPatternTrait> { + struct base_traits { + using iteration_pattern = void; // TODO set default iteration pattern + }; + template <class T> + using trait_matches_specification = is_iteration_pattern<T>; +}; -namespace Test { +// </editor-fold> end trait specification }}}1 +//============================================================================== -#define SERIAL_RANDOM_XORSHIFT64(num_draws) \ - TEST(serial, Random_XorShift64) { \ - Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Serial> >( \ - num_draws); \ - } +//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 -#define SERIAL_RANDOM_XORSHIFT1024(num_draws) \ - TEST(serial, Random_XorShift1024) { \ - Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Serial> >( \ - num_draws); \ - } +template <class IterationPattern, class... 
Traits> +struct AnalyzeExecPolicy< + std::enable_if_t<is_iteration_pattern<IterationPattern>::value>, + IterationPattern, Traits...> : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + static_assert(std::is_void<typename base_t::iteration_pattern>::value, + "Kokkos Error: More than one iteration pattern given"); + using iteration_pattern = IterationPattern; +}; -#define SERIAL_SORT_UNSIGNED(size) \ - TEST(serial, SortUnsigned) { \ - Impl::test_sort<Kokkos::Serial, unsigned>(size); \ - } +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== -SERIAL_RANDOM_XORSHIFT64(10240000) -SERIAL_RANDOM_XORSHIFT1024(10130144) -SERIAL_SORT_UNSIGNED(171) +} // end namespace Impl +} // end namespace Kokkos -#undef SERIAL_RANDOM_XORSHIFT64 -#undef SERIAL_RANDOM_XORSHIFT1024 -#undef SERIAL_SORT_UNSIGNED - -} // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTSERIAL_PREVENT_LINK_ERROR() {} -#endif // KOKKOS_ENABLE_SERIAL +#endif // KOKKOS_KOKKOS_ITERATIONPATTERNTRAIT_HPP diff --git a/packages/kokkos/algorithms/unit_tests/TestCuda.cpp b/packages/kokkos/core/src/traits/Kokkos_LaunchBoundsTrait.hpp similarity index 54% rename from packages/kokkos/algorithms/unit_tests/TestCuda.cpp rename to packages/kokkos/core/src/traits/Kokkos_LaunchBoundsTrait.hpp index 86cee61f64f31800515e25a6052e8ac599ee423e..73ae8e27e2eca54412b4cbab464b1760c93d7aed 100644 --- a/packages/kokkos/algorithms/unit_tests/TestCuda.cpp +++ b/packages/kokkos/core/src/traits/Kokkos_LaunchBoundsTrait.hpp @@ -42,51 +42,50 @@ //@HEADER */ -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_ENABLE_CUDA - -#include <cstdint> -#include <iostream> -#include <iomanip> +#ifndef KOKKOS_KOKKOS_LAUNCHBOUNDSTRAIT_HPP +#define KOKKOS_KOKKOS_LAUNCHBOUNDSTRAIT_HPP -#include <gtest/gtest.h> - -#include <Kokkos_Core.hpp> +#include <Kokkos_Macros.hpp> +#include <Kokkos_Concepts.hpp> // 
LaunchBounds +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> +#include <traits/Kokkos_Traits_fwd.hpp> -#include <TestRandom.hpp> -#include <TestSort.hpp> +namespace Kokkos { +namespace Impl { -namespace Test { +//============================================================================== +// <editor-fold desc="trait specification"> {{{1 -void cuda_test_random_xorshift64(int num_draws) { - Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda>>(num_draws); - Impl::test_random<Kokkos::Random_XorShift64_Pool< - Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>>>(num_draws); -} +struct LaunchBoundsTrait : TraitSpecificationBase<LaunchBoundsTrait> { + struct base_traits { + static constexpr bool launch_bounds_is_defaulted = true; -void cuda_test_random_xorshift1024(int num_draws) { - Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda>>(num_draws); - Impl::test_random<Kokkos::Random_XorShift1024_Pool< - Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>>>(num_draws); -} + using launch_bounds = LaunchBounds<>; + }; + template <class T> + using trait_matches_specification = is_launch_bounds<T>; +}; -#define CUDA_RANDOM_XORSHIFT64(num_draws) \ - TEST(cuda, Random_XorShift64) { cuda_test_random_xorshift64(num_draws); } +// </editor-fold> end trait specification }}}1 +//============================================================================== -#define CUDA_RANDOM_XORSHIFT1024(num_draws) \ - TEST(cuda, Random_XorShift1024) { cuda_test_random_xorshift1024(num_draws); } +//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 -#define CUDA_SORT_UNSIGNED(size) \ - TEST(cuda, SortUnsigned) { Impl::test_sort<Kokkos::Cuda, unsigned>(size); } +template <unsigned int MaxT, unsigned int MinB, class... 
Traits> +struct AnalyzeExecPolicy<void, Kokkos::LaunchBounds<MaxT, MinB>, Traits...> + : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + static_assert(base_t::launch_bounds_is_defaulted, + "Kokkos Error: More than one launch_bounds given"); + static constexpr bool launch_bounds_is_defaulted = false; + using launch_bounds = Kokkos::LaunchBounds<MaxT, MinB>; +}; -CUDA_RANDOM_XORSHIFT64(132141141) -CUDA_RANDOM_XORSHIFT1024(52428813) -CUDA_SORT_UNSIGNED(171) +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== +} // end namespace Impl +} // end namespace Kokkos -#undef CUDA_RANDOM_XORSHIFT64 -#undef CUDA_RANDOM_XORSHIFT1024 -#undef CUDA_SORT_UNSIGNED -} // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTCUDA_PREVENT_LINK_ERROR() {} -#endif /* #ifdef KOKKOS_ENABLE_CUDA */ +#endif // KOKKOS_KOKKOS_LAUNCHBOUNDSTRAIT_HPP diff --git a/packages/kokkos/core/src/traits/Kokkos_OccupancyControlTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_OccupancyControlTrait.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3deb4a94d54ddeee0a6a0712f107d61674818668 --- /dev/null +++ b/packages/kokkos/core/src/traits/Kokkos_OccupancyControlTrait.hpp @@ -0,0 +1,208 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_OCCUPANCYCONTROLTRAIT_HPP +#define KOKKOS_KOKKOS_OCCUPANCYCONTROLTRAIT_HPP + +#include <impl/Kokkos_Error.hpp> // KOKKOS_EXPECTS macro + +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> + +#include <traits/Kokkos_Traits_fwd.hpp> + +namespace Kokkos { + +namespace Experimental { + +//============================================================================== +// <editor-fold desc="Occupancy control user interface"> {{{1 + +struct MaximizeOccupancy; + +struct DesiredOccupancy { + int m_occ = 100; + explicit constexpr DesiredOccupancy(int occ) : m_occ(occ) { + KOKKOS_EXPECTS(0 <= occ && occ <= 100); + } + explicit constexpr operator int() const { return m_occ; } + constexpr int value() const { return m_occ; } + DesiredOccupancy() = default; + explicit DesiredOccupancy(MaximizeOccupancy const&) : DesiredOccupancy() {} +}; + +struct MaximizeOccupancy { + explicit MaximizeOccupancy() = default; +}; + +// </editor-fold> end Occupancy control user interface }}}1 +//============================================================================== + +} // end namespace Experimental + +namespace Impl { + +//============================================================================== +// <editor-fold desc="Occupancy control trait specification"> {{{1 + +struct OccupancyControlTrait : TraitSpecificationBase<OccupancyControlTrait> { + struct base_traits { + using occupancy_control = Kokkos::Experimental::MaximizeOccupancy; + static constexpr bool experimental_contains_desired_occupancy = false; + // Default access occupancy_control, for when it is the (stateless) default + static constexpr occupancy_control impl_get_occupancy_control() { + return occupancy_control{}; + } + }; + template <class T> + using trait_matches_specification = std::integral_constant< + bool, + std::is_same<T, Kokkos::Experimental::DesiredOccupancy>::value || 
+ std::is_same<T, Kokkos::Experimental::MaximizeOccupancy>::value>; +}; + +// </editor-fold> end Occupancy control trait specification }}}1 +//============================================================================== + +//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 + +// The DesiredOccupancy case has runtime storage, so we need to handle copies +// and assignments +template <class... Traits> +struct AnalyzeExecPolicy<void, Kokkos::Experimental::DesiredOccupancy, + Traits...> : AnalyzeExecPolicy<void, Traits...> { + public: + using base_t = AnalyzeExecPolicy<void, Traits...>; + using occupancy_control = Kokkos::Experimental::DesiredOccupancy; + static constexpr bool experimental_contains_desired_occupancy = true; + + template <class OccControl> + using with_occupancy_control = AnalyzeExecPolicy<void, OccControl, Traits...>; + + // Treat this as private, but make it public so that MSVC will still treat + // this as a standard layout class and make it the right size: storage for a + // stateful desired occupancy + // private: + occupancy_control m_desired_occupancy; + + AnalyzeExecPolicy() = default; + // Converting constructor + // Just rely on the convertibility of occupancy_control to transfer the data + template <class Other> + AnalyzeExecPolicy(ExecPolicyTraitsWithDefaults<Other> const& other) + : base_t(other), + m_desired_occupancy(other.impl_get_occupancy_control()) {} + + // Converting assignment operator + // Just rely on the convertibility of occupancy_control to transfer the data + template <class Other> + AnalyzeExecPolicy& operator=( + ExecPolicyTraitsWithDefaults<Other> const& other) { + *static_cast<base_t*>(this) = other; + this->impl_set_desired_occupancy( + occupancy_control{other.impl_get_occupancy_control()}); + return *this; + } + + // Access to occupancy control instance, usable in generic context + constexpr occupancy_control 
impl_get_occupancy_control() const { + return m_desired_occupancy; + } + + // Access to desired occupancy (getter and setter) + Kokkos::Experimental::DesiredOccupancy impl_get_desired_occupancy() const { + return m_desired_occupancy; + } + + void impl_set_desired_occupancy(occupancy_control desired_occupancy) { + m_desired_occupancy = desired_occupancy; + } +}; + +template <class... Traits> +struct AnalyzeExecPolicy<void, Kokkos::Experimental::MaximizeOccupancy, + Traits...> : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + using occupancy_control = Kokkos::Experimental::MaximizeOccupancy; + static constexpr bool experimental_contains_desired_occupancy = false; +}; + +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== + +} // end namespace Impl + +namespace Experimental { + +//============================================================================== +// <editor-fold desc="User interface"> {{{1 + +template <typename Policy> +auto prefer(Policy const& p, DesiredOccupancy occ) { + using new_policy_t = + Kokkos::Impl::OccupancyControlTrait::policy_with_trait<Policy, + DesiredOccupancy>; + new_policy_t pwo{p}; + pwo.impl_set_desired_occupancy(occ); + return pwo; +} + +template <typename Policy> +constexpr auto prefer(Policy const& p, MaximizeOccupancy) { + static_assert(Kokkos::is_execution_policy<Policy>::value, ""); + using new_policy_t = + Kokkos::Impl::OccupancyControlTrait::policy_with_trait<Policy, + MaximizeOccupancy>; + return new_policy_t{p}; +} + +// </editor-fold> end User interface }}}1 +//============================================================================== + +} // end namespace Experimental + +} // end namespace Kokkos + +#endif // KOKKOS_KOKKOS_OCCUPANCYCONTROLTRAIT_HPP diff --git a/packages/kokkos/core/src/traits/Kokkos_PolicyTraitAdaptor.hpp 
b/packages/kokkos/core/src/traits/Kokkos_PolicyTraitAdaptor.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b087dac85559bd6dc67c983bdaad1a6675cfde9b --- /dev/null +++ b/packages/kokkos/core/src/traits/Kokkos_PolicyTraitAdaptor.hpp @@ -0,0 +1,156 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <impl/Kokkos_Utilities.hpp> // type_list + +#include <traits/Kokkos_Traits_fwd.hpp> + +#ifndef KOKKOS_KOKKOS_POLICYTRAITADAPTOR_HPP +#define KOKKOS_KOKKOS_POLICYTRAITADAPTOR_HPP + +namespace Kokkos { +namespace Impl { + +//============================================================================== +// <editor-fold desc="Adapter for replacing/adding a trait"> {{{1 + +//------------------------------------------------------------------------------ + +// General strategy: given a TraitSpecification, go through the entries in the +// parameter pack of the policy template and find the first one that returns +// `true` for the nested `trait_matches_specification` template. If +// that nested template is not found these overloads should be safely +// ignored, and the trait can specialize PolicyTraitAdaptorImpl to get the +// desired behavior. 
+ +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +// <editor-fold desc="PolicyTraitMatcher"> {{{2 + +// To handle the WorkTag case, we need more than just a predicate; we need +// something that we can default to in the unspecialized case, just like we +// do for AnalyzeExecPolicy +template <class TraitSpec, class Trait, class Enable = void> +struct PolicyTraitMatcher; + +template <class TraitSpec, class Trait> +struct PolicyTraitMatcher< + TraitSpec, Trait, + std::enable_if_t< + TraitSpec::template trait_matches_specification<Trait>::value>> + : std::true_type {}; + +// </editor-fold> end PolicyTraitMatcher }}}2 +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +// <editor-fold desc="PolicyTraitAdaptorImpl specializations"> {{{2 + +// Matching version, replace the trait +template <class TraitSpec, template <class...> class PolicyTemplate, + class... ProcessedTraits, class MatchingTrait, + class... ToProcessTraits, class NewTrait> +struct PolicyTraitAdaptorImpl< + TraitSpec, PolicyTemplate, type_list<ProcessedTraits...>, + type_list<MatchingTrait, ToProcessTraits...>, NewTrait, + std::enable_if_t<PolicyTraitMatcher<TraitSpec, MatchingTrait>::value>> { + static_assert(PolicyTraitMatcher<TraitSpec, NewTrait>::value, ""); + using type = PolicyTemplate<ProcessedTraits..., NewTrait, ToProcessTraits...>; +}; + +// Non-matching version, check the next option +template <class TraitSpec, template <class...> class PolicyTemplate, + class... ProcessedTraits, class NonMatchingTrait, + class... 
ToProcessTraits, class NewTrait> +struct PolicyTraitAdaptorImpl< + TraitSpec, PolicyTemplate, type_list<ProcessedTraits...>, + type_list<NonMatchingTrait, ToProcessTraits...>, NewTrait, + std::enable_if_t<!PolicyTraitMatcher<TraitSpec, NonMatchingTrait>::value>> { + using type = typename PolicyTraitAdaptorImpl< + TraitSpec, PolicyTemplate, + type_list<ProcessedTraits..., NonMatchingTrait>, + type_list<ToProcessTraits...>, NewTrait>::type; +}; + +// Base case: no matches found; just add the trait to the end of the list +template <class TraitSpec, template <class...> class PolicyTemplate, + class... ProcessedTraits, class NewTrait> +struct PolicyTraitAdaptorImpl<TraitSpec, PolicyTemplate, + type_list<ProcessedTraits...>, type_list<>, + NewTrait> { + static_assert(PolicyTraitMatcher<TraitSpec, NewTrait>::value, ""); + using type = PolicyTemplate<ProcessedTraits..., NewTrait>; +}; + +// </editor-fold> end PolicyTraitAdaptorImpl specializations }}}2 +//------------------------------------------------------------------------------ + +template <class TraitSpec, template <class...> class PolicyTemplate, + class... 
Traits, class NewTrait> +struct PolicyTraitAdaptor<TraitSpec, PolicyTemplate<Traits...>, NewTrait> + : PolicyTraitAdaptorImpl<TraitSpec, PolicyTemplate, type_list<>, + type_list<Traits...>, NewTrait> {}; + +// </editor-fold> end Adapter for replacing/adding a trait }}}1 +//============================================================================== + +//============================================================================== +// <editor-fold desc="CRTP Base class for trait specifications"> {{{1 + +template <class TraitSpec> +struct TraitSpecificationBase { + using trait_specification = TraitSpec; + template <class Policy, class Trait> + using policy_with_trait = + typename PolicyTraitAdaptor<TraitSpec, Policy, Trait>::type; +}; + +// </editor-fold> end CRTP Base class for trait specifications }}}1 +//============================================================================== + +} // end namespace Impl +} // end namespace Kokkos + +#endif // KOKKOS_KOKKOS_POLICYTRAITADAPTOR_HPP diff --git a/packages/kokkos/core/src/traits/Kokkos_ScheduleTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_ScheduleTrait.hpp new file mode 100644 index 0000000000000000000000000000000000000000..74bab6fce2a632269a804971af3e50348e34c8b2 --- /dev/null +++ b/packages/kokkos/core/src/traits/Kokkos_ScheduleTrait.hpp @@ -0,0 +1,112 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_SCHEDULETRAIT_HPP +#define KOKKOS_KOKKOS_SCHEDULETRAIT_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_Concepts.hpp> // is_schedule_type, Schedule +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> +#include <traits/Kokkos_Traits_fwd.hpp> + +namespace Kokkos { + +namespace Impl { + +//============================================================================== +// <editor-fold desc="trait specification"> {{{1 + +struct ScheduleTrait : TraitSpecificationBase<ScheduleTrait> { + struct base_traits { + static constexpr auto schedule_type_is_defaulted = true; + + using schedule_type = Schedule<Static>; + }; + template <class T> + using trait_matches_specification = is_schedule_type<T>; +}; + +// </editor-fold> end trait specification }}}1 +//============================================================================== + +//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 + +template <class ScheduleType, class... 
Traits> +struct AnalyzeExecPolicy<void, Kokkos::Schedule<ScheduleType>, Traits...> + : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + static_assert(base_t::schedule_type_is_defaulted, + "Kokkos Error: More than one schedule type given"); + static constexpr bool schedule_type_is_defaulted = false; + using schedule_type = Kokkos::Schedule<ScheduleType>; +}; + +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== + +} // end namespace Impl + +namespace Experimental { + +//============================================================================== +// <editor-fold desc="User interface"> {{{1 + +template <class Policy, class ScheduleType> +constexpr auto require(Policy const& p, Kokkos::Schedule<ScheduleType>) { + static_assert(Kokkos::is_execution_policy<Policy>::value, ""); + using new_policy_t = Kokkos::Impl::ScheduleTrait::policy_with_trait< + Policy, Kokkos::Schedule<ScheduleType>>; + return new_policy_t{p}; +} + +// </editor-fold> end User interface }}}1 +//============================================================================== + +} // end namespace Experimental + +} // end namespace Kokkos + +#endif // KOKKOS_KOKKOS_SCHEDULETRAIT_HPP diff --git a/packages/kokkos/core/src/traits/Kokkos_Traits_fwd.hpp b/packages/kokkos/core/src/traits/Kokkos_Traits_fwd.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b8b9a0ca2d889b08116528803d0c1b096060ecad --- /dev/null +++ b/packages/kokkos/core/src/traits/Kokkos_Traits_fwd.hpp @@ -0,0 +1,73 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_TRAITS_FWD_HPP +#define KOKKOS_KOKKOS_TRAITS_FWD_HPP + +namespace Kokkos { +namespace Impl { + +template <class Enable, class... 
TraitsList> +struct AnalyzeExecPolicy; + +template <class AnalysisResults> +struct ExecPolicyTraitsWithDefaults; + +template <class TraitSpec, template <class...> class PolicyTemplate, + class AlreadyProcessedList, class ToProcessList, class NewTrait, + class Enable = void> +struct PolicyTraitAdaptorImpl; + +template <class TraitSpec, class Policy, class NewTrait> +struct PolicyTraitAdaptor; + +// A tag class for dependent defaults that must be handled by the +// ExecPolicyTraitsWithDefaults wrapper, since their defaults depend on other +// traits +struct dependent_policy_trait_default; + +} // end namespace Impl +} // end namespace Kokkos + +#endif // KOKKOS_KOKKOS_TRAITS_FWD_HPP diff --git a/packages/kokkos/core/src/traits/Kokkos_WorkItemPropertyTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_WorkItemPropertyTrait.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2656316fb934333655d0370f4dc3d40eea7bbb86 --- /dev/null +++ b/packages/kokkos/core/src/traits/Kokkos_WorkItemPropertyTrait.hpp @@ -0,0 +1,114 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_WORKITEMPROPERTYTRAIT_HPP +#define KOKKOS_KOKKOS_WORKITEMPROPERTYTRAIT_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_Concepts.hpp> // WorkItemProperty +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> +#include <traits/Kokkos_Traits_fwd.hpp> + +namespace Kokkos { +namespace Impl { + +//============================================================================== +// <editor-fold desc="trait specification"> {{{1 + +struct WorkItemPropertyTrait : TraitSpecificationBase<WorkItemPropertyTrait> { + struct base_traits { + using work_item_property = Kokkos::Experimental::WorkItemProperty::None_t; + }; + template <class T> + using trait_matches_specification = + Kokkos::Experimental::is_work_item_property<T>; +}; + +// </editor-fold> end trait specification }}}1 +//============================================================================== + 
+//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 + +template <class Property, class... Traits> +struct AnalyzeExecPolicy< + std::enable_if_t< + Kokkos::Experimental::is_work_item_property<Property>::value>, + Property, Traits...> : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + static_assert( + std::is_same<typename base_t::work_item_property, + Kokkos::Experimental::WorkItemProperty::None_t>::value, + "Kokkos Error: More than one work item property given"); + using work_item_property = Property; +}; + +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== + +} // end namespace Impl + +namespace Experimental { + +//============================================================================== +// <editor-fold desc="User interface"> {{{1 + +template <class Policy, unsigned long Property> +constexpr auto require(const Policy p, + WorkItemProperty::ImplWorkItemProperty<Property>) { + static_assert(Kokkos::is_execution_policy<Policy>::value, ""); + using new_policy_t = Kokkos::Impl::WorkItemPropertyTrait::policy_with_trait< + Policy, WorkItemProperty::ImplWorkItemProperty<Property>>; + return new_policy_t{p}; +} + +// </editor-fold> end User interface }}}1 +//============================================================================== + +} // namespace Experimental + +} // end namespace Kokkos + +#endif // KOKKOS_KOKKOS_WORKITEMPROPERTYTRAIT_HPP diff --git a/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp b/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp new file mode 100644 index 0000000000000000000000000000000000000000..877005756a703b067c07c6f57c3fc4212f7484ca --- /dev/null +++ b/packages/kokkos/core/src/traits/Kokkos_WorkTagTrait.hpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_WORKTAGTRAIT_HPP +#define KOKKOS_KOKKOS_WORKTAGTRAIT_HPP + +#include <Kokkos_Macros.hpp> +#include <Kokkos_Concepts.hpp> // is_execution_space +#include <traits/Kokkos_PolicyTraitAdaptor.hpp> +#include <traits/Kokkos_Traits_fwd.hpp> + +namespace Kokkos { +namespace Impl { + +//============================================================================== +// <editor-fold desc="trait specification"> {{{1 + +struct WorkTagTrait : TraitSpecificationBase<WorkTagTrait> { + struct base_traits { + using work_tag = void; + }; +}; + +// </editor-fold> end trait specification }}}1 +//============================================================================== + +//============================================================================== +// <editor-fold desc="AnalyzeExecPolicy specializations"> {{{1 + +// Since we don't have subsumption in pre-C++20, we need to have the work tag +// "trait" handling code be unspecialized, so we handle it instead in a class +// with a different name. +template <class... Traits> +struct AnalyzeExecPolicyHandleWorkTag : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; +}; + +template <class WorkTag, class... Traits> +struct AnalyzeExecPolicyHandleWorkTag<WorkTag, Traits...> + : AnalyzeExecPolicy<void, Traits...> { + using base_t = AnalyzeExecPolicy<void, Traits...>; + using base_t::base_t; + static_assert(std::is_void<typename base_t::work_tag>::value, + "Kokkos Error: More than one work tag given"); + using work_tag = WorkTag; +}; + +// This only works if this is not a partial specialization, so we have to +// do the partial specialization elsewhere +template <class Enable, class... 
Traits> +struct AnalyzeExecPolicy : AnalyzeExecPolicyHandleWorkTag<Traits...> { + using base_t = AnalyzeExecPolicyHandleWorkTag<Traits...>; + using base_t::base_t; +}; + +// </editor-fold> end AnalyzeExecPolicy specializations }}}1 +//============================================================================== + +//============================================================================== +// <editor-fold desc="PolicyTraitMatcher specializations"> {{{1 + +// In order to match the work tag trait the work tag "matcher" needs to be +// unspecialized and the logic needs to be handled in a differently-named class, +// just like above. +template <class TraitSpec, class Trait> +struct PolicyTraitMatcherHandleWorkTag : std::false_type {}; + +template <class Trait> +struct PolicyTraitMatcherHandleWorkTag<WorkTagTrait, Trait> + : std::integral_constant<bool, !std::is_void<Trait>::value> {}; + +template <class TraitSpec, class Trait, class Enable> +struct PolicyTraitMatcher /* unspecialized! 
*/ + : PolicyTraitMatcherHandleWorkTag<TraitSpec, Trait> {}; + +// </editor-fold> end PolicyTraitMatcher specializations }}}1 +//============================================================================== + +} // end namespace Impl +} // end namespace Kokkos + +#endif // KOKKOS_KOKKOS_WORKTAGTRAIT_HPP diff --git a/packages/kokkos/core/unit_test/CMakeLists.txt b/packages/kokkos/core/unit_test/CMakeLists.txt index 125560db3eb5c9ca28f7c48b8b6f66cb650cbfcf..5826208851090933ee296988287a6a633eb2c476 100644 --- a/packages/kokkos/core/unit_test/CMakeLists.txt +++ b/packages/kokkos/core/unit_test/CMakeLists.txt @@ -17,9 +17,8 @@ KOKKOS_ADD_TEST_LIBRARY( TARGET_COMPILE_DEFINITIONS(kokkos_gtest PUBLIC GTEST_HAS_TR1_TUPLE=0 GTEST_HAS_PTHREAD=0) TARGET_INCLUDE_DIRECTORIES(kokkos_gtest PUBLIC ${GTEST_SOURCE_DIR}) -#Gtest minimally requires C++11 IF((NOT (Kokkos_ENABLE_CUDA AND WIN32)) AND (NOT ("${KOKKOS_CXX_COMPILER_ID}" STREQUAL "Fujitsu"))) -TARGET_COMPILE_FEATURES(kokkos_gtest PUBLIC cxx_std_11) + TARGET_COMPILE_FEATURES(kokkos_gtest PUBLIC cxx_std_14) ENDIF() # Suppress clang-tidy diagnostics on code that we do not have control over @@ -40,11 +39,18 @@ SET(KOKKOS_HPX_FEATURE_LEVEL 999) SET(KOKKOS_HPX_NAME Experimental::HPX) SET(KOKKOS_OPENMP_FEATURE_LEVEL 999) SET(KOKKOS_OPENMP_NAME OpenMP) -SET(KOKKOS_OPENMPTARGET_FEATURE_LEVEL 12) + +# FIXME_OPENMPTARGET - The NVIDIA HPC compiler nvc++ only compiles the first 8 incremental tests for the OpenMPTarget backend. 
+IF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) + SET(KOKKOS_OPENMPTARGET_FEATURE_LEVEL 8) +ELSE() + SET(KOKKOS_OPENMPTARGET_FEATURE_LEVEL 13) +ENDIF() + SET(KOKKOS_OPENMPTARGET_NAME Experimental::OpenMPTarget) SET(KOKKOS_SERIAL_FEATURE_LEVEL 999) SET(KOKKOS_SERIAL_NAME Serial) -SET(KOKKOS_SYCL_FEATURE_LEVEL 5) +SET(KOKKOS_SYCL_FEATURE_LEVEL 999) SET(KOKKOS_SYCL_NAME Experimental::SYCL) SET(KOKKOS_THREADS_FEATURE_LEVEL 999) SET(KOKKOS_THREADS_NAME Threads) @@ -57,6 +63,7 @@ SET(KOKKOS_THREADS_NAME Threads) #I will leave these alone for now because I don't need transitive dependencies on tests KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/core/unit_test/category_files) foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) # Because there is always an exception to the rule @@ -73,7 +80,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) # Needed to split this for Windows NVCC, since it ends up putting everything on the # command line in an intermediate compilation step even if CMake generated a response # file. That then exceeded the shell command line max length. - set(${Tag}_SOURCES1) + set(${Tag}_SOURCES1A) foreach(Name AtomicOperations_int AtomicOperations_unsignedint @@ -94,12 +101,30 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) FunctorAnalysis Init LocalDeepCopy + MathematicalFunctions MDRange_a MDRange_b MDRange_c + HostSharedPtr + HostSharedPtrAccessOnDevice + ) + set(file ${dir}/Test${Tag}_${Name}.cpp) + # Write to a temporary intermediate file and call configure_file to avoid + # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. 
+ file(WRITE ${dir}/dummy.cpp + "#include <Test${Tag}_Category.hpp>\n" + "#include <Test${Name}.hpp>\n" + ) + configure_file(${dir}/dummy.cpp ${file}) + list(APPEND ${Tag}_SOURCES1A ${file}) + endforeach() + + set(${Tag}_SOURCES1B) + foreach(Name MDRange_d MDRange_e MDRange_f + NumericTraits Other RangePolicy RangePolicyRequire @@ -121,10 +146,10 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) "#include <Test${Name}.hpp>\n" ) configure_file(${dir}/dummy.cpp ${file}) - list(APPEND ${Tag}_SOURCES1 ${file}) + list(APPEND ${Tag}_SOURCES1B ${file}) endforeach() - SET(${Tag}_SOURCES2) + SET(${Tag}_SOURCES2A) foreach(Name TeamBasic TeamReductionScan @@ -144,9 +169,9 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) ViewMapping_b ViewMapping_subview ViewOfClass - WorkGraph - View_64bit ViewResize + View_64bit + WorkGraph ) set(file ${dir}/Test${Tag}_${Name}.cpp) # Write to a temporary intermediate file and call configure_file to avoid @@ -156,7 +181,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) "#include <Test${Name}.hpp>\n" ) configure_file(${dir}/dummy.cpp ${file}) - list(APPEND ${Tag}_SOURCES2 ${file}) + list(APPEND ${Tag}_SOURCES2A ${file}) endforeach() set(TagHostAccessible ${Tag}) @@ -164,7 +189,11 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) set(TagHostAccessible CudaUVM) elseif(Tag STREQUAL "HIP") set(TagHostAccessible HIPHostPinned) + elseif(Tag STREQUAL "SYCL") + set(TagHostAccessible SYCLSharedUSMSpace) endif() + + set(${Tag}_SOURCES2B) foreach(Name SubView_a SubView_b @@ -173,26 +202,57 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP;SYCL) SubView_c03 SubView_c04 SubView_c05 + ) + set(file ${dir}/Test${Tag}_${Name}.cpp) + # Write to a temporary intermediate file and call configure_file to avoid + # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. 
+ file(WRITE ${dir}/dummy.cpp + "#include <Test${TagHostAccessible}_Category.hpp>\n" + "#include <Test${Name}.hpp>\n" + ) + configure_file(${dir}/dummy.cpp ${file}) + list(APPEND ${Tag}_SOURCES2B ${file}) + endforeach() + + set(${Tag}_SOURCES2C) + foreach(Name SubView_c06 SubView_c07 SubView_c08 SubView_c09 + ) + set(file ${dir}/Test${Tag}_${Name}.cpp) + # Write to a temporary intermediate file and call configure_file to avoid + # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. + file(WRITE ${dir}/dummy.cpp + "#include <Test${TagHostAccessible}_Category.hpp>\n" + "#include <Test${Name}.hpp>\n" + ) + configure_file(${dir}/dummy.cpp ${file}) + list(APPEND ${Tag}_SOURCES2C ${file}) + endforeach() + + set(${Tag}_SOURCES2D) + foreach(Name SubView_c10 SubView_c11 SubView_c12 SubView_c13 + SubView_c14 ) set(file ${dir}/Test${Tag}_${Name}.cpp) # Write to a temporary intermediate file and call configure_file to avoid # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. 
file(WRITE ${dir}/dummy.cpp - "#include <Test${TagHostAccessible}_Category.hpp>\n" + "#include <Test${TagHostAccessible}_Category.hpp>\n" "#include <Test${Name}.hpp>\n" ) configure_file(${dir}/dummy.cpp ${file}) - list(APPEND ${Tag}_SOURCES2 ${file}) + list(APPEND ${Tag}_SOURCES2D ${file}) endforeach() + SET(${Tag}_SOURCES1 ${${Tag}_SOURCES1A} ${${Tag}_SOURCES1B}) + SET(${Tag}_SOURCES2 ${${Tag}_SOURCES2A} ${${Tag}_SOURCES2B} ${${Tag}_SOURCES2C} ${${Tag}_SOURCES2D}) SET(${Tag}_SOURCES ${${Tag}_SOURCES1} ${${Tag}_SOURCES2}) endif() endforeach() @@ -203,29 +263,81 @@ if(Kokkos_ENABLE_OPENMPTARGET) ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations_complexdouble.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Crs.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_LocalDeepCopy.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_a.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_b.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_c.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_d.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_e.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_f.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Other.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reductions_DeviceView.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Scan.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamBasic.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamScratch.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamReductionScan.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamTeamSize.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamScan.cpp - ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamVectorRange.cpp - 
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_UniqueToken.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewAPI_e.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewCopy_a.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewCopy_b.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewMapping_subview.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewOfClass.cpp ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_WorkGraph.cpp - ) + ) +endif() + +# FIXME_OPENMPTARGET - Comment non-passing tests with the NVIDIA HPC compiler nvc++ +IF(KOKKOS_ENABLE_OPENMPTARGET + AND (KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) + list(REMOVE_ITEM OpenMPTarget_SOURCES + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_UniqueToken.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_HostSharedPtr.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_HostSharedPtrAccessOnDevice.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamScratch.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TestScan.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TestTeamScan.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TestTeamReductionScan.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Atomics.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations_float.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations_int.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations_longint.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations_longlongint.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations_double.cpp + 
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations_unsignedint.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicOperations_unsignedlongint.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_AtomicViews.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_BlockSizeDeduction.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_a.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_b.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_c.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_d.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewMapping_b.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_TeamBasic.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Scan.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_NumericTraits.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_DeepCopyAlignment.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MathematicalFunctions.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_b.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c01.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c02.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c03.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c04.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c05.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c06.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c07.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c08.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c09.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c10.cpp + 
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c11.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c12.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c13.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_a.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_b.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_c.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_MDRange_d.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewAPI_a.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewAPI_b.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewAPI_c.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewAPI_d.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewAPI_f.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_ViewResize.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_RangePolicyRequire.cpp + ${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_RangePolicy.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/default/TestDefaultDeviceType_a1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/default/TestDefaultDeviceType_b1.cpp + ) endif() if(Kokkos_ENABLE_SERIAL) @@ -422,6 +534,7 @@ if(Kokkos_ENABLE_HIP) hip/TestHIPHostPinned_ViewMapping_a.cpp hip/TestHIPHostPinned_ViewMapping_b.cpp hip/TestHIPHostPinned_ViewMapping_subview.cpp + hip/TestHIP_AsyncLauncher.cpp ) KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_HIPInterOpInit @@ -438,80 +551,100 @@ if(Kokkos_ENABLE_HIP) endif() if(Kokkos_ENABLE_SYCL) - list(REMOVE_ITEM SYCL_SOURCES - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_int.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_unsignedint.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_longint.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_unsignedlongint.cpp - 
${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_longlongint.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_double.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_float.cpp + list(REMOVE_ITEM SYCL_SOURCES1A + # FIXME_SYCL atomic_fetch_oper for large types to be implemented ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_complexdouble.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicOperations_complexfloat.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicViews.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Atomics.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Atomics.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_AtomicViews.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_BlockSizeDeduction.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Crs.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_DeepCopyAlignment.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_LocalDeepCopy.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_MDRange_a.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_MDRange_b.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_MDRange_c.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_MDRange_d.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_MDRange_e.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_MDRange_f.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Other.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Reductions.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Reducers_a.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Reducers_b.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Reducers_c.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Reducers_d.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_Reductions_DeviceView.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SharedAlloc.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c04.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c05.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c06.cpp - 
${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c07.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c08.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c09.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c10.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c11.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_SubView_c12.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_TeamBasic.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_TeamReductionScan.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_TeamScan.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_TeamScratch.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_TeamTeamSize.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_TeamVectorRange.cpp - ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_UniqueToken.cpp + ) + + list(REMOVE_ITEM SYCL_SOURCES2A ${CMAKE_CURRENT_BINARY_DIR}/sycl/TestSYCL_WorkGraph.cpp ) KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_SYCL + UnitTest_SYCL1A + SOURCES + UnitTestMainInit.cpp + ${SYCL_SOURCES1A} + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SYCL1B + SOURCES + UnitTestMainInit.cpp + ${SYCL_SOURCES1B} + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SYCL2A + SOURCES + UnitTestMainInit.cpp + ${SYCL_SOURCES2A} + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SYCL2B + SOURCES + UnitTestMainInit.cpp + ${SYCL_SOURCES2B} + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SYCL2C + SOURCES + UnitTestMainInit.cpp + ${SYCL_SOURCES2C} + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SYCL2D SOURCES UnitTestMainInit.cpp - ${SYCL_SOURCES} + ${SYCL_SOURCES2D} + ) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SYCLInterOpInit + SOURCES + UnitTestMain.cpp + sycl/TestSYCL_InterOp_Init.cpp + ) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SYCLInterOpInit_Context + SOURCES + UnitTestMainInit.cpp + sycl/TestSYCL_InterOp_Init_Context.cpp + ) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SYCLInterOpStreams + SOURCES + UnitTestMain.cpp + sycl/TestSYCL_InterOp_Streams.cpp ) endif() 
-SET(DEFAULT_DEVICE_SOURCES - UnitTestMainInit.cpp - default/TestDefaultDeviceType.cpp - default/TestDefaultDeviceType_a1.cpp - default/TestDefaultDeviceType_b1.cpp - default/TestDefaultDeviceType_c1.cpp - default/TestDefaultDeviceType_a2.cpp - default/TestDefaultDeviceType_b2.cpp - default/TestDefaultDeviceType_c2.cpp - default/TestDefaultDeviceType_a3.cpp - default/TestDefaultDeviceType_b3.cpp - default/TestDefaultDeviceType_c3.cpp - default/TestDefaultDeviceType_d.cpp - default/TestDefaultDeviceTypeResize.cpp -) +# FIXME_OPENMPTARGET - Comment non-passing tests with the NVIDIA HPC compiler nvc++ +if (KOKKOS_ENABLE_OPENMPTARGET + AND (KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) + SET(DEFAULT_DEVICE_SOURCES + UnitTestMainInit.cpp + default/TestDefaultDeviceType.cpp + ) +else() + SET(DEFAULT_DEVICE_SOURCES + UnitTestMainInit.cpp + default/TestDefaultDeviceType.cpp + default/TestDefaultDeviceType_a1.cpp + default/TestDefaultDeviceType_b1.cpp + default/TestDefaultDeviceType_c1.cpp + default/TestDefaultDeviceType_a2.cpp + default/TestDefaultDeviceType_b2.cpp + default/TestDefaultDeviceType_c2.cpp + default/TestDefaultDeviceType_a3.cpp + default/TestDefaultDeviceType_b3.cpp + default/TestDefaultDeviceType_c3.cpp + default/TestDefaultDeviceType_d.cpp + default/TestDefaultDeviceTypeResize.cpp + ) +endif() KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Default @@ -572,6 +705,10 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate SOURCES tools/printing-tool.cpp ) + if((NOT (Kokkos_ENABLE_CUDA AND WIN32)) AND (NOT ("${KOKKOS_CXX_COMPILER_ID}" STREQUAL "Fujitsu"))) + TARGET_COMPILE_FEATURES(kokkosprinter-tool PUBLIC cxx_std_14) + endif() + KOKKOS_ADD_TEST_EXECUTABLE( ProfilingAllCalls tools/TestAllCalls.cpp @@ -582,10 +719,50 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate set(SIZE_REGEX "[0-9]*") set(SKIP_SCRATCH_INITIALIZATION_REGEX ".*") - KOKKOS_ADD_TEST( NAME ProfilingTestLibraryLoad + # check help works via 
environment variable + KOKKOS_ADD_TEST( + SKIP_TRIBITS + NAME ProfilingTestLibraryLoadHelp + EXE ProfilingAllCalls + TOOL kokkosprinter-tool + ARGS --kokkos-tools-help + PASS_REGULAR_EXPRESSION + "kokkosp_init_library::kokkosp_print_help:KokkosCore_ProfilingAllCalls::kokkosp_finalize_library::") + + # check help works via direct library specification + KOKKOS_ADD_TEST( + SKIP_TRIBITS + NAME ProfilingTestLibraryCmdLineHelp + EXE ProfilingAllCalls + ARGS --kokkos-tools-help + --kokkos-tools-library=$<TARGET_FILE:kokkosprinter-tool> + PASS_REGULAR_EXPRESSION + "kokkosp_init_library::kokkosp_print_help:KokkosCore_ProfilingAllCalls::kokkosp_finalize_library::") + + KOKKOS_ADD_TEST( + SKIP_TRIBITS + NAME ProfilingTestLibraryLoad EXE ProfilingAllCalls TOOL kokkosprinter-tool - PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_allocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]source]:0:0::kokkosp_end_parallel_for:0::kokkosp_allocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization 
[[]destination]:0:0::kokkosp_end_parallel_for:0::kokkosp_begin_deep_copy:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_end_deep_copy::kokkosp_begin_parallel_for:parallel_for:${SIZE_REGEX}:0::kokkosp_end_parallel_for:0::kokkosp_begin_parallel_reduce:parallel_reduce:${SIZE_REGEX}:1${SKIP_SCRATCH_INITIALIZATION_REGEX}::kokkosp_end_parallel_reduce:1::kokkosp_begin_parallel_scan:parallel_scan:${SIZE_REGEX}:2::kokkosp_end_parallel_scan:2::kokkosp_push_profile_region:push_region::kokkosp_pop_profile_region::kokkosp_create_profile_section:created_section:3::kokkosp_start_profile_section:3::kokkosp_stop_profile_section:3::kokkosp_destroy_profile_section:3::kokkosp_profile_event:profiling_event::kokkosp_deallocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_deallocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_finalize_library::" + ARGS --kokkos-tools-args="-c test delimit" + PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_parse_args:4:KokkosCore_ProfilingAllCalls:-c:test:delimit::.*::kokkosp_allocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]source]:0:0::kokkosp_end_parallel_for:0::kokkosp_allocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization 
[[]destination]:0:0::kokkosp_end_parallel_for:0::kokkosp_begin_deep_copy:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_end_deep_copy::kokkosp_begin_parallel_for:parallel_for:${SIZE_REGEX}:0::kokkosp_end_parallel_for:0::kokkosp_begin_parallel_reduce:parallel_reduce:${SIZE_REGEX}:1${SKIP_SCRATCH_INITIALIZATION_REGEX}::kokkosp_end_parallel_reduce:1::kokkosp_begin_parallel_scan:parallel_scan:${SIZE_REGEX}:2::kokkosp_end_parallel_scan:2::kokkosp_push_profile_region:push_region::kokkosp_pop_profile_region::kokkosp_create_profile_section:created_section:3::kokkosp_start_profile_section:3::kokkosp_stop_profile_section:3::kokkosp_destroy_profile_section:3::kokkosp_profile_event:profiling_event::kokkosp_declare_metadata:dogs:good::kokkosp_deallocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_deallocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_finalize_library::" + ) + + # Above will test that leading/trailing quotes are stripped bc ctest cmd args is: + # "--kokkos-tools-args="-c test delimit"" + # The bracket argument syntax: [=[ and ]=] used below ensures it is treated as + # a single argument: + # "--kokkos-tools-args=-c test delimit" + # + # https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument + # + KOKKOS_ADD_TEST( + SKIP_TRIBITS + NAME ProfilingTestLibraryCmdLine + EXE ProfilingAllCalls + ARGS [=[--kokkos-tools-args=-c test delimit]=] + --kokkos-tools-library=$<TARGET_FILE:kokkosprinter-tool> + PASS_REGULAR_EXPRESSION "kokkosp_init_library::kokkosp_parse_args:4:KokkosCore_ProfilingAllCalls:-c:test:delimit::.*::kokkosp_allocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization [[]source]:0:0::kokkosp_end_parallel_for:0::kokkosp_allocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_begin_parallel_for:Kokkos::View::initialization 
[[]destination]:0:0::kokkosp_end_parallel_for:0::kokkosp_begin_deep_copy:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_end_deep_copy::kokkosp_begin_parallel_for:parallel_for:${SIZE_REGEX}:0::kokkosp_end_parallel_for:0::kokkosp_begin_parallel_reduce:parallel_reduce:${SIZE_REGEX}:1${SKIP_SCRATCH_INITIALIZATION_REGEX}::kokkosp_end_parallel_reduce:1::kokkosp_begin_parallel_scan:parallel_scan:${SIZE_REGEX}:2::kokkosp_end_parallel_scan:2::kokkosp_push_profile_region:push_region::kokkosp_pop_profile_region::kokkosp_create_profile_section:created_section:3::kokkosp_start_profile_section:3::kokkosp_stop_profile_section:3::kokkosp_destroy_profile_section:3::kokkosp_profile_event:profiling_event::kokkosp_declare_metadata:dogs:good::kokkosp_deallocate_data:${MEMSPACE_REGEX}:destination:${ADDRESS_REGEX}:40::kokkosp_deallocate_data:${MEMSPACE_REGEX}:source:${ADDRESS_REGEX}:40::kokkosp_finalize_library::" ) endif() #KOKKOS_ENABLE_LIBDL if(NOT KOKKOS_HAS_TRILINOS) @@ -623,11 +800,6 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( ) endif() -KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_HostBarrier - SOURCES UnitTestMain.cpp TestHostBarrier.cpp -) - FUNCTION (KOKKOS_ADD_INCREMENTAL_TEST DEVICE) KOKKOS_OPTION( ${DEVICE}_EXCLUDE_TESTS "" STRING "Incremental test exclude list" ) # Add unit test main @@ -689,4 +861,6 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( ARGS "one 2 THREE" ) -add_subdirectory(headers_self_contained) +if (KOKKOS_ENABLE_HEADER_SELF_CONTAINMENT_TESTS AND NOT KOKKOS_HAS_TRILINOS) + add_subdirectory(headers_self_contained) +endif() diff --git a/packages/kokkos/core/unit_test/Makefile b/packages/kokkos/core/unit_test/Makefile index f039d889ee2762afa8ba23726d45ff298c856d49..390fc79a4755e46cbd61b28ee54d44814fa501d9 100644 --- a/packages/kokkos/core/unit_test/Makefile +++ b/packages/kokkos/core/unit_test/Makefile @@ -32,7 +32,7 @@ override LDFLAGS += -lpthread include $(KOKKOS_PATH)/Makefile.kokkos -KOKKOS_CXXFLAGS += -I$(GTEST_PATH) 
-I${KOKKOS_PATH}/core/unit_test +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/unit_test -I${KOKKOS_PATH}/core/unit_test/category_files TEST_TARGETS = TARGETS = @@ -361,10 +361,6 @@ OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_HWLOC TEST_TARGETS += test-hwloc -OBJ_HOST_BARRIER = TestHostBarrier.o UnitTestMain.o gtest-all.o -TARGETS += KokkosCore_UnitTest_HostBarrier -TEST_TARGETS += test-host-barrier - OBJ_DEFAULT = UnitTestMainInit.o gtest-all.o ifneq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) @@ -432,9 +428,6 @@ KokkosCore_UnitTest_HPXInterOp: UnitTestMain.o gtest-all.o TestHPX_InterOp.o $(K KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HWLOC -KokkosCore_UnitTest_HostBarrier: $(OBJ_HOST_BARRIER) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_HOST_BARRIER) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HostBarrier - KokkosCore_UnitTest_AllocationTracker: $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(LIB) -o KokkosCore_UnitTest_AllocationTracker @@ -482,9 +475,6 @@ test-hpx: KokkosCore_UnitTest_HPX test-hwloc: KokkosCore_UnitTest_HWLOC ./KokkosCore_UnitTest_HWLOC -test-host-barrier: KokkosCore_UnitTest_HostBarrier - ./KokkosCore_UnitTest_HostBarrier - test-allocationtracker: KokkosCore_UnitTest_AllocationTracker ./KokkosCore_UnitTest_AllocationTracker diff --git a/packages/kokkos/core/unit_test/TestAtomics.hpp b/packages/kokkos/core/unit_test/TestAtomics.hpp index 1051ae20f6d55979077fa9380526e3db3981e2a6..e41ad5257d64ad3acb3266a0354f18d291662377 100644 --- a/packages/kokkos/core/unit_test/TestAtomics.hpp +++ b/packages/kokkos/core/unit_test/TestAtomics.hpp @@ -122,7 +122,7 @@ struct SuperScalar { } KOKKOS_INLINE_FUNCTION - bool 
operator==(const SuperScalar& src) { + bool operator==(const SuperScalar& src) const { bool compare = true; for (int i = 0; i < N; i++) { compare = compare && (val[i] == src.val[i]); @@ -131,7 +131,7 @@ struct SuperScalar { } KOKKOS_INLINE_FUNCTION - bool operator!=(const SuperScalar& src) { + bool operator!=(const SuperScalar& src) const { bool compare = true; for (int i = 0; i < N; i++) { compare = compare && (val[i] == src.val[i]); @@ -538,6 +538,8 @@ TEST(TEST_CATEGORY, atomics) { ASSERT_TRUE( (TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 3))); +// FIXME_SYCL atomics for large types to be implemented +#ifndef KOKKOS_ENABLE_SYCL // FIXME_HIP HIP doesn't yet support atomics for >64bit types properly #ifndef KOKKOS_ENABLE_HIP ASSERT_TRUE( @@ -565,6 +567,7 @@ TEST(TEST_CATEGORY, atomics) { #endif #endif #endif +#endif } } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestComplex.hpp b/packages/kokkos/core/unit_test/TestComplex.hpp index b0307ec8cf2627e0695feeacd04f6d5a2ecf7fd8..b926058ebf990b0c7d0bff6f4c22b5bd4c12e2e8 100644 --- a/packages/kokkos/core/unit_test/TestComplex.hpp +++ b/packages/kokkos/core/unit_test/TestComplex.hpp @@ -414,13 +414,12 @@ TEST(TEST_CATEGORY, complex_special_funtions) { TEST(TEST_CATEGORY, complex_io) { testComplexIO(); } TEST(TEST_CATEGORY, complex_trivially_copyable) { - using RealType = double; - // Kokkos::complex<RealType> is trivially copyable when RealType is // trivially copyable // Simply disable the check for IBM's XL compiler since we can't reliably // check for a version that defines relevant functions. #if !defined(__ibmxl__) + using RealType = double; // clang claims compatibility with gcc 4.2.1 but all versions tested know // about std::is_trivially_copyable. 
ASSERT_TRUE(std::is_trivially_copyable<Kokkos::complex<RealType>>::value || @@ -428,4 +427,92 @@ TEST(TEST_CATEGORY, complex_trivially_copyable) { #endif } +template <class ExecSpace> +struct TestBugPowAndLogComplex { + Kokkos::View<Kokkos::complex<double> *, ExecSpace> d_pow; + Kokkos::View<Kokkos::complex<double> *, ExecSpace> d_log; + TestBugPowAndLogComplex() : d_pow("pow", 2), d_log("log", 2) { test(); } + void test() { + Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0, 1), *this); + auto h_pow = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), d_pow); + ASSERT_FLOAT_EQ(h_pow(0).real(), 18); + ASSERT_FLOAT_EQ(h_pow(0).imag(), 26); + ASSERT_FLOAT_EQ(h_pow(1).real(), -18); + ASSERT_FLOAT_EQ(h_pow(1).imag(), 26); + auto h_log = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), d_log); + ASSERT_FLOAT_EQ(h_log(0).real(), 1.151292546497023); + ASSERT_FLOAT_EQ(h_log(0).imag(), 0.3217505543966422); + ASSERT_FLOAT_EQ(h_log(1).real(), 1.151292546497023); + ASSERT_FLOAT_EQ(h_log(1).imag(), 2.819842099193151); + } + KOKKOS_FUNCTION void operator()(int) const { + d_pow(0) = Kokkos::pow(Kokkos::complex<double>(+3., 1.), 3.); + d_pow(1) = Kokkos::pow(Kokkos::complex<double>(-3., 1.), 3.); + d_log(0) = Kokkos::log(Kokkos::complex<double>(+3., 1.)); + d_log(1) = Kokkos::log(Kokkos::complex<double>(-3., 1.)); + } +}; + +TEST(TEST_CATEGORY, complex_issue_3865) { + TestBugPowAndLogComplex<TEST_EXECSPACE>(); +} + +TEST(TEST_CATEGORY, complex_issue_3867) { + ASSERT_EQ(Kokkos::pow(Kokkos::complex<double>(2., 1.), 3.), + Kokkos::pow(Kokkos::complex<double>(2., 1.), 3)); + ASSERT_EQ( + Kokkos::pow(Kokkos::complex<double>(2., 1.), 3.), + Kokkos::pow(Kokkos::complex<double>(2., 1.), Kokkos::complex<double>(3))); + + auto x = Kokkos::pow(Kokkos::complex<double>(2, 1), + Kokkos::complex<double>(-3, 4)); + auto y = Kokkos::complex<double>( + std::pow(std::complex<double>(2, 1), std::complex<double>(-3, 4))); + ASSERT_FLOAT_EQ(x.real(), y.real()); + 
ASSERT_FLOAT_EQ(x.imag(), y.imag()); + +#define CHECK_POW_COMPLEX_PROMOTION(ARGTYPE1, ARGTYPE2, RETURNTYPE) \ + static_assert( \ + std::is_same<RETURNTYPE, \ + decltype(Kokkos::pow(std::declval<ARGTYPE1>(), \ + std::declval<ARGTYPE2>()))>::value, \ + ""); \ + static_assert( \ + std::is_same<RETURNTYPE, \ + decltype(Kokkos::pow(std::declval<ARGTYPE2>(), \ + std::declval<ARGTYPE1>()))>::value, \ + ""); + + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<long double>, long double, + Kokkos::complex<long double>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<long double>, double, + Kokkos::complex<long double>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<long double>, float, + Kokkos::complex<long double>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<long double>, int, + Kokkos::complex<long double>); + + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<double>, long double, + Kokkos::complex<long double>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<double>, double, + Kokkos::complex<double>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<double>, float, + Kokkos::complex<double>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<double>, int, + Kokkos::complex<double>); + + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<float>, long double, + Kokkos::complex<long double>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<float>, double, + Kokkos::complex<double>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<float>, float, + Kokkos::complex<float>); + CHECK_POW_COMPLEX_PROMOTION(Kokkos::complex<float>, int, + Kokkos::complex<double>); + +#undef CHECK_POW_COMPLEX_PROMOTION +} + } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp b/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp index 8158f4058082c65445b6fa15dddd7f56b476fd8d..49f8daf89eabca9b3aa7e1f06d7a10ceb23a6a24 100644 --- a/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp +++ b/packages/kokkos/core/unit_test/TestDeepCopyAlignment.hpp @@ -17,7 +17,7 @@ struct 
TestDeepCopy { static void reset_a_copy_and_b( Kokkos::View<char*, Kokkos::LayoutRight, MemorySpaceA> a_char_copy, Kokkos::View<char*, Kokkos::LayoutRight, MemorySpaceB> b_char) { - const int N = b_char.extent(0); + const int N = b_char.extent_int(0); Kokkos::parallel_for( "TestDeepCopy: FillA_copy", policyA_t(0, N), KOKKOS_LAMBDA(const int& i) { a_char_copy(i) = char(0); }); @@ -29,7 +29,7 @@ struct TestDeepCopy { static int compare_equal( Kokkos::View<char*, Kokkos::LayoutRight, MemorySpaceA> a_char_copy, Kokkos::View<char*, Kokkos::LayoutRight, MemorySpaceA> a_char) { - const int N = a_char.extent(0); + const int N = a_char.extent_int(0); int errors; Kokkos::parallel_reduce( "TestDeepCopy: FillA_copy", policyA_t(0, N), @@ -74,12 +74,12 @@ struct TestDeepCopy { int b_begin = 0; int b_end = 0; auto a = Kokkos::subview( - a_char, std::pair<int, int>(a_begin, a_char.extent(0) - a_end)); + a_char, std::pair<int, int>(a_begin, a_char.extent_int(0) - a_end)); auto b = Kokkos::subview( - b_char, std::pair<int, int>(b_begin, b_char.extent(0) - b_end)); + b_char, std::pair<int, int>(b_begin, b_char.extent_int(0) - b_end)); auto a_copy = Kokkos::subview( a_char_copy, - std::pair<int, int>(a_begin, a_char_copy.extent(0) - a_end)); + std::pair<int, int>(a_begin, a_char_copy.extent_int(0) - a_end)); Kokkos::deep_copy(b, a); Kokkos::deep_copy(a_copy, b); int check = compare_equal(a_copy, a); @@ -92,12 +92,12 @@ struct TestDeepCopy { int b_begin = 0; int b_end = 5; auto a = Kokkos::subview( - a_char, std::pair<int, int>(a_begin, a_char.extent(0) - a_end)); + a_char, std::pair<int, int>(a_begin, a_char.extent_int(0) - a_end)); auto b = Kokkos::subview( - b_char, std::pair<int, int>(b_begin, b_char.extent(0) - b_end)); + b_char, std::pair<int, int>(b_begin, b_char.extent_int(0) - b_end)); auto a_copy = Kokkos::subview( a_char_copy, - std::pair<int, int>(a_begin, a_char_copy.extent(0) - a_end)); + std::pair<int, int>(a_begin, a_char_copy.extent_int(0) - a_end)); 
Kokkos::deep_copy(b, a); Kokkos::deep_copy(a_copy, b); int check = compare_equal(a_copy, a); @@ -110,12 +110,12 @@ struct TestDeepCopy { int b_begin = 3; int b_end = 0; auto a = Kokkos::subview( - a_char, std::pair<int, int>(a_begin, a_char.extent(0) - a_end)); + a_char, std::pair<int, int>(a_begin, a_char.extent_int(0) - a_end)); auto b = Kokkos::subview( - b_char, std::pair<int, int>(b_begin, b_char.extent(0) - b_end)); + b_char, std::pair<int, int>(b_begin, b_char.extent_int(0) - b_end)); auto a_copy = Kokkos::subview( a_char_copy, - std::pair<int, int>(a_begin, a_char_copy.extent(0) - a_end)); + std::pair<int, int>(a_begin, a_char_copy.extent_int(0) - a_end)); Kokkos::deep_copy(b, a); Kokkos::deep_copy(a_copy, b); int check = compare_equal(a_copy, a); @@ -128,12 +128,12 @@ struct TestDeepCopy { int b_begin = 3; int b_end = 6; auto a = Kokkos::subview( - a_char, std::pair<int, int>(a_begin, a_char.extent(0) - a_end)); + a_char, std::pair<int, int>(a_begin, a_char.extent_int(0) - a_end)); auto b = Kokkos::subview( - b_char, std::pair<int, int>(b_begin, b_char.extent(0) - b_end)); + b_char, std::pair<int, int>(b_begin, b_char.extent_int(0) - b_end)); auto a_copy = Kokkos::subview( a_char_copy, - std::pair<int, int>(a_begin, a_char_copy.extent(0) - a_end)); + std::pair<int, int>(a_begin, a_char_copy.extent_int(0) - a_end)); Kokkos::deep_copy(b, a); Kokkos::deep_copy(a_copy, b); int check = compare_equal(a_copy, a); @@ -146,12 +146,12 @@ struct TestDeepCopy { int b_begin = 3; int b_end = 6; auto a = Kokkos::subview( - a_char, std::pair<int, int>(a_begin, a_char.extent(0) - a_end)); + a_char, std::pair<int, int>(a_begin, a_char.extent_int(0) - a_end)); auto b = Kokkos::subview( - b_char, std::pair<int, int>(b_begin, b_char.extent(0) - b_end)); + b_char, std::pair<int, int>(b_begin, b_char.extent_int(0) - b_end)); auto a_copy = Kokkos::subview( a_char_copy, - std::pair<int, int>(a_begin, a_char_copy.extent(0) - a_end)); + std::pair<int, int>(a_begin, 
a_char_copy.extent_int(0) - a_end)); Kokkos::deep_copy(b, a); Kokkos::deep_copy(a_copy, b); int check = compare_equal(a_copy, a); @@ -164,12 +164,12 @@ struct TestDeepCopy { int b_begin = 2; int b_end = 6; auto a = Kokkos::subview( - a_char, std::pair<int, int>(a_begin, a_char.extent(0) - a_end)); + a_char, std::pair<int, int>(a_begin, a_char.extent_int(0) - a_end)); auto b = Kokkos::subview( - b_char, std::pair<int, int>(b_begin, b_char.extent(0) - b_end)); + b_char, std::pair<int, int>(b_begin, b_char.extent_int(0) - b_end)); auto a_copy = Kokkos::subview( a_char_copy, - std::pair<int, int>(a_begin, a_char_copy.extent(0) - a_end)); + std::pair<int, int>(a_begin, a_char_copy.extent_int(0) - a_end)); Kokkos::deep_copy(b, a); Kokkos::deep_copy(a_copy, b); int check = compare_equal(a_copy, a); @@ -182,12 +182,12 @@ struct TestDeepCopy { int b_begin = 0; int b_end = 8; auto a = Kokkos::subview( - a_char, std::pair<int, int>(a_begin, a_char.extent(0) - a_end)); + a_char, std::pair<int, int>(a_begin, a_char.extent_int(0) - a_end)); auto b = Kokkos::subview( - b_char, std::pair<int, int>(b_begin, b_char.extent(0) - b_end)); + b_char, std::pair<int, int>(b_begin, b_char.extent_int(0) - b_end)); auto a_copy = Kokkos::subview( a_char_copy, - std::pair<int, int>(a_begin, a_char_copy.extent(0) - a_end)); + std::pair<int, int>(a_begin, a_char_copy.extent_int(0) - a_end)); Kokkos::deep_copy(b, a); Kokkos::deep_copy(a_copy, b); int check = compare_equal(a_copy, a); diff --git a/packages/kokkos/core/unit_test/TestHalfOperators.hpp b/packages/kokkos/core/unit_test/TestHalfOperators.hpp index feba5acdde68aac4952a054e4496a1a8b36f599a..db52a05d5d36d5919e101f60dd7652c92771c885 100644 --- a/packages/kokkos/core/unit_test/TestHalfOperators.hpp +++ b/packages/kokkos/core/unit_test/TestHalfOperators.hpp @@ -68,24 +68,192 @@ enum OP_TESTS { POSTFIX_DEC, CADD_H_H, CADD_H_S, + CADD_S_H, + CADD_H_D, + CADD_D_H, CSUB_H_H, CSUB_H_S, + CSUB_S_H, + CSUB_H_D, + CSUB_D_H, CMUL_H_H, CMUL_H_S, + 
CMUL_S_H, + CMUL_H_D, + CMUL_D_H, CDIV_H_H, CDIV_H_S, + CDIV_S_H, + CDIV_H_D, + CDIV_D_H, ADD_H_H, ADD_H_S, ADD_S_H, + ADD_H_D, + ADD_D_H, + ADD_H_H_SZ, + ADD_H_S_SZ, + ADD_S_H_SZ, + ADD_H_D_SZ, + ADD_D_H_SZ, + ADD_SI_H, + ADD_SI_H_SZ, + ADD_I_H, + ADD_I_H_SZ, + ADD_LI_H, + ADD_LI_H_SZ, + ADD_LLI_H, + ADD_LLI_H_SZ, + ADD_USI_H, + ADD_USI_H_SZ, + ADD_UI_H, + ADD_UI_H_SZ, + ADD_ULI_H, + ADD_ULI_H_SZ, + ADD_ULLI_H, + ADD_ULLI_H_SZ, + ADD_H_SI, + ADD_H_SI_SZ, + ADD_H_I, + ADD_H_I_SZ, + ADD_H_LI, + ADD_H_LI_SZ, + ADD_H_LLI, + ADD_H_LLI_SZ, + ADD_H_USI, + ADD_H_USI_SZ, + ADD_H_UI, + ADD_H_UI_SZ, + ADD_H_ULI, + ADD_H_ULI_SZ, + ADD_H_ULLI, + ADD_H_ULLI_SZ, SUB_H_H, SUB_H_S, SUB_S_H, + SUB_H_D, + SUB_D_H, + SUB_H_H_SZ, + SUB_H_S_SZ, + SUB_S_H_SZ, + SUB_H_D_SZ, + SUB_D_H_SZ, + SUB_SI_H, + SUB_SI_H_SZ, + SUB_I_H, + SUB_I_H_SZ, + SUB_LI_H, + SUB_LI_H_SZ, + SUB_LLI_H, + SUB_LLI_H_SZ, + SUB_USI_H, + SUB_USI_H_SZ, + SUB_UI_H, + SUB_UI_H_SZ, + SUB_ULI_H, + SUB_ULI_H_SZ, + SUB_ULLI_H, + SUB_ULLI_H_SZ, + SUB_H_SI, + SUB_H_SI_SZ, + SUB_H_I, + SUB_H_I_SZ, + SUB_H_LI, + SUB_H_LI_SZ, + SUB_H_LLI, + SUB_H_LLI_SZ, + SUB_H_USI, + SUB_H_USI_SZ, + SUB_H_UI, + SUB_H_UI_SZ, + SUB_H_ULI, + SUB_H_ULI_SZ, + SUB_H_ULLI, + SUB_H_ULLI_SZ, MUL_H_H, MUL_H_S, MUL_S_H, + MUL_H_D, + MUL_D_H, + MUL_H_H_SZ, + MUL_H_S_SZ, + MUL_S_H_SZ, + MUL_H_D_SZ, + MUL_D_H_SZ, + MUL_SI_H, + MUL_SI_H_SZ, + MUL_I_H, + MUL_I_H_SZ, + MUL_LI_H, + MUL_LI_H_SZ, + MUL_LLI_H, + MUL_LLI_H_SZ, + MUL_USI_H, + MUL_USI_H_SZ, + MUL_UI_H, + MUL_UI_H_SZ, + MUL_ULI_H, + MUL_ULI_H_SZ, + MUL_ULLI_H, + MUL_ULLI_H_SZ, + MUL_H_SI, + MUL_H_SI_SZ, + MUL_H_I, + MUL_H_I_SZ, + MUL_H_LI, + MUL_H_LI_SZ, + MUL_H_LLI, + MUL_H_LLI_SZ, + MUL_H_USI, + MUL_H_USI_SZ, + MUL_H_UI, + MUL_H_UI_SZ, + MUL_H_ULI, + MUL_H_ULI_SZ, + MUL_H_ULLI, + MUL_H_ULLI_SZ, DIV_H_H, DIV_H_S, DIV_S_H, + DIV_H_D, + DIV_D_H, + DIV_H_H_SZ, + DIV_H_S_SZ, + DIV_S_H_SZ, + DIV_H_D_SZ, + DIV_D_H_SZ, + DIV_SI_H, + DIV_SI_H_SZ, + DIV_I_H, + DIV_I_H_SZ, + DIV_LI_H, + DIV_LI_H_SZ, + 
DIV_LLI_H, + DIV_LLI_H_SZ, + DIV_USI_H, + DIV_USI_H_SZ, + DIV_UI_H, + DIV_UI_H_SZ, + DIV_ULI_H, + DIV_ULI_H_SZ, + DIV_ULLI_H, + DIV_ULLI_H_SZ, + DIV_H_SI, + DIV_H_SI_SZ, + DIV_H_I, + DIV_H_I_SZ, + DIV_H_LI, + DIV_H_LI_SZ, + DIV_H_LLI, + DIV_H_LLI_SZ, + DIV_H_USI, + DIV_H_USI_SZ, + DIV_H_UI, + DIV_H_UI_SZ, + DIV_H_ULI, + DIV_H_ULI_SZ, + DIV_H_ULLI, + DIV_H_ULLI_SZ, NEG, AND, OR, @@ -94,8 +262,7 @@ enum OP_TESTS { LT, GT, LE, - GE, - TW, + GE, // TODO: TW, PASS_BY_REF, AO_IMPL_HALF, AO_HALF_T, @@ -124,13 +291,102 @@ struct Functor_TestHalfOperators { } } + // BEGIN: Binary Arithmetic test helpers + template <class LhsType, class RhsType, class ExpectedResultType> + KOKKOS_INLINE_FUNCTION void test_add(int op_test_idx, + int op_test_sz_idx) const { + auto sum = static_cast<LhsType>(h_lhs) + static_cast<RhsType>(h_rhs); + actual_lhs(op_test_idx) = static_cast<double>(sum); + + if (std::is_same<RhsType, half_t>::value && + std::is_same<LhsType, half_t>::value) { + expected_lhs(op_test_idx) = d_lhs + d_rhs; + } else { + if (std::is_same<LhsType, half_t>::value) + expected_lhs(op_test_idx) = d_lhs + static_cast<RhsType>(d_rhs); + if (std::is_same<RhsType, half_t>::value) + expected_lhs(op_test_idx) = static_cast<LhsType>(d_lhs) + d_rhs; + } + + actual_lhs(op_test_sz_idx) = sizeof(sum); + expected_lhs(op_test_sz_idx) = sizeof(ExpectedResultType); + } + + template <class LhsType, class RhsType, class ExpectedResultType> + KOKKOS_INLINE_FUNCTION void test_sub(int op_test_idx, + int op_test_sz_idx) const { + auto result = static_cast<LhsType>(h_lhs) - static_cast<RhsType>(h_rhs); + actual_lhs(op_test_idx) = static_cast<double>(result); + + if (std::is_same<RhsType, half_t>::value && + std::is_same<LhsType, half_t>::value) { + expected_lhs(op_test_idx) = d_lhs - d_rhs; + } else { + if (std::is_same<LhsType, half_t>::value) + expected_lhs(op_test_idx) = d_lhs - static_cast<RhsType>(d_rhs); + if (std::is_same<RhsType, half_t>::value) + expected_lhs(op_test_idx) = 
static_cast<LhsType>(d_lhs) - d_rhs; + } + + actual_lhs(op_test_sz_idx) = sizeof(result); + expected_lhs(op_test_sz_idx) = sizeof(ExpectedResultType); + } + + template <class LhsType, class RhsType, class ExpectedResultType> + KOKKOS_INLINE_FUNCTION void test_mul(int op_test_idx, + int op_test_sz_idx) const { + auto result = static_cast<LhsType>(h_lhs) * static_cast<RhsType>(h_rhs); + actual_lhs(op_test_idx) = static_cast<double>(result); + + if (std::is_same<RhsType, half_t>::value && + std::is_same<LhsType, half_t>::value) { + expected_lhs(op_test_idx) = d_lhs * d_rhs; + } else { + if (std::is_same<LhsType, half_t>::value) + expected_lhs(op_test_idx) = d_lhs * static_cast<RhsType>(d_rhs); + if (std::is_same<RhsType, half_t>::value) + expected_lhs(op_test_idx) = static_cast<LhsType>(d_lhs) * d_rhs; + } + + actual_lhs(op_test_sz_idx) = sizeof(result); + expected_lhs(op_test_sz_idx) = sizeof(ExpectedResultType); + } + + template <class LhsType, class RhsType, class ExpectedResultType> + KOKKOS_INLINE_FUNCTION void test_div(int op_test_idx, + int op_test_sz_idx) const { + auto result = static_cast<LhsType>(h_lhs) / static_cast<RhsType>(h_rhs); + actual_lhs(op_test_idx) = static_cast<double>(result); + + if (std::is_same<RhsType, half_t>::value && + std::is_same<LhsType, half_t>::value) { + expected_lhs(op_test_idx) = d_lhs / d_rhs; + } else { + if (std::is_same<LhsType, half_t>::value) + expected_lhs(op_test_idx) = d_lhs / static_cast<RhsType>(d_rhs); + if (std::is_same<RhsType, half_t>::value) + expected_lhs(op_test_idx) = static_cast<LhsType>(d_lhs) / d_rhs; + } + + actual_lhs(op_test_sz_idx) = sizeof(result); + expected_lhs(op_test_sz_idx) = sizeof(ExpectedResultType); + } + // END: Binary Arithmetic test helpers + KOKKOS_FUNCTION void operator()(int) const { half_t tmp_lhs, tmp2_lhs, *tmp_ptr; double tmp_d_lhs; + float tmp_s_lhs; using half_impl_type = Kokkos::Impl::half_impl_t::type; half_impl_type half_tmp; + // Initialze output views to catch missing test 
invocations + for (int i = 0; i < N_OP_TESTS; ++i) { + actual_lhs(i) = 1; + expected_lhs(i) = -1; + } + tmp_lhs = h_lhs; actual_lhs(ASSIGN) = cast_from_half<double>(tmp_lhs); expected_lhs(ASSIGN) = d_lhs; @@ -177,11 +433,29 @@ struct Functor_TestHalfOperators { expected_lhs(CADD_H_H) = d_lhs; expected_lhs(CADD_H_H) += d_rhs; - // tmp_lhs = h_lhs; - // tmp_lhs += static_cast<float>(d_rhs); - // actual_lhs(CADD_H_S) = cast_from_half<double>(tmp_lhs); - // expected_lhs(CADD_H_S) = d_lhs; - // expected_lhs(CADD_H_S) += d_rhs; + tmp_lhs = h_lhs; + tmp_lhs += static_cast<float>(d_rhs); + actual_lhs(CADD_H_S) = cast_from_half<double>(tmp_lhs); + expected_lhs(CADD_H_S) = d_lhs; + expected_lhs(CADD_H_S) += d_rhs; + + tmp_s_lhs = static_cast<float>(h_lhs); + tmp_s_lhs += h_rhs; + actual_lhs(CADD_S_H) = static_cast<double>(tmp_s_lhs); + expected_lhs(CADD_S_H) = d_lhs; + expected_lhs(CADD_S_H) += d_rhs; + + tmp_lhs = static_cast<double>(h_lhs); + tmp_lhs += static_cast<double>(d_rhs); + actual_lhs(CADD_H_D) = cast_from_half<double>(tmp_lhs); + expected_lhs(CADD_H_D) = d_lhs; + expected_lhs(CADD_H_D) += d_rhs; + + tmp_d_lhs = static_cast<double>(h_lhs); + tmp_d_lhs += h_rhs; + actual_lhs(CADD_D_H) = static_cast<double>(tmp_d_lhs); + expected_lhs(CADD_D_H) = d_lhs; + expected_lhs(CADD_D_H) += d_rhs; tmp_lhs = h_lhs; tmp_lhs -= h_rhs; @@ -189,11 +463,29 @@ struct Functor_TestHalfOperators { expected_lhs(CSUB_H_H) = d_lhs; expected_lhs(CSUB_H_H) -= d_rhs; - // tmp_lhs = h_lhs; - // tmp_lhs -= static_cast<float>(d_rhs); - // actual_lhs(CSUB_H_S) = cast_from_half<double>(tmp_lhs); - // expected_lhs(CSUB_H_S) = d_lhs; - // expected_lhs(CSUB_H_S) -= d_rhs; + tmp_lhs = h_lhs; + tmp_lhs -= static_cast<float>(d_rhs); + actual_lhs(CSUB_H_S) = cast_from_half<double>(tmp_lhs); + expected_lhs(CSUB_H_S) = d_lhs; + expected_lhs(CSUB_H_S) -= d_rhs; + + tmp_s_lhs = static_cast<float>(h_lhs); + tmp_s_lhs -= h_rhs; + actual_lhs(CSUB_S_H) = static_cast<double>(tmp_s_lhs); + expected_lhs(CSUB_S_H) = 
d_lhs; + expected_lhs(CSUB_S_H) -= d_rhs; + + tmp_lhs = h_lhs; + tmp_lhs -= d_rhs; + actual_lhs(CSUB_H_D) = static_cast<double>(tmp_lhs); + expected_lhs(CSUB_H_D) = d_lhs; + expected_lhs(CSUB_H_D) -= d_rhs; + + tmp_d_lhs = static_cast<double>(h_lhs); + tmp_d_lhs -= h_rhs; + actual_lhs(CSUB_D_H) = tmp_d_lhs; + expected_lhs(CSUB_D_H) = d_lhs; + expected_lhs(CSUB_D_H) -= d_rhs; tmp_lhs = h_lhs; tmp_lhs *= h_rhs; @@ -201,11 +493,29 @@ struct Functor_TestHalfOperators { expected_lhs(CMUL_H_H) = d_lhs; expected_lhs(CMUL_H_H) *= d_rhs; - // tmp_lhs = h_lhs; - // tmp_lhs *= static_cast<float>(d_rhs); - // actual_lhs(CMUL_H_S) = cast_from_half<double>(tmp_lhs); - // expected_lhs(CMUL_H_S) = d_lhs; - // expected_lhs(CMUL_H_S) *= d_rhs; + tmp_lhs = h_lhs; + tmp_lhs *= static_cast<float>(d_rhs); + actual_lhs(CMUL_H_S) = cast_from_half<double>(tmp_lhs); + expected_lhs(CMUL_H_S) = d_lhs; + expected_lhs(CMUL_H_S) *= d_rhs; + + tmp_s_lhs = static_cast<float>(h_lhs); + tmp_s_lhs *= h_rhs; + actual_lhs(CMUL_S_H) = static_cast<double>(tmp_s_lhs); + expected_lhs(CMUL_S_H) = d_lhs; + expected_lhs(CMUL_S_H) *= d_rhs; + + tmp_lhs = h_lhs; + tmp_lhs *= d_rhs; + actual_lhs(CMUL_H_D) = static_cast<double>(tmp_lhs); + expected_lhs(CMUL_H_D) = d_lhs; + expected_lhs(CMUL_H_D) *= d_rhs; + + tmp_d_lhs = static_cast<double>(h_lhs); + tmp_d_lhs *= h_rhs; + actual_lhs(CMUL_D_H) = tmp_d_lhs; + expected_lhs(CMUL_D_H) = d_lhs; + expected_lhs(CMUL_D_H) *= d_rhs; tmp_lhs = h_lhs; tmp_lhs /= h_rhs; @@ -213,47 +523,249 @@ struct Functor_TestHalfOperators { expected_lhs(CDIV_H_H) = d_lhs; expected_lhs(CDIV_H_H) /= d_rhs; - // tmp_lhs = h_lhs; - // tmp_lhs /= static_cast<float>(d_rhs); - // actual_lhs(CDIV_H_S) = cast_from_half<double>(tmp_lhs); - // expected_lhs(CDIV_H_S) = d_lhs; - // expected_lhs(CDIV_H_S) /= d_rhs; - - actual_lhs(ADD_H_H) = cast_from_half<double>(h_lhs + h_rhs); - expected_lhs(ADD_H_H) = d_lhs + d_rhs; - // actual_lhs(ADD_H_S) = - // cast_from_half<double>(h_lhs + 
static_cast<float>(d_rhs)); - // expected_lhs(ADD_H_S) = d_lhs + d_rhs; - // actual_lhs(ADD_S_H) = - // cast_from_half<double>(static_cast<float>(d_lhs) + h_rhs); - // expected_lhs(ADD_S_H) = d_lhs + d_rhs; - - actual_lhs(SUB_H_H) = cast_from_half<double>(h_lhs - h_rhs); - expected_lhs(SUB_H_H) = d_lhs - d_rhs; - // actual_lhs(SUB_H_S) = - // cast_from_half<double>(h_lhs - static_cast<float>(d_rhs)); - // expected_lhs(SUB_H_S) = d_lhs - d_rhs; - // actual_lhs(SUB_S_H) = - // cast_from_half<double>(static_cast<float>(d_lhs) - h_rhs); - // expected_lhs(SUB_S_H) = d_lhs - d_rhs; - - actual_lhs(MUL_H_H) = cast_from_half<double>(h_lhs * h_rhs); - expected_lhs(MUL_H_H) = d_lhs * d_rhs; - // actual_lhs(MUL_H_S) = - // cast_from_half<double>(h_lhs * static_cast<float>(d_rhs)); - // expected_lhs(MUL_H_S) = d_lhs * d_rhs; - // actual_lhs(MUL_S_H) = - // cast_from_half<double>(static_cast<float>(d_lhs) * h_rhs); - // expected_lhs(MUL_S_H) = d_lhs * d_rhs; - - actual_lhs(DIV_H_H) = cast_from_half<double>(h_lhs / h_rhs); - expected_lhs(DIV_H_H) = d_lhs / d_rhs; - // actual_lhs(DIV_H_S) = - // cast_from_half<double>(h_lhs / static_cast<float>(d_rhs)); - // expected_lhs(DIV_H_S) = d_lhs / d_rhs; - // actual_lhs(DIV_S_H) = - // cast_from_half<double>(static_cast<float>(d_lhs) / h_rhs); - // expected_lhs(DIV_S_H) = d_lhs / d_rhs; + tmp_lhs = h_lhs; + tmp_lhs /= static_cast<float>(d_rhs); + actual_lhs(CDIV_H_S) = cast_from_half<double>(tmp_lhs); + expected_lhs(CDIV_H_S) = d_lhs; + expected_lhs(CDIV_H_S) /= d_rhs; + + tmp_s_lhs = static_cast<float>(h_lhs); + tmp_s_lhs /= h_rhs; + actual_lhs(CDIV_S_H) = static_cast<double>(tmp_s_lhs); + expected_lhs(CDIV_S_H) = d_lhs; + expected_lhs(CDIV_S_H) /= d_rhs; + + tmp_lhs = h_lhs; + tmp_lhs /= d_rhs; + actual_lhs(CDIV_H_D) = static_cast<double>(tmp_lhs); + expected_lhs(CDIV_H_D) = d_lhs; + expected_lhs(CDIV_H_D) /= d_rhs; + + tmp_d_lhs = static_cast<double>(h_lhs); + tmp_d_lhs /= h_rhs; + actual_lhs(CDIV_D_H) = tmp_d_lhs; + 
expected_lhs(CDIV_D_H) = d_lhs; + expected_lhs(CDIV_D_H) /= d_rhs; + + test_add<half_t, half_t, half_t>(ADD_H_H, ADD_H_H_SZ); + test_add<float, half_t, float>(ADD_S_H, ADD_S_H_SZ); + test_add<double, half_t, double>(ADD_D_H, ADD_D_H_SZ); + test_add<short int, half_t, half_t>(ADD_SI_H, ADD_SI_H_SZ); + test_add<int, half_t, half_t>(ADD_I_H, ADD_I_H_SZ); + test_add<long int, half_t, half_t>(ADD_LI_H, ADD_LI_H_SZ); + test_add<long long int, half_t, half_t>(ADD_LLI_H, ADD_LLI_H_SZ); + test_add<half_t, float, float>(ADD_H_S, ADD_H_S_SZ); + test_add<half_t, double, double>(ADD_H_D, ADD_H_D_SZ); + test_add<half_t, short int, half_t>(ADD_H_SI, ADD_H_SI_SZ); + test_add<half_t, int, half_t>(ADD_H_I, ADD_H_I_SZ); + test_add<half_t, long int, half_t>(ADD_H_LI, ADD_H_LI_SZ); + test_add<half_t, long long int, half_t>(ADD_H_LLI, ADD_H_LLI_SZ); + + // Check for potential overflow due to negative half_t -> unsigned integral + // cast + if (h_lhs >= 0) { + test_add<unsigned short int, half_t, half_t>(ADD_USI_H, ADD_USI_H_SZ); + test_add<unsigned int, half_t, half_t>(ADD_UI_H, ADD_UI_H_SZ); + test_add<unsigned long int, half_t, half_t>(ADD_ULI_H, ADD_ULI_H_SZ); + test_add<unsigned long long int, half_t, half_t>(ADD_ULLI_H, + ADD_ULLI_H_SZ); + } else { + actual_lhs(ADD_USI_H) = expected_lhs(ADD_USI_H); + actual_lhs(ADD_USI_H_SZ) = expected_lhs(ADD_USI_H_SZ); + actual_lhs(ADD_UI_H) = expected_lhs(ADD_UI_H); + actual_lhs(ADD_UI_H_SZ) = expected_lhs(ADD_UI_H_SZ); + actual_lhs(ADD_ULI_H) = expected_lhs(ADD_ULI_H); + actual_lhs(ADD_ULI_H_SZ) = expected_lhs(ADD_ULI_H_SZ); + actual_lhs(ADD_ULLI_H) = expected_lhs(ADD_ULLI_H); + actual_lhs(ADD_ULLI_H_SZ) = expected_lhs(ADD_ULLI_H_SZ); + } + + // Check for potential overflow due to negative half_t -> unsigned integral + // cast + if (h_rhs >= 0) { + test_add<half_t, unsigned short int, half_t>(ADD_H_USI, ADD_H_USI_SZ); + test_add<half_t, unsigned int, half_t>(ADD_H_UI, ADD_H_UI_SZ); + test_add<half_t, unsigned long int, half_t>(ADD_H_ULI, 
ADD_H_ULI_SZ); + test_add<half_t, unsigned long long int, half_t>(ADD_H_ULLI, + ADD_H_ULLI_SZ); + } else { + actual_lhs(ADD_H_USI) = expected_lhs(ADD_H_USI); + actual_lhs(ADD_H_USI_SZ) = expected_lhs(ADD_H_USI_SZ); + actual_lhs(ADD_H_UI) = expected_lhs(ADD_H_UI); + actual_lhs(ADD_H_UI_SZ) = expected_lhs(ADD_H_UI_SZ); + actual_lhs(ADD_H_ULI) = expected_lhs(ADD_H_ULI); + actual_lhs(ADD_H_ULI_SZ) = expected_lhs(ADD_H_ULI_SZ); + actual_lhs(ADD_H_ULLI) = expected_lhs(ADD_H_ULLI); + actual_lhs(ADD_H_ULLI_SZ) = expected_lhs(ADD_H_ULLI_SZ); + } + + test_sub<half_t, half_t, half_t>(SUB_H_H, SUB_H_H_SZ); + test_sub<float, half_t, float>(SUB_S_H, SUB_S_H_SZ); + test_sub<double, half_t, double>(SUB_D_H, SUB_D_H_SZ); + test_sub<short int, half_t, half_t>(SUB_SI_H, SUB_SI_H_SZ); + test_sub<int, half_t, half_t>(SUB_I_H, SUB_I_H_SZ); + test_sub<long int, half_t, half_t>(SUB_LI_H, SUB_LI_H_SZ); + test_sub<long long int, half_t, half_t>(SUB_LLI_H, SUB_LLI_H_SZ); + test_sub<half_t, float, float>(SUB_H_S, SUB_H_S_SZ); + test_sub<half_t, double, double>(SUB_H_D, SUB_H_D_SZ); + test_sub<half_t, short int, half_t>(SUB_H_SI, SUB_H_SI_SZ); + test_sub<half_t, int, half_t>(SUB_H_I, SUB_H_I_SZ); + test_sub<half_t, long int, half_t>(SUB_H_LI, SUB_H_LI_SZ); + test_sub<half_t, long long int, half_t>(SUB_H_LLI, SUB_H_LLI_SZ); + + // Check for potential overflow due to negative half_t -> unsigned integral + // cast + if (h_lhs >= half_t(0)) { + test_sub<unsigned short int, half_t, half_t>(SUB_USI_H, SUB_USI_H_SZ); + test_sub<unsigned int, half_t, half_t>(SUB_UI_H, SUB_UI_H_SZ); + test_sub<unsigned long int, half_t, half_t>(SUB_ULI_H, SUB_ULI_H_SZ); + test_sub<unsigned long long int, half_t, half_t>(SUB_ULLI_H, + SUB_ULLI_H_SZ); + } else { + actual_lhs(SUB_USI_H) = expected_lhs(SUB_USI_H); + actual_lhs(SUB_USI_H_SZ) = expected_lhs(SUB_USI_H_SZ); + actual_lhs(SUB_UI_H) = expected_lhs(SUB_UI_H); + actual_lhs(SUB_UI_H_SZ) = expected_lhs(SUB_UI_H_SZ); + actual_lhs(SUB_ULI_H) = expected_lhs(SUB_ULI_H); 
+ actual_lhs(SUB_ULI_H_SZ) = expected_lhs(SUB_ULI_H_SZ); + actual_lhs(SUB_ULLI_H) = expected_lhs(SUB_ULLI_H); + actual_lhs(SUB_ULLI_H_SZ) = expected_lhs(SUB_ULLI_H_SZ); + } + + // Check for potential overflow due to negative half_t -> unsigned integral + // cast + if (h_rhs >= half_t(0)) { + test_sub<half_t, unsigned short int, half_t>(SUB_H_USI, SUB_H_USI_SZ); + test_sub<half_t, unsigned int, half_t>(SUB_H_UI, SUB_H_UI_SZ); + test_sub<half_t, unsigned long int, half_t>(SUB_H_ULI, SUB_H_ULI_SZ); + test_sub<half_t, unsigned long long int, half_t>(SUB_H_ULLI, + SUB_H_ULLI_SZ); + } else { + actual_lhs(SUB_H_USI) = expected_lhs(SUB_H_USI); + actual_lhs(SUB_H_USI_SZ) = expected_lhs(SUB_H_USI_SZ); + actual_lhs(SUB_H_UI) = expected_lhs(SUB_H_UI); + actual_lhs(SUB_H_UI_SZ) = expected_lhs(SUB_H_UI_SZ); + actual_lhs(SUB_H_ULI) = expected_lhs(SUB_H_ULI); + actual_lhs(SUB_H_ULI_SZ) = expected_lhs(SUB_H_ULI_SZ); + actual_lhs(SUB_H_ULLI) = expected_lhs(SUB_H_ULLI); + actual_lhs(SUB_H_ULLI_SZ) = expected_lhs(SUB_H_ULLI_SZ); + } + + test_mul<half_t, half_t, half_t>(MUL_H_H, MUL_H_H_SZ); + test_mul<float, half_t, float>(MUL_S_H, MUL_S_H_SZ); + test_mul<double, half_t, double>(MUL_D_H, MUL_D_H_SZ); + test_mul<short int, half_t, half_t>(MUL_SI_H, MUL_SI_H_SZ); + test_mul<int, half_t, half_t>(MUL_I_H, MUL_I_H_SZ); + test_mul<long int, half_t, half_t>(MUL_LI_H, MUL_LI_H_SZ); + test_mul<long long int, half_t, half_t>(MUL_LLI_H, MUL_LLI_H_SZ); + test_mul<half_t, float, float>(MUL_H_S, MUL_H_S_SZ); + test_mul<half_t, double, double>(MUL_H_D, MUL_H_D_SZ); + test_mul<half_t, short int, half_t>(MUL_H_SI, MUL_H_SI_SZ); + test_mul<half_t, int, half_t>(MUL_H_I, MUL_H_I_SZ); + test_mul<half_t, long int, half_t>(MUL_H_LI, MUL_H_LI_SZ); + test_mul<half_t, long long int, half_t>(MUL_H_LLI, MUL_H_LLI_SZ); + + // Check for potential overflow due to negative half_t -> unsigned integral + // cast + if (h_lhs >= half_t(0)) { + test_mul<unsigned short int, half_t, half_t>(MUL_USI_H, MUL_USI_H_SZ); + 
test_mul<unsigned int, half_t, half_t>(MUL_UI_H, MUL_UI_H_SZ); + test_mul<unsigned long int, half_t, half_t>(MUL_ULI_H, MUL_ULI_H_SZ); + test_mul<unsigned long long int, half_t, half_t>(MUL_ULLI_H, + MUL_ULLI_H_SZ); + } else { + actual_lhs(MUL_USI_H) = expected_lhs(MUL_USI_H); + actual_lhs(MUL_UI_H) = expected_lhs(MUL_UI_H); + actual_lhs(MUL_ULI_H) = expected_lhs(MUL_ULI_H); + actual_lhs(MUL_ULLI_H) = expected_lhs(MUL_ULLI_H); + actual_lhs(MUL_USI_H_SZ) = expected_lhs(MUL_USI_H_SZ); + actual_lhs(MUL_UI_H_SZ) = expected_lhs(MUL_UI_H_SZ); + actual_lhs(MUL_ULI_H_SZ) = expected_lhs(MUL_ULI_H_SZ); + actual_lhs(MUL_ULLI_H_SZ) = expected_lhs(MUL_ULLI_H_SZ); + } + + // Check for potential overflow due to negative half_t -> unsigned integral + // cast + if (h_rhs >= half_t(0)) { + test_mul<half_t, unsigned short int, half_t>(MUL_H_USI, MUL_H_USI_SZ); + test_mul<half_t, unsigned int, half_t>(MUL_H_UI, MUL_H_UI_SZ); + test_mul<half_t, unsigned long int, half_t>(MUL_H_ULI, MUL_H_ULI_SZ); + test_mul<half_t, unsigned long long int, half_t>(MUL_H_ULLI, + MUL_H_ULLI_SZ); + } else { + actual_lhs(MUL_H_USI) = expected_lhs(MUL_H_USI); + actual_lhs(MUL_H_UI) = expected_lhs(MUL_H_UI); + actual_lhs(MUL_H_ULI) = expected_lhs(MUL_H_ULI); + actual_lhs(MUL_H_ULLI) = expected_lhs(MUL_H_ULLI); + actual_lhs(MUL_H_USI_SZ) = expected_lhs(MUL_H_USI_SZ); + actual_lhs(MUL_H_UI_SZ) = expected_lhs(MUL_H_UI_SZ); + actual_lhs(MUL_H_ULI_SZ) = expected_lhs(MUL_H_ULI_SZ); + actual_lhs(MUL_H_ULLI_SZ) = expected_lhs(MUL_H_ULLI_SZ); + } + + test_div<half_t, half_t, half_t>(DIV_H_H, DIV_H_H_SZ); + test_div<float, half_t, float>(DIV_S_H, DIV_S_H_SZ); + test_div<double, half_t, double>(DIV_D_H, DIV_D_H_SZ); + test_div<short int, half_t, half_t>(DIV_SI_H, DIV_SI_H_SZ); + test_div<int, half_t, half_t>(DIV_I_H, DIV_I_H_SZ); + test_div<long int, half_t, half_t>(DIV_LI_H, DIV_LI_H_SZ); + test_div<long long int, half_t, half_t>(DIV_LLI_H, DIV_LLI_H_SZ); + test_div<half_t, float, float>(DIV_H_S, DIV_H_S_SZ); + 
test_div<half_t, double, double>(DIV_H_D, DIV_H_D_SZ); + + // Check for division by zero due to truncation by half_t -> integral cast + if (h_rhs >= half_t(1) || h_rhs <= half_t(-1)) { + test_div<half_t, short int, half_t>(DIV_H_SI, DIV_H_SI_SZ); + test_div<half_t, int, half_t>(DIV_H_I, DIV_H_I_SZ); + test_div<half_t, long int, half_t>(DIV_H_LI, DIV_H_LI_SZ); + test_div<half_t, long long int, half_t>(DIV_H_LLI, DIV_H_LLI_SZ); + } else { + actual_lhs(DIV_H_SI) = expected_lhs(DIV_H_SI); + actual_lhs(DIV_H_I) = expected_lhs(DIV_H_I); + actual_lhs(DIV_H_LI) = expected_lhs(DIV_H_LI); + actual_lhs(DIV_H_LLI) = expected_lhs(DIV_H_LLI); + actual_lhs(DIV_H_SI_SZ) = expected_lhs(DIV_H_SI_SZ); + actual_lhs(DIV_H_I_SZ) = expected_lhs(DIV_H_I_SZ); + actual_lhs(DIV_H_LI_SZ) = expected_lhs(DIV_H_LI_SZ); + actual_lhs(DIV_H_LLI_SZ) = expected_lhs(DIV_H_LLI_SZ); + } + + // Check for potential overflow due to negative half_t -> unsigned integral + // cast + if (h_lhs >= half_t(0)) { + test_div<unsigned short int, half_t, half_t>(DIV_USI_H, DIV_USI_H_SZ); + test_div<unsigned int, half_t, half_t>(DIV_UI_H, DIV_UI_H_SZ); + test_div<unsigned long int, half_t, half_t>(DIV_ULI_H, DIV_ULI_H_SZ); + test_div<unsigned long long int, half_t, half_t>(DIV_ULLI_H, + DIV_ULLI_H_SZ); + } else { + actual_lhs(DIV_USI_H) = expected_lhs(DIV_USI_H); + actual_lhs(DIV_UI_H) = expected_lhs(DIV_UI_H); + actual_lhs(DIV_ULI_H) = expected_lhs(DIV_ULI_H); + actual_lhs(DIV_ULLI_H) = expected_lhs(DIV_ULLI_H); + actual_lhs(DIV_USI_H_SZ) = expected_lhs(DIV_USI_H_SZ); + actual_lhs(DIV_UI_H_SZ) = expected_lhs(DIV_UI_H_SZ); + actual_lhs(DIV_ULI_H_SZ) = expected_lhs(DIV_ULI_H_SZ); + actual_lhs(DIV_ULLI_H_SZ) = expected_lhs(DIV_ULLI_H_SZ); + } + + // Check for division by zero due to truncation by half_t -> integral cast + if (h_rhs >= half_t(1)) { + test_div<half_t, unsigned short int, half_t>(DIV_H_USI, DIV_H_USI_SZ); + test_div<half_t, unsigned int, half_t>(DIV_H_UI, DIV_H_UI_SZ); + test_div<half_t, unsigned long int, 
half_t>(DIV_H_ULI, DIV_H_ULI_SZ); + test_div<half_t, unsigned long long int, half_t>(DIV_H_ULLI, + DIV_H_ULLI_SZ); + } else { + actual_lhs(DIV_H_USI) = expected_lhs(DIV_H_USI); + actual_lhs(DIV_H_USI_SZ) = expected_lhs(DIV_H_USI_SZ); + actual_lhs(DIV_H_UI) = expected_lhs(DIV_H_UI); + actual_lhs(DIV_H_UI_SZ) = expected_lhs(DIV_H_UI_SZ); + actual_lhs(DIV_H_ULI) = expected_lhs(DIV_H_ULI); + actual_lhs(DIV_H_ULI_SZ) = expected_lhs(DIV_H_ULI_SZ); + actual_lhs(DIV_H_ULLI) = expected_lhs(DIV_H_ULLI); + actual_lhs(DIV_H_ULLI_SZ) = expected_lhs(DIV_H_ULLI_SZ); + } // TODO: figure out why operator{!,&&,||} are returning __nv_bool actual_lhs(NEG) = static_cast<double>(!h_lhs); @@ -303,7 +815,8 @@ struct Functor_TestHalfOperators { actual_lhs(AO_HALF_T) = cast_from_half<double>(tmp_ptr[0]); expected_lhs(AO_HALF_T) = d_lhs; - // TODO: Add upcast / downcast tests using sizeof + // TODO: Check upcasting and downcasting in large expressions involving + // integral and floating point types } }; @@ -320,7 +833,7 @@ void __test_half_operators(half_t h_lhs, half_t h_rhs) { Kokkos::deep_copy(f_device_actual_lhs, f_device.actual_lhs); Kokkos::deep_copy(f_device_expected_lhs, f_device.expected_lhs); for (int op_test = 0; op_test < N_OP_TESTS; op_test++) { - // printf("%lf\n", actual_lhs(op)); + // printf("op_test = %d\n", op_test); ASSERT_NEAR(f_device_actual_lhs(op_test), f_device_expected_lhs(op_test), epsilon); ASSERT_NEAR(f_host.actual_lhs(op_test), f_host.expected_lhs(op_test), @@ -351,8 +864,13 @@ void __test_half_operators(half_t h_lhs, half_t h_rhs) { void test_half_operators() { half_t h_lhs = half_t(0.23458), h_rhs = half_t(0.67898); for (int i = -3; i < 2; i++) { + // printf("%f OP %f\n", float(h_lhs + cast_to_half(i + 1)), float(h_rhs + + // cast_to_half(i))); __test_half_operators(h_lhs + cast_to_half(i + 1), h_rhs + cast_to_half(i)); + // TODO: __test_half_operators(h_lhs + cast_to_half(i + 1), half_t(0)); + // TODO: __test_half_operators(half_t(0), h_rhs + 
cast_to_half(i)); } + // TODO: __test_half_operators(0, 0); } TEST(TEST_CATEGORY, half_operators) { test_half_operators(); } diff --git a/packages/kokkos/core/unit_test/TestHostBarrier.cpp b/packages/kokkos/core/unit_test/TestHostBarrier.cpp deleted file mode 100644 index 230ba2fb83a278d2fef085ae7540789e0e1a74d2..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/unit_test/TestHostBarrier.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include <gtest/gtest.h> - -namespace Test { - -TEST(host_barrier, openmp) {} - -} // namespace Test diff --git a/packages/kokkos/core/unit_test/TestHostSharedPtr.hpp b/packages/kokkos/core/unit_test/TestHostSharedPtr.hpp new file mode 100644 index 0000000000000000000000000000000000000000..731e9fc36d9bf17aa93fc1e458d3058bf7a37994 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestHostSharedPtr.hpp @@ -0,0 +1,155 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <impl/Kokkos_HostSharedPtr.hpp> + +#include <gtest/gtest.h> + +using Kokkos::Impl::HostSharedPtr; + +TEST(TEST_CATEGORY, host_shared_ptr_use_count) { + using T = int; + { + HostSharedPtr<T> p1; + EXPECT_EQ(p1.use_count(), 0); + } + { + HostSharedPtr<T> p1(nullptr); + EXPECT_EQ(p1.use_count(), 0); + } + { + HostSharedPtr<T> p1(new T()); + EXPECT_EQ(p1.use_count(), 1); + } + { + HostSharedPtr<T> p1(new T(), [](T* p) { delete p; }); + EXPECT_EQ(p1.use_count(), 1); + } + { + T i; + HostSharedPtr<T> p1(&i, [](T*) {}); + EXPECT_EQ(p1.use_count(), 1); + } + { + HostSharedPtr<T> p1(new T()); + HostSharedPtr<T> p2(p1); // copy construction + EXPECT_EQ(p1.use_count(), 2); + EXPECT_EQ(p2.use_count(), 2); + } + { + HostSharedPtr<T> p1(new T()); + HostSharedPtr<T> p2(std::move(p1)); // move construction + EXPECT_EQ(p2.use_count(), 1); + } + { + HostSharedPtr<T> p1(new T()); + HostSharedPtr<T> p2; + p2 = p1; // copy assignment + EXPECT_EQ(p1.use_count(), 2); + EXPECT_EQ(p2.use_count(), 2); + } + { + HostSharedPtr<T> p1(new T()); + HostSharedPtr<T> p2; + p2 = 
std::move(p1); // move assignment + EXPECT_EQ(p2.use_count(), 1); + } +} + +TEST(TEST_CATEGORY, host_shared_ptr_get) { + using T = int; + { + HostSharedPtr<T> p1; + EXPECT_EQ(p1.get(), nullptr); + } + { + HostSharedPtr<T> p1(nullptr); + EXPECT_EQ(p1.get(), nullptr); + } + { + T* p_i = new T(); + HostSharedPtr<T> p1(p_i); + EXPECT_EQ(p1.get(), p_i); + } + { + T* p_i = new T(); + HostSharedPtr<T> p1(p_i, [](T* p) { delete p; }); + EXPECT_EQ(p1.get(), p_i); + } + { + T i; + HostSharedPtr<T> p1(&i, [](T*) {}); + EXPECT_EQ(p1.get(), &i); + } + { + T i; + HostSharedPtr<T> p1(&i, [](T*) {}); + HostSharedPtr<T> p2(p1); // copy construction + EXPECT_EQ(p1.get(), &i); + EXPECT_EQ(p1.get(), &i); + } + { + T i; + HostSharedPtr<T> p1(&i, [](T*) {}); + HostSharedPtr<T> p2(std::move(p1)); // move construction + EXPECT_EQ(p1.get(), nullptr); + EXPECT_EQ(p2.get(), &i); + } + { + T i; + HostSharedPtr<T> p1(&i, [](T*) {}); + HostSharedPtr<T> p2; + p2 = p1; // copy assignment + EXPECT_EQ(p1.get(), &i); + EXPECT_EQ(p1.get(), &i); + } + { + T i; + HostSharedPtr<T> p1(&i, [](T*) {}); + HostSharedPtr<T> p2; + p2 = std::move(p1); // move assignment + EXPECT_EQ(p1.get(), nullptr); + EXPECT_EQ(p2.get(), &i); + } +} diff --git a/packages/kokkos/core/unit_test/TestHostSharedPtrAccessOnDevice.hpp b/packages/kokkos/core/unit_test/TestHostSharedPtrAccessOnDevice.hpp new file mode 100644 index 0000000000000000000000000000000000000000..18d1ac85188ca17cd7d127d3187103f42402be18 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestHostSharedPtrAccessOnDevice.hpp @@ -0,0 +1,156 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <impl/Kokkos_HostSharedPtr.hpp> +#include <Kokkos_Core.hpp> + +#include <gtest/gtest.h> + +using Kokkos::Impl::HostSharedPtr; + +namespace { + +class Data { + Kokkos::Array<char, 64> d; + + public: + KOKKOS_FUNCTION void write(char const* c) { + for (int i = 0; i < 64 && c; ++i, ++c) { + d[i] = *c; + } + } +}; + +template <class SmartPtr> +struct CheckAccessStoredPointerAndDereferenceOnDevice { + SmartPtr m_device_ptr; + using ElementType = typename SmartPtr::element_type; + static_assert(std::is_same<ElementType, Data>::value, ""); + + CheckAccessStoredPointerAndDereferenceOnDevice(SmartPtr device_ptr) + : m_device_ptr(device_ptr) { + int errors; + Kokkos::parallel_reduce(Kokkos::RangePolicy<TEST_EXECSPACE>(0, 1), *this, + errors); + EXPECT_EQ(errors, 0); + } + + KOKKOS_FUNCTION void operator()(int, int& e) const { + auto raw_ptr = m_device_ptr.get(); // get + + auto tmp = new (raw_ptr) ElementType(); + + auto& obj = *m_device_ptr; // operator* + if (&obj != raw_ptr) ++e; + + m_device_ptr->write("hello world"); // operator-> + + tmp->~ElementType(); + } +}; + +template <class Ptr> +CheckAccessStoredPointerAndDereferenceOnDevice<Ptr> +check_access_stored_pointer_and_dereference_on_device(Ptr p) { + return {p}; +} + +template <class SmartPtr> +struct CheckSpecialMembersOnDevice { + SmartPtr m_device_ptr; + + KOKKOS_FUNCTION void operator()(int, int& e) const { + SmartPtr p1 = m_device_ptr; // copy construction + SmartPtr p2 = std::move(p1); // move construction + + p1 = p2; // copy assignment + p2 = std::move(p1); // move assignment + + SmartPtr p3; // default constructor + if (p3) ++e; + SmartPtr p4{nullptr}; + if (p4) ++e; + } + + CheckSpecialMembersOnDevice(SmartPtr device_ptr) : m_device_ptr(device_ptr) { + int errors; + Kokkos::parallel_reduce(Kokkos::RangePolicy<TEST_EXECSPACE>(0, 1), *this, + errors); + EXPECT_EQ(errors, 0); + 
} +}; + +template <class Ptr> +CheckSpecialMembersOnDevice<Ptr> check_special_members_on_device(Ptr p) { + return {p}; +} + +} // namespace + +TEST(TEST_CATEGORY, host_shared_ptr_dereference_on_device) { + using T = Data; + + using MemorySpace = TEST_EXECSPACE::memory_space; + + HostSharedPtr<T> device_ptr( + static_cast<T*>(Kokkos::kokkos_malloc<MemorySpace>(sizeof(T))), + [](T* p) { Kokkos::kokkos_free<MemorySpace>(p); }); + + check_access_stored_pointer_and_dereference_on_device(device_ptr); +} + +// FIXME_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET +TEST(TEST_CATEGORY, host_shared_ptr_special_members_on_device) { + using T = Data; + + using MemorySpace = TEST_EXECSPACE::memory_space; + + HostSharedPtr<T> device_ptr( + static_cast<T*>(Kokkos::kokkos_malloc<MemorySpace>(sizeof(T))), + [](T* p) { Kokkos::kokkos_free<MemorySpace>(p); }); + + check_special_members_on_device(device_ptr); +} +#endif diff --git a/packages/kokkos/core/unit_test/TestMDRange.hpp b/packages/kokkos/core/unit_test/TestMDRange.hpp index 90a31fe0f38c565026109f0360c13df19aaf27e1..5618e40989b185a0233de2b20d6dec6636c9fe51 100644 --- a/packages/kokkos/core/unit_test/TestMDRange.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange.hpp @@ -378,7 +378,7 @@ struct TestMDRange_2D { parallel_reduce( "rank2-min-reducer", range, KOKKOS_LAMBDA(const int i, const int j, double &min_val) { - min_val = fmin(v_in(i, j), min_val); + min_val = Kokkos::Experimental::fmin(v_in(i, j), min_val); }, reducer_scalar); @@ -1411,8 +1411,13 @@ struct TestMDRange_3D { using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{0, 0, 0}}, point_type{{N0, N1, N2}}, + tile_type{{8, 8, 4}}); +#else range_type range(point_type{{0, 0, 0}}, point_type{{N0, N1, N2}}, tile_type{{8, 8, 8}}); +#endif TestMDRange_3D functor(N0, N1, N2); parallel_for(range, functor); @@ -1874,8 +1879,13 @@ struct TestMDRange_4D { int s1 = 1; int s2 
= 1; int s3 = 1; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{s0, s1, s2, s3}}, + point_type{{N0, N1, N2, N3}}, tile_type{{3, 11, 3, 2}}); +#else range_type range(point_type{{s0, s1, s2, s3}}, point_type{{N0, N1, N2, N3}}, tile_type{{3, 11, 3, 3}}); +#endif TestMDRange_4D functor(N0, N1, N2, N3); parallel_for(range, functor); @@ -2440,9 +2450,16 @@ struct TestMDRange_5D { int s2 = 1; int s3 = 1; int s4 = 1; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{s0, s1, s2, s3, s4}}, + point_type{{N0, N1, N2, N3, N4}}, + tile_type{{3, 3, 3, 3, 3}}); +#else range_type range(point_type{{s0, s1, s2, s3, s4}}, point_type{{N0, N1, N2, N3, N4}}, tile_type{{3, 3, 3, 3, 5}}); +#endif + TestMDRange_5D functor(N0, N1, N2, N3, N4); parallel_for(range, functor); @@ -2767,9 +2784,16 @@ struct TestMDRange_6D { int s3 = 1; int s4 = 1; int s5 = 1; + +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{s0, s1, s2, s3, s4, s5}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{3, 3, 3, 2, 2, 2}}); +#else range_type range(point_type{{s0, s1, s2, s3, s4, s5}}, point_type{{N0, N1, N2, N3, N4, N5}}, tile_type{{3, 3, 3, 3, 3, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -2786,8 +2810,13 @@ struct TestMDRange_6D { using range_type = typename Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int>>; +#ifdef KOKKOS_ENABLE_SYCL + range_type range({{0, 0, 0, 0, 0, 0}}, {{N0, N1, N2, N3, N4, N5}}, + {{3, 3, 3, 2, 2, 2}}); +#else range_type range({{0, 0, 0, 0, 0, 0}}, {{N0, N1, N2, N3, N4, N5}}, {{3, 3, 3, 3, 3, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -2806,8 +2835,14 @@ struct TestMDRange_6D { using range_type = typename Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int>>; + +#ifdef KOKKOS_ENABLE_SYCL + range_type range({{0, 0, 0, 0, 0, 0}}, {{N0, N1, N2, N3, N4, N5}}, + {{3, 3, 3, 2, 2, 2}}); +#else range_type range({{0, 0, 0, 0, 0, 0}}, {{N0, N1, N2, N3, N4, N5}}, {{3, 3, 3, 3, 3, 
2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -2826,8 +2861,13 @@ struct TestMDRange_6D { using range_type = typename Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int>>; +#ifdef KOKKOS_ENABLE_SYCL + range_type range({{0, 0, 0, 0, 0, 0}}, {{N0, N1, N2, N3, N4, N5}}, + {{3, 3, 3, 2, 2, 2}}); +#else range_type range({{0, 0, 0, 0, 0, 0}}, {{N0, N1, N2, N3, N4, N5}}, {{3, 3, 3, 3, 3, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -2851,7 +2891,6 @@ struct TestMDRange_6D { using range_type = typename Kokkos::MDRangePolicy<ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int>>; - range_type range({{1, 1, 1, 1, 1, 1}}, {{N0, N1, N2, N3, N4, N5}}, {{3, 3, 3, 2, 2, 1}}); @@ -2890,9 +2929,15 @@ struct TestMDRange_6D { using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{2, 4, 4, 2, 2, 2}}); +#else range_type range(point_type{{0, 0, 0, 0, 0, 0}}, point_type{{N0, N1, N2, N3, N4, N5}}, tile_type{{2, 4, 6, 2, 2, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -3029,11 +3074,17 @@ struct TestMDRange_6D { int s3 = 1; int s4 = 1; int s5 = 1; - range_type range( - point_type{{s0, s1, s2, s3, s4, s5}}, - point_type{{N0, N1, N2, N3, N4, N5}}, - tile_type{{3, 3, 3, 3, 2, 3}}); // tile dims 3,3,3,3,3,3 more than - // cuda can handle with debugging +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{s0, s1, s2, s3, s4, s5}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{3, 3, 3, 2, 2, 2}}); +#else + // tile dims 3,3,3,3,3,3 more than cuda can handle with debugging + range_type range(point_type{{s0, s1, s2, s3, s4, s5}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{3, 3, 3, 3, 2, 3}}); +#endif + TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); parallel_for(range, functor); @@ -3070,9 +3121,15 @@ struct TestMDRange_6D { 
using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{4, 4, 2, 2, 2, 2}}); +#else range_type range(point_type{{0, 0, 0, 0, 0, 0}}, point_type{{N0, N1, N2, N3, N4, N5}}, tile_type{{4, 4, 4, 2, 2, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -3107,9 +3164,15 @@ struct TestMDRange_6D { using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{4, 4, 2, 2, 2, 2}}); +#else range_type range(point_type{{0, 0, 0, 0, 0, 0}}, point_type{{N0, N1, N2, N3, N4, N5}}, tile_type{{4, 4, 4, 2, 2, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -3144,9 +3207,15 @@ struct TestMDRange_6D { using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{4, 4, 2, 2, 2, 2}}); +#else range_type range(point_type{{0, 0, 0, 0, 0, 0}}, point_type{{N0, N1, N2, N3, N4, N5}}, tile_type{{4, 4, 4, 2, 2, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -3181,9 +3250,15 @@ struct TestMDRange_6D { using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{4, 4, 2, 2, 2, 2}}); +#else range_type range(point_type{{0, 0, 0, 0, 0, 0}}, point_type{{N0, N1, N2, N3, N4, N5}}, tile_type{{4, 4, 4, 2, 2, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -3218,9 +3293,15 @@ struct TestMDRange_6D { using tile_type = typename range_type::tile_type; using point_type 
= typename range_type::point_type; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{4, 4, 2, 2, 2, 2}}); +#else range_type range(point_type{{0, 0, 0, 0, 0, 0}}, point_type{{N0, N1, N2, N3, N4, N5}}, tile_type{{4, 4, 4, 2, 2, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); @@ -3255,9 +3336,15 @@ struct TestMDRange_6D { using tile_type = typename range_type::tile_type; using point_type = typename range_type::point_type; +#ifdef KOKKOS_ENABLE_SYCL + range_type range(point_type{{0, 0, 0, 0, 0, 0}}, + point_type{{N0, N1, N2, N3, N4, N5}}, + tile_type{{4, 4, 2, 2, 2, 2}}); +#else range_type range(point_type{{0, 0, 0, 0, 0, 0}}, point_type{{N0, N1, N2, N3, N4, N5}}, tile_type{{4, 4, 4, 2, 2, 2}}); +#endif TestMDRange_6D functor(N0, N1, N2, N3, N4, N5); diff --git a/packages/kokkos/core/unit_test/TestMDRange_a.hpp b/packages/kokkos/core/unit_test/TestMDRange_a.hpp index 3f3d13e7ce9243f962b2e88c3c832d26959fe1bf..0f2abd6d65e921bf07b512984b17ac3d5f5fe67c 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_a.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_a.hpp @@ -47,7 +47,10 @@ namespace Test { TEST(TEST_CATEGORY, mdrange_5d) { +// FIXME_OPENMPTARGET requires MDRange parallel_reduce +#ifndef KOKKOS_ENABLE_OPENMPTARGET TestMDRange_5D<TEST_EXECSPACE>::test_reduce5(100, 10, 10, 10, 5); +#endif TestMDRange_5D<TEST_EXECSPACE>::test_for5(100, 10, 10, 10, 5); } diff --git a/packages/kokkos/core/unit_test/TestMDRange_b.hpp b/packages/kokkos/core/unit_test/TestMDRange_b.hpp index f43ba38c7ca840b62878580c28563fe16e84fcaf..85410d5c27fa6ba60c5d8034efa0d30bb1f6db7a 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_b.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_b.hpp @@ -48,7 +48,10 @@ namespace Test { TEST(TEST_CATEGORY, mdrange_6d) { TestMDRange_6D<TEST_EXECSPACE>::test_for6(10, 10, 10, 10, 5, 5); +#ifndef KOKKOS_ENABLE_OPENMPTARGET + // FIXME_OPENMPTARGET requires 
MDRange parallel_reduce TestMDRange_6D<TEST_EXECSPACE>::test_reduce6(100, 10, 10, 10, 5, 5); +#endif } } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestMDRange_c.hpp b/packages/kokkos/core/unit_test/TestMDRange_c.hpp index dbaed8ec128b81f4eeae49a3073d9c19f47ea2bc..9f597ec54b5777fe1df4f7e831c20e9eb1eab38d 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_c.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_c.hpp @@ -47,13 +47,18 @@ namespace Test { TEST(TEST_CATEGORY, mdrange_2d) { +// FIXME_OPENMPTARGET requires MDRange parallel_reduce +#ifndef KOKKOS_ENABLE_OPENMPTARGET TestMDRange_2D<TEST_EXECSPACE>::test_reduce2(100, 100); +#endif TestMDRange_2D<TEST_EXECSPACE>::test_for2(100, 100); } +#ifndef KOKKOS_ENABLE_OPENMPTARGET TEST(TEST_CATEGORY, mdrange_array_reduce) { TestMDRange_ReduceArray_2D<TEST_EXECSPACE>::test_arrayreduce2(4, 5); TestMDRange_ReduceArray_3D<TEST_EXECSPACE>::test_arrayreduce3(4, 5, 10); } +#endif } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestMDRange_d.hpp b/packages/kokkos/core/unit_test/TestMDRange_d.hpp index ea5300a1a33735ca183356e6a2a9d9fe4a063645..5ca57ccf483710bdfb7907bcd4e10d03d13ecc39 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_d.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_d.hpp @@ -49,10 +49,14 @@ namespace Test { TEST(TEST_CATEGORY, mdrange_3d) { TestMDRange_3D<TEST_EXECSPACE>::test_for3(1, 10, 100); TestMDRange_3D<TEST_EXECSPACE>::test_for3(100, 10, 100); +#ifndef KOKKOS_ENABLE_OPENMPTARGET + // FIXME_OPENMPTARGET requires MDRange parallel_reduce TestMDRange_3D<TEST_EXECSPACE>::test_reduce3(1, 10, 100); TestMDRange_3D<TEST_EXECSPACE>::test_reduce3(100, 10, 100); +#endif } +#ifndef KOKKOS_ENABLE_OPENMPTARGET TEST(TEST_CATEGORY, mdrange_neg_idx) { TestMDRange_2D_NegIdx<TEST_EXECSPACE>::test_2D_negidx(128, 32); TestMDRange_3D_NegIdx<TEST_EXECSPACE>::test_3D_negidx(128, 32, 8); @@ -60,5 +64,6 @@ TEST(TEST_CATEGORY, mdrange_neg_idx) { 
TestMDRange_5D_NegIdx<TEST_EXECSPACE>::test_5D_negidx(128, 32, 8, 8, 4); TestMDRange_6D_NegIdx<TEST_EXECSPACE>::test_6D_negidx(128, 32, 8, 8, 4, 2); } +#endif } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestMDRange_e.hpp b/packages/kokkos/core/unit_test/TestMDRange_e.hpp index d1576e5e5be0d2e1efa50614bda8b10b657ad7be..b9754e63d56bacb497fec4f932eb348c38f6c79f 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_e.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_e.hpp @@ -47,7 +47,10 @@ namespace Test { TEST(TEST_CATEGORY, mdrange_4d) { +// FIXME_OPENMPTARGET requires MDRange parallel_reduce +#ifndef KOKKOS_ENABLE_OPENMPTARGET TestMDRange_4D<TEST_EXECSPACE>::test_reduce4(100, 10, 10, 10); +#endif TestMDRange_4D<TEST_EXECSPACE>::test_for4(100, 10, 10, 10); } diff --git a/packages/kokkos/core/unit_test/TestMDRange_f.hpp b/packages/kokkos/core/unit_test/TestMDRange_f.hpp index 4f10ce273724b7b07c0d5b9733be8448bb9edee5..2cef1324d7c75059dfa50417d940bd7bf40a9763 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_f.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_f.hpp @@ -46,8 +46,11 @@ namespace Test { +// FIXME_OPENMPTARGET requires MDRange parallel_reduce +#ifndef KOKKOS_ENABLE_OPENMPTARGET TEST(TEST_CATEGORY, mdrange_scalar) { TestMDRange_ReduceScalar<TEST_EXECSPACE>::test_scalar_reduce(12, 11); } +#endif } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp b/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp new file mode 100644 index 0000000000000000000000000000000000000000..777f91aea3e560981d5dde05767f1726d8a1542f --- /dev/null +++ b/packages/kokkos/core/unit_test/TestMathematicalFunctions.hpp @@ -0,0 +1,871 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <algorithm> +#include <initializer_list> +#include <type_traits> +#include "Kokkos_ExecPolicy.hpp" +#include "Kokkos_Parallel_Reduce.hpp" + +#include <cfloat> + +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) +#else +#define MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS +#endif + +// clang-format off +template <class> +struct math_unary_function_return_type; +// Floating-point types +template <> struct math_unary_function_return_type< float> { using type = float; }; +template <> struct math_unary_function_return_type< double> { using type = double; }; +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS +template <> struct math_unary_function_return_type<long double> { using type = long double; }; +#endif +// Integral types +template <> struct math_unary_function_return_type< bool> { using type = double; }; +template <> struct math_unary_function_return_type< short> { using type = double; }; +template <> struct math_unary_function_return_type< unsigned short> { using type = double; }; +template <> struct math_unary_function_return_type< int> { using type = double; }; +template <> struct math_unary_function_return_type< unsigned int> { using type = double; }; +template <> struct math_unary_function_return_type< long> { using type = double; }; +template <> struct math_unary_function_return_type< unsigned long> { using type = double; }; +template <> struct math_unary_function_return_type< long long> { using type = double; }; +template <> struct math_unary_function_return_type<unsigned long long> { using type = double; }; +template <class T> +using math_unary_function_return_type_t = typename math_unary_function_return_type<T>::type; +template <class, class> +struct 
math_binary_function_return_type; +template <> struct math_binary_function_return_type< float, float> { using type = float; }; +template <> struct math_binary_function_return_type< float, double> { using type = double; }; +template <> struct math_binary_function_return_type< float, bool> { using type = double; }; +template <> struct math_binary_function_return_type< float, short> { using type = double; }; +template <> struct math_binary_function_return_type< float, int> { using type = double; }; +template <> struct math_binary_function_return_type< float, long> { using type = double; }; +template <> struct math_binary_function_return_type< float, long long> { using type = double; }; +template <> struct math_binary_function_return_type< float, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type< float, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type< float, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< float, unsigned long long> { using type = double; }; +template <> struct math_binary_function_return_type< double, float> { using type = double; }; +template <> struct math_binary_function_return_type< double, double> { using type = double; }; +template <> struct math_binary_function_return_type< double, bool> { using type = double; }; +template <> struct math_binary_function_return_type< double, short> { using type = double; }; +template <> struct math_binary_function_return_type< double, int> { using type = double; }; +template <> struct math_binary_function_return_type< double, long> { using type = double; }; +template <> struct math_binary_function_return_type< double, long long> { using type = double; }; +template <> struct math_binary_function_return_type< double, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type< double, unsigned int> { using type = double; }; +template <> struct 
math_binary_function_return_type< double, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< double, unsigned long long> { using type = double; }; +template <> struct math_binary_function_return_type< short, float> { using type = double; }; +template <> struct math_binary_function_return_type< short, double> { using type = double; }; +template <> struct math_binary_function_return_type< short, bool> { using type = double; }; +template <> struct math_binary_function_return_type< short, short> { using type = double; }; +template <> struct math_binary_function_return_type< short, int> { using type = double; }; +template <> struct math_binary_function_return_type< short, long> { using type = double; }; +template <> struct math_binary_function_return_type< short, long long> { using type = double; }; +template <> struct math_binary_function_return_type< short, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type< short, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type< short, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< short, unsigned long long> { using type = double; }; +template <> struct math_binary_function_return_type< int, float> { using type = double; }; +template <> struct math_binary_function_return_type< int, double> { using type = double; }; +template <> struct math_binary_function_return_type< int, bool> { using type = double; }; +template <> struct math_binary_function_return_type< int, short> { using type = double; }; +template <> struct math_binary_function_return_type< int, int> { using type = double; }; +template <> struct math_binary_function_return_type< int, long> { using type = double; }; +template <> struct math_binary_function_return_type< int, long long> { using type = double; }; +template <> struct math_binary_function_return_type< int, unsigned short> { using type = 
double; }; +template <> struct math_binary_function_return_type< int, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type< int, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< int, unsigned long long> { using type = double; }; +template <> struct math_binary_function_return_type< long, float> { using type = double; }; +template <> struct math_binary_function_return_type< long, double> { using type = double; }; +template <> struct math_binary_function_return_type< long, bool> { using type = double; }; +template <> struct math_binary_function_return_type< long, short> { using type = double; }; +template <> struct math_binary_function_return_type< long, int> { using type = double; }; +template <> struct math_binary_function_return_type< long, long> { using type = double; }; +template <> struct math_binary_function_return_type< long, long long> { using type = double; }; +template <> struct math_binary_function_return_type< long, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type< long, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type< long, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< long, unsigned long long> { using type = double; }; +template <> struct math_binary_function_return_type< long long, float> { using type = double; }; +template <> struct math_binary_function_return_type< long long, double> { using type = double; }; +template <> struct math_binary_function_return_type< long long, bool> { using type = double; }; +template <> struct math_binary_function_return_type< long long, short> { using type = double; }; +template <> struct math_binary_function_return_type< long long, int> { using type = double; }; +template <> struct math_binary_function_return_type< long long, long> { using type = double; }; +template <> struct 
math_binary_function_return_type< long long, long long> { using type = double; }; +template <> struct math_binary_function_return_type< long long, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type< long long, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type< long long, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< long long, unsigned long long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, float> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, double> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, bool> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, short> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, int> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, long long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned short, unsigned long long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, float> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, double> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, bool> { using type = double; }; +template <> 
struct math_binary_function_return_type< unsigned int, short> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, int> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, long long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned int, unsigned long long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, float> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, double> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, bool> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, short> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, int> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, long long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type< unsigned long, unsigned long long> { using type = double; }; 
+template <> struct math_binary_function_return_type<unsigned long long, float> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, double> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, bool> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, short> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, int> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, long> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, long long> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, unsigned short> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, unsigned int> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, unsigned long> { using type = double; }; +template <> struct math_binary_function_return_type<unsigned long long, unsigned long long> { using type = double; }; +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS +template <> struct math_binary_function_return_type< float, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< double, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, float> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, double> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, bool> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, short> { using type = long 
double; }; +template <> struct math_binary_function_return_type< long double, int> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, long> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, long long> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, unsigned short> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, unsigned int> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, unsigned long> { using type = long double; }; +template <> struct math_binary_function_return_type< long double, unsigned long long> { using type = long double; }; +template <> struct math_binary_function_return_type< short, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< int, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< long, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< long long, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< unsigned short, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< unsigned int, long double> { using type = long double; }; +template <> struct math_binary_function_return_type< unsigned long, long double> { using type = long double; }; +template <> struct math_binary_function_return_type<unsigned long long, long double> { using type = long double; }; +#endif +template <class T, class U> +using math_binary_function_return_type_t = typename math_binary_function_return_type<T, U>::type; +// clang-format on + +struct FloatingPointComparison { + private: + template <class T> + KOKKOS_FUNCTION double eps(T) const { + return DBL_EPSILON; + } + KOKKOS_FUNCTION + double eps(float) const { 
return FLT_EPSILON; } + KOKKOS_FUNCTION + double eps(long double) const { return LDBL_EPSILON; } + + // Using absolute here instead of abs, since we actually test abs ... + template <class T> + KOKKOS_FUNCTION typename std::enable_if<std::is_signed<T>::value, T>::type + absolute(T val) const { + return val < T(0) ? -val : val; + } + + template <class T> + KOKKOS_FUNCTION typename std::enable_if<!std::is_signed<T>::value, T>::type + absolute(T val) const { + return val; + } + + public: + template <class FPT> + KOKKOS_FUNCTION bool compare_near_zero(FPT const& fpv, double ulp) const { + auto abs_tol = eps(fpv) * ulp; + + bool ar = absolute(fpv) < abs_tol; + if (!ar) { +#if !defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ENABLE_HIP) + printf("absolute value exceeds tolerance [|%e| > %e]\n", (double)fpv, + abs_tol); +#endif + } + + return ar; + } + + template <class Lhs, class Rhs> + KOKKOS_FUNCTION bool compare(Lhs const& lhs, Rhs const& rhs, + double ulp) const { + if (lhs == 0) { + return compare_near_zero(rhs, ulp); + } else if (rhs == 0) { + return compare_near_zero(lhs, ulp); + } else { + auto rel_tol = (eps(lhs) < eps(rhs) ? eps(lhs) : eps(rhs)) * ulp; + double abs_diff = static_cast<double>(rhs > lhs ? rhs - lhs : lhs - rhs); + double min_denom = static_cast<double>( + absolute(rhs) < absolute(lhs) ? 
absolute(rhs) : absolute(lhs)); + double rel_diff = abs_diff / min_denom; + bool ar = rel_diff < rel_tol; + if (!ar) { +#if !defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ENABLE_HIP) + printf("relative difference exceeds tolerance [%e > %e]\n", + (double)rel_diff, rel_tol); +#endif + } + + return ar; + } + } +}; + +template <class> +struct math_function_name; + +#define DEFINE_UNARY_FUNCTION_EVAL(FUNC, ULP_FACTOR) \ + struct MathUnaryFunction_##FUNC { \ + template <typename T> \ + static KOKKOS_FUNCTION auto eval(T x) { \ + static_assert(std::is_same<decltype(Kokkos::Experimental::FUNC((T)0)), \ + math_unary_function_return_type_t<T>>::value, \ + ""); \ + return Kokkos::Experimental::FUNC(x); \ + } \ + template <typename T> \ + static auto eval_std(T x) { \ + static_assert(std::is_same<decltype(std::FUNC((T)0)), \ + math_unary_function_return_type_t<T>>::value, \ + ""); \ + return std::FUNC(x); \ + } \ + static KOKKOS_FUNCTION double ulp_factor() { return ULP_FACTOR; } \ + }; \ + using kk_##FUNC = MathUnaryFunction_##FUNC; \ + template <> \ + struct math_function_name<MathUnaryFunction_##FUNC> { \ + static constexpr char name[] = #FUNC; \ + }; \ + constexpr char math_function_name<MathUnaryFunction_##FUNC>::name[] + +// Generally the expected ULP error should come from here: +// https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html +// For now 1s largely seem to work ... 
+DEFINE_UNARY_FUNCTION_EVAL(exp, 2); +DEFINE_UNARY_FUNCTION_EVAL(exp2, 2); +DEFINE_UNARY_FUNCTION_EVAL(expm1, 2); +DEFINE_UNARY_FUNCTION_EVAL(log, 2); +DEFINE_UNARY_FUNCTION_EVAL(log10, 2); +DEFINE_UNARY_FUNCTION_EVAL(log2, 2); +DEFINE_UNARY_FUNCTION_EVAL(log1p, 2); + +DEFINE_UNARY_FUNCTION_EVAL(sqrt, 2); +DEFINE_UNARY_FUNCTION_EVAL(cbrt, 2); + +DEFINE_UNARY_FUNCTION_EVAL(sin, 2); +DEFINE_UNARY_FUNCTION_EVAL(cos, 2); +DEFINE_UNARY_FUNCTION_EVAL(tan, 2); +DEFINE_UNARY_FUNCTION_EVAL(asin, 2); +DEFINE_UNARY_FUNCTION_EVAL(acos, 2); +DEFINE_UNARY_FUNCTION_EVAL(atan, 2); + +DEFINE_UNARY_FUNCTION_EVAL(sinh, 2); +DEFINE_UNARY_FUNCTION_EVAL(cosh, 2); +DEFINE_UNARY_FUNCTION_EVAL(tanh, 2); +DEFINE_UNARY_FUNCTION_EVAL(asinh, 4); +DEFINE_UNARY_FUNCTION_EVAL(acosh, 2); +DEFINE_UNARY_FUNCTION_EVAL(atanh, 2); + +DEFINE_UNARY_FUNCTION_EVAL(erf, 2); +DEFINE_UNARY_FUNCTION_EVAL(erfc, 5); +// has a larger error due to some impls doing integer exact. +// We cast always to double leading to larger difference when comparing our +// tgamma to std::tgamma on the host. 
+DEFINE_UNARY_FUNCTION_EVAL(tgamma, 200); +DEFINE_UNARY_FUNCTION_EVAL(lgamma, 2); + +DEFINE_UNARY_FUNCTION_EVAL(ceil, 2); +DEFINE_UNARY_FUNCTION_EVAL(floor, 2); +DEFINE_UNARY_FUNCTION_EVAL(trunc, 2); +#ifndef KOKKOS_ENABLE_SYCL +DEFINE_UNARY_FUNCTION_EVAL(nearbyint, 2); +#endif + +#undef DEFINE_UNARY_FUNCTION_EVAL + +#define DEFINE_BINARY_FUNCTION_EVAL(FUNC, ULP_FACTOR) \ + struct MathBinaryFunction_##FUNC { \ + template <typename T, typename U> \ + static KOKKOS_FUNCTION auto eval(T x, U y) { \ + static_assert( \ + std::is_same<decltype(Kokkos::Experimental::FUNC((T)0, (U)0)), \ + math_binary_function_return_type_t<T, U>>::value, \ + ""); \ + return Kokkos::Experimental::FUNC(x, y); \ + } \ + template <typename T, typename U> \ + static auto eval_std(T x, U y) { \ + static_assert( \ + std::is_same<decltype(std::FUNC((T)0, (U)0)), \ + math_binary_function_return_type_t<T, U>>::value, \ + ""); \ + return std::FUNC(x, y); \ + } \ + static KOKKOS_FUNCTION double ulp_factor() { return ULP_FACTOR; } \ + }; \ + using kk_##FUNC = MathBinaryFunction_##FUNC; \ + template <> \ + struct math_function_name<MathBinaryFunction_##FUNC> { \ + static constexpr char name[] = #FUNC; \ + }; \ + constexpr char math_function_name<MathBinaryFunction_##FUNC>::name[] + +DEFINE_BINARY_FUNCTION_EVAL(pow, 2); +DEFINE_BINARY_FUNCTION_EVAL(hypot, 2); + +#undef DEFINE_BINARY_FUNCTION_EVAL + +// clang-format off +template <class> +struct type_helper; +#define DEFINE_TYPE_NAME(T) \ +template <> struct type_helper<T> { static char const * name() { return #T; } }; +DEFINE_TYPE_NAME(bool) +DEFINE_TYPE_NAME(int) +DEFINE_TYPE_NAME(long) +DEFINE_TYPE_NAME(long long) +DEFINE_TYPE_NAME(unsigned int) +DEFINE_TYPE_NAME(unsigned long) +DEFINE_TYPE_NAME(unsigned long long) +DEFINE_TYPE_NAME(float) +DEFINE_TYPE_NAME(double) +DEFINE_TYPE_NAME(long double) +#undef DEFINE_TYPE_NAME +// clang-format on + +template <class Space, class Func, class Arg, std::size_t N, + class Ret = 
math_unary_function_return_type_t<Arg>> +struct TestMathUnaryFunction : FloatingPointComparison { + Arg val_[N]; + Ret res_[N]; + TestMathUnaryFunction(const Arg (&val)[N]) { + std::cout << math_function_name<Func>::name << "(" + << type_helper<Arg>::name() << ")\n"; + std::copy(val, val + N, val_); + std::transform(val, val + N, res_, + [](auto x) { return Func::eval_std(x); }); + run(); + } + void run() { + int errors = 0; + Kokkos::parallel_reduce(Kokkos::RangePolicy<Space>(0, N), *this, errors); + ASSERT_EQ(errors, 0); + } + KOKKOS_FUNCTION void operator()(int i, int& e) const { + bool ar = compare(Func::eval(val_[i]), res_[i], Func::ulp_factor()); + if (!ar) { + ++e; +#if !defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ENABLE_HIP) + printf("value at %f which is %f was expected to be %f\n", (double)val_[i], + (double)Func::eval(val_[i]), (double)res_[i]); +#endif + } + } +}; + +template <class Space, class... Func, class Arg, std::size_t N> +void do_test_math_unary_function(const Arg (&x)[N]) { + (void)std::initializer_list<int>{ + (TestMathUnaryFunction<Space, Func, Arg, N>(x), 0)...}; +} + +#define TEST_MATH_FUNCTION(FUNC) \ + do_test_math_unary_function<TEST_EXECSPACE, MathUnaryFunction_##FUNC> + +template <class Space, class Func, class Arg1, class Arg2, + class Ret = math_binary_function_return_type_t<Arg1, Arg2>> +struct TestMathBinaryFunction : FloatingPointComparison { + Arg1 val1_; + Arg2 val2_; + Ret res_; + TestMathBinaryFunction(Arg1 val1, Arg2 val2) + : val1_(val1), val2_(val2), res_(Func::eval_std(val1, val2)) { + std::cout << math_function_name<Func>::name << "(" + << type_helper<Arg1>::name() << ", " << type_helper<Arg2>::name() + << ")\n"; + run(); + } + void run() { + int errors = 0; + Kokkos::parallel_reduce(Kokkos::RangePolicy<Space>(0, 1), *this, errors); + ASSERT_EQ(errors, 0); + } + KOKKOS_FUNCTION void operator()(int, int& e) const { + bool ar = compare(Func::eval(val1_, val2_), res_, Func::ulp_factor()); + if (!ar) { + ++e; +#if 
!defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ENABLE_HIP) + printf("value at %f, %f which is %f was expected to be %f\n", + (double)val1_, (double)val2_, (double)Func::eval(val1_, val2_), + (double)res_); +#endif + } + } +}; + +template <class Space, class... Func, class Arg1, class Arg2> +void do_test_math_binary_function(Arg1 arg1, Arg2 arg2) { + (void)std::initializer_list<int>{ + (TestMathBinaryFunction<Space, Func, Arg1, Arg2>(arg1, arg2), 0)...}; +} + +TEST(TEST_CATEGORY, mathematical_functions_trigonometric_functions) { + TEST_MATH_FUNCTION(sin)({true, false}); + TEST_MATH_FUNCTION(sin)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(sin)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(sin)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(sin)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(sin)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(sin)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(sin)({.1f, .2f, .3f}); + TEST_MATH_FUNCTION(sin)({.4, .5, .6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(sin)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(cos)({true, false}); + TEST_MATH_FUNCTION(cos)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(cos)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(cos)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(cos)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(cos)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(cos)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(cos)({.1f, .2f, .3f}); + TEST_MATH_FUNCTION(cos)({.4, .5, .6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(cos)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(tan)({true, false}); + TEST_MATH_FUNCTION(tan)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(tan)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(tan)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(tan)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(tan)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(tan)({2ull, 3ull, 4ull, 
5ull, 6ull}); + TEST_MATH_FUNCTION(tan)({.1f, .2f, .3f}); + TEST_MATH_FUNCTION(tan)({.4, .5, .6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(tan)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(asin)({true, false}); + TEST_MATH_FUNCTION(asin)({-1, 0, 1}); + TEST_MATH_FUNCTION(asin)({-1l, 0l, 1l}); + TEST_MATH_FUNCTION(asin)({-1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(asin)({0u, 1u}); + TEST_MATH_FUNCTION(asin)({0ul, 1ul}); + TEST_MATH_FUNCTION(asin)({0ull, 1ull}); + TEST_MATH_FUNCTION(asin)({-1.f, .9f, -.8f, .7f, -.6f}); + TEST_MATH_FUNCTION(asin)({-.5, .4, -.3, .2, -.1, 0.}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(asin)({-.5l, .3l, 0.l, .2l, .4l, .6l}); +#endif + + TEST_MATH_FUNCTION(acos)({true, false}); + TEST_MATH_FUNCTION(acos)({-1, 0, 1}); + TEST_MATH_FUNCTION(acos)({-1l, 0l, 1l}); + TEST_MATH_FUNCTION(acos)({-1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(acos)({0u, 1u}); + TEST_MATH_FUNCTION(acos)({0ul, 1ul}); + TEST_MATH_FUNCTION(acos)({0ull, 1ull}); + TEST_MATH_FUNCTION(acos)({-1.f, .9f, -.8f, .7f, -.6f}); + TEST_MATH_FUNCTION(acos)({-.5, .4, -.3, .2, -.1, 0.}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(acos)({-.5l, .3l, 0.l, .2l, .4l, .6l}); +#endif + + TEST_MATH_FUNCTION(atan)({true, false}); + TEST_MATH_FUNCTION(atan)({-1, 0, 1}); + TEST_MATH_FUNCTION(atan)({-1l, 0l, 1l}); + TEST_MATH_FUNCTION(atan)({-1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(atan)({0u, 1u}); + TEST_MATH_FUNCTION(atan)({0ul, 1ul}); + TEST_MATH_FUNCTION(atan)({0ull, 1ull}); + TEST_MATH_FUNCTION(atan)({-1.5f, 1.3f, -1.1f, .9f, -.7f, .5f}); + TEST_MATH_FUNCTION(atan)({1.4, -1.2, 1., -.8, .6, -.4, .2, -0.}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(atan)({-.98l, .67l, -54.l, .34l, -.21l}); +#endif + + // TODO atan2 +} + +TEST(TEST_CATEGORY, mathematical_functions_power_functions) { + TEST_MATH_FUNCTION(sqrt)({0, 1, 2, 3, 5, 7, 11}); + 
TEST_MATH_FUNCTION(sqrt)({0l, 1l, 2l, 3l, 5l, 7l, 11l}); + TEST_MATH_FUNCTION(sqrt)({0ll, 1ll, 2ll, 3ll, 5ll, 7ll, 11ll}); + TEST_MATH_FUNCTION(sqrt)({0u, 1u, 2u, 3u, 5u, 7u}); + TEST_MATH_FUNCTION(sqrt)({0ul, 1ul, 2ul, 3ul, 5ul, 7ul}); + TEST_MATH_FUNCTION(sqrt)({0ull, 1ull, 2ull, 3ull, 5ull, 7ull}); + TEST_MATH_FUNCTION(sqrt)({10.f, 20.f, 30.f, 40.f}); + TEST_MATH_FUNCTION(sqrt)({11.1, 22.2, 33.3, 44.4}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(sqrt)({10.l, 20.l, 30.l, 40.l}); +#endif + + TEST_MATH_FUNCTION(cbrt)({-5, -3, -1, 2, 4, 6}); + TEST_MATH_FUNCTION(cbrt)({-5l, -3l, -1l, 2l, 4l, 6l}); + TEST_MATH_FUNCTION(cbrt)({-5ll, -3ll, -1ll, 2ll, 4ll, 6ll}); + TEST_MATH_FUNCTION(cbrt)({0u, 1u, 2u, 3u, 4u, 5u}); + TEST_MATH_FUNCTION(cbrt)({0ul, 1ul, 2ul, 3ul, 4ul, 5ul}); + TEST_MATH_FUNCTION(cbrt)({0ull, 1ull, 2ull, 3ull, 4ull, 5ull}); + TEST_MATH_FUNCTION(cbrt)({-1.f, .2f, -3.f, .4f, -5.f}); + TEST_MATH_FUNCTION(cbrt)({11.1, -2.2, 33.3, -4.4, 55.5}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(cbrt)({-10.l, 20.l, -30.l, 40.l, -50.l}); +#endif + + do_test_math_binary_function<TEST_EXECSPACE, kk_pow>(2.f, 3.f); + do_test_math_binary_function<TEST_EXECSPACE, kk_pow>(2., 3.); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + do_test_math_binary_function<TEST_EXECSPACE, kk_pow>(2.l, 3.l); +#endif + + do_test_math_binary_function<TEST_EXECSPACE, kk_hypot>(2.f, 3.f); + do_test_math_binary_function<TEST_EXECSPACE, kk_hypot>(2., 3.); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS +#if !(defined(KOKKOS_ARCH_POWER8) || defined(KOKKOS_ARCH_POWER9)) // FIXME + do_test_math_binary_function<TEST_EXECSPACE, kk_hypot>(2.l, 3.l); +#endif +#endif +} + +TEST(TEST_CATEGORY, mathematical_functions_exponential_functions) { + TEST_MATH_FUNCTION(exp)({-9, -8, -7, -6, -5, 4, 3, 2, 1, 0}); + TEST_MATH_FUNCTION(exp)({-9l, -8l, -7l, -6l, -5l, 4l, 3l, 2l, 1l, 0l}); + TEST_MATH_FUNCTION(exp)({-9ll, -8ll, 
-7ll, -6ll, -5ll, 4ll, 3ll, 2ll, 1ll}); + TEST_MATH_FUNCTION(exp)({0u, 1u, 2u, 3u, 4u, 5u}); + TEST_MATH_FUNCTION(exp)({0ul, 1ul, 2ul, 3ul, 4ul, 5ul}); + TEST_MATH_FUNCTION(exp)({0ull, 1ull, 2ull, 3ull, 4ull, 5ull}); + TEST_MATH_FUNCTION(exp)({-98.f, -7.6f, -.54f, 3.2f, 1.f, -0.f}); + TEST_MATH_FUNCTION(exp)({-98., -7.6, -.54, 3.2, 1., -0.}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(exp)({-98.l, -7.6l, -.54l, 3.2l, 1.l, -0.l}); +#endif + + TEST_MATH_FUNCTION(exp2)({-9, -8, -7, -6, -5, 4, 3, 2, 1, 0}); + TEST_MATH_FUNCTION(exp2)({-9l, -8l, -7l, -6l, -5l, 4l, 3l, 2l, 1l, 0l}); + TEST_MATH_FUNCTION(exp2)({-9ll, -8ll, -7ll, -6ll, -5ll, 4ll, 3ll, 2ll, 1ll}); + TEST_MATH_FUNCTION(exp2)({0u, 1u, 2u, 3u, 4u, 5u}); + TEST_MATH_FUNCTION(exp2)({0ul, 1ul, 2ul, 3ul, 4ul, 5ul}); + TEST_MATH_FUNCTION(exp2)({0ull, 1ull, 2ull, 3ull, 4ull, 5ull}); + TEST_MATH_FUNCTION(exp2)({-98.f, -7.6f, -.54f, 3.2f, 1.f, -0.f}); + TEST_MATH_FUNCTION(exp2)({-98., -7.6, -.54, 3.2, 1., -0.}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(exp2)({-98.l, -7.6l, -.54l, 3.2l, 1.l, -0.l}); +#endif + + TEST_MATH_FUNCTION(expm1)({-9, -8, -7, -6, -5, 4, 3, 2, 1, 0}); + TEST_MATH_FUNCTION(expm1)({-9l, -8l, -7l, -6l, -5l, 4l, 3l, 2l, 1l, 0l}); + TEST_MATH_FUNCTION(expm1)({-9ll, -8ll, -7ll, -6ll, -5ll, 4ll, 3ll, 2ll, 1ll}); + TEST_MATH_FUNCTION(expm1)({0u, 1u, 2u, 3u, 4u, 5u}); + TEST_MATH_FUNCTION(expm1)({0ul, 1ul, 2ul, 3ul, 4ul, 5ul}); + TEST_MATH_FUNCTION(expm1)({0ull, 1ull, 2ull, 3ull, 4ull, 5ull}); + TEST_MATH_FUNCTION(expm1)({-98.f, -7.6f, -.54f, 3.2f, 1.f, -0.f}); + TEST_MATH_FUNCTION(expm1)({-98., -7.6, -.54, 3.2, 1., -0.}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(expm1)({-98.l, -7.6l, -.54l, 3.2l, 1.l, -0.l}); +#endif + + TEST_MATH_FUNCTION(log)({1, 23, 456, 7890}); + TEST_MATH_FUNCTION(log)({1l, 23l, 456l, 7890l}); + TEST_MATH_FUNCTION(log)({1ll, 23ll, 456ll, 7890ll}); + 
TEST_MATH_FUNCTION(log)({1u, 23u, 456u, 7890u}); + TEST_MATH_FUNCTION(log)({1ul, 23ul, 456ul, 7890ul}); + TEST_MATH_FUNCTION(log)({1ull, 23ull, 456ull, 7890ull}); + TEST_MATH_FUNCTION(log)({1234.f, 567.f, 89.f, .1f}); + TEST_MATH_FUNCTION(log)({1234., 567., 89., .02}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(log)({1234.l, 567.l, 89.l, .003l}); +#endif + + TEST_MATH_FUNCTION(log10)({1, 23, 456, 7890}); + TEST_MATH_FUNCTION(log10)({1l, 23l, 456l, 7890l}); + TEST_MATH_FUNCTION(log10)({1ll, 23ll, 456ll, 7890ll}); + TEST_MATH_FUNCTION(log10)({1u, 23u, 456u, 7890u}); + TEST_MATH_FUNCTION(log10)({1ul, 23ul, 456ul, 7890ul}); + TEST_MATH_FUNCTION(log10)({1ull, 23ull, 456ull, 7890ull}); + TEST_MATH_FUNCTION(log10)({1234.f, 567.f, 89.f, .1f}); + TEST_MATH_FUNCTION(log10)({1234., 567., 89., .02}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(log10)({1234.l, 567.l, 89.l, .003l}); +#endif + + TEST_MATH_FUNCTION(log2)({1, 23, 456, 7890}); + TEST_MATH_FUNCTION(log2)({1l, 23l, 456l, 7890l}); + TEST_MATH_FUNCTION(log2)({1ll, 23ll, 456ll, 7890ll}); + TEST_MATH_FUNCTION(log2)({1u, 23u, 456u, 7890u}); + TEST_MATH_FUNCTION(log2)({1ul, 23ul, 456ul, 7890ul}); + TEST_MATH_FUNCTION(log2)({1ull, 23ull, 456ull, 7890ull}); + TEST_MATH_FUNCTION(log2)({1234.f, 567.f, 89.f, .1f}); + TEST_MATH_FUNCTION(log2)({1234., 567., 89., .02}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(log2)({1234.l, 567.l, 89.l, .003l}); +#endif + + TEST_MATH_FUNCTION(log1p)({1, 23, 456, 7890, 0}); + TEST_MATH_FUNCTION(log1p)({1l, 23l, 456l, 7890l, 0l}); + TEST_MATH_FUNCTION(log1p)({1ll, 23ll, 456ll, 7890ll, 0ll}); + TEST_MATH_FUNCTION(log1p)({1u, 23u, 456u, 7890u, 0u}); + TEST_MATH_FUNCTION(log1p)({1ul, 23ul, 456ul, 7890ul, 0ul}); + TEST_MATH_FUNCTION(log1p)({1ull, 23ull, 456ull, 7890ull, 0ull}); + TEST_MATH_FUNCTION(log1p)({1234.f, 567.f, 89.f, -.9f}); + TEST_MATH_FUNCTION(log1p)({1234., 567., 89., -.08}); +#ifdef 
MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(log1p)({1234.l, 567.l, 89.l, -.007l}); +#endif +} + +TEST(TEST_CATEGORY, mathematical_functions_hyperbolic_functions) { + TEST_MATH_FUNCTION(sinh)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(sinh)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(sinh)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(sinh)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(sinh)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(sinh)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(sinh)({.1f, -2.f, 3.f}); + TEST_MATH_FUNCTION(sinh)({-4., .5, -.6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(sinh)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(cosh)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(cosh)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(cosh)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(cosh)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(cosh)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(cosh)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(cosh)({.1f, -2.f, 3.f}); + TEST_MATH_FUNCTION(cosh)({-4., .5, -.6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(cosh)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(tanh)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(tanh)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(tanh)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(tanh)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(tanh)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(tanh)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(tanh)({.1f, -2.f, 3.f}); + TEST_MATH_FUNCTION(tanh)({-4., .5, -.6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(tanh)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(asinh)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(asinh)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(asinh)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(asinh)({2u, 3u, 4u, 5u, 6u}); + 
TEST_MATH_FUNCTION(asinh)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(asinh)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(asinh)({.1f, -2.f, 3.f}); + TEST_MATH_FUNCTION(asinh)({-4., .5, -.6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(asinh)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(acosh)({1, 2, 3, 4, 5, 6}); + TEST_MATH_FUNCTION(acosh)({1l, 2l, 3l, 4l, 5l, 6l}); + TEST_MATH_FUNCTION(acosh)({1ll, 2ll, 3ll, 4ll, 5ll, 6ll}); + TEST_MATH_FUNCTION(acosh)({1u, 2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(acosh)({1ul, 2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(acosh)({1ull, 2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(acosh)({1.2f, 34.f, 56.f, 789.f}); + TEST_MATH_FUNCTION(acosh)({1.2, 34., 56., 789.}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(acosh)({1.2l, 34.l, 56.l, 789.l}); +#endif + + TEST_MATH_FUNCTION(atanh)({0}); + TEST_MATH_FUNCTION(atanh)({0l}); + TEST_MATH_FUNCTION(atanh)({0ll}); + TEST_MATH_FUNCTION(atanh)({0u}); + TEST_MATH_FUNCTION(atanh)({0ul}); + TEST_MATH_FUNCTION(atanh)({0ull}); + TEST_MATH_FUNCTION(atanh)({-.97f, .86f, -.53f, .42f, -.1f, 0.f}); + TEST_MATH_FUNCTION(atanh)({-.97, .86, -.53, .42, -.1, 0.}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(atanh)({-.97l, .86l, -.53l, .42l, -.1l, 0.l}); +#endif +} + +TEST(TEST_CATEGORY, mathematical_functions_error_and_gamma_functions) { + TEST_MATH_FUNCTION(erf)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(erf)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(erf)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(erf)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(erf)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(erf)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(erf)({.1f, -2.f, 3.f}); + TEST_MATH_FUNCTION(erf)({-4., .5, -.6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(erf)({.7l, .8l, .9l}); +#endif + + 
TEST_MATH_FUNCTION(erfc)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(erfc)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(erfc)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(erfc)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(erfc)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(erfc)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(erfc)({.1f, -2.f, 3.f}); + TEST_MATH_FUNCTION(erfc)({-4., .5, -.6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(erfc)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(tgamma)({1, 2, 3, 4, 56, 78}); + TEST_MATH_FUNCTION(tgamma)({1l, 2l, 3l, 4l, 56l, 78l}); + TEST_MATH_FUNCTION(tgamma)({1ll, 2ll, 3ll, 4ll, 56ll, 78ll}); + TEST_MATH_FUNCTION(tgamma)({1u, 2u, 3u, 4u, 56u, 78u}); + TEST_MATH_FUNCTION(tgamma)({1ul, 2ul, 3ul, 4ul, 56ul, 78ul}); + TEST_MATH_FUNCTION(tgamma)({1ull, 2ull, 3ull, 4ull, 56ull, 78ull}); + TEST_MATH_FUNCTION(tgamma)({.1f, -2.2f, 3.f}); + TEST_MATH_FUNCTION(tgamma)({-4.4, .5, -.6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(tgamma)({.7l, .8l, .9l}); +#endif + + TEST_MATH_FUNCTION(lgamma)({1, 2, 3, 4, 56, 78}); + TEST_MATH_FUNCTION(lgamma)({1l, 2l, 3l, 4l, 56l, 78l}); + TEST_MATH_FUNCTION(lgamma)({1ll, 2ll, 3ll, 4ll, 56ll, 78ll}); + TEST_MATH_FUNCTION(lgamma)({1u, 2u, 3u, 4u, 56u, 78u}); + TEST_MATH_FUNCTION(lgamma)({1ul, 2ul, 3ul, 4ul, 56ul, 78ul}); + TEST_MATH_FUNCTION(lgamma)({1ull, 2ull, 3ull, 4ull, 56ull, 78ull}); + TEST_MATH_FUNCTION(lgamma)({.1f, -2.2f, 3.f}); + TEST_MATH_FUNCTION(lgamma)({-4.4, .5, -.6}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(lgamma)({.7l, .8l, .9l}); +#endif +} + +TEST(TEST_CATEGORY, + mathematical_functions_nearest_interger_floating_point_operations) { + TEST_MATH_FUNCTION(ceil)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(ceil)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(ceil)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(ceil)({2u, 3u, 4u, 5u, 6u}); + 
TEST_MATH_FUNCTION(ceil)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(ceil)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(ceil)({-1.1f, 2.2f, -3.3f, 4.4f, -5.5f}); + TEST_MATH_FUNCTION(ceil)({-6.6, 7.7, -8.8, 9.9}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(ceil)({12.3l, 4.56l, 789.l}); +#endif + + TEST_MATH_FUNCTION(floor)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(floor)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(floor)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(floor)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(floor)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(floor)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(floor)({-1.1f, 2.2f, -3.3f, 4.4f, -5.5f}); + TEST_MATH_FUNCTION(floor)({-6.6, 7.7, -8.8, 9.9}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(floor)({12.3l, 4.56l, 789.l}); +#endif + + TEST_MATH_FUNCTION(trunc)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(trunc)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(trunc)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(trunc)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(trunc)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(trunc)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(trunc)({-1.1f, 2.2f, -3.3f, 4.4f, -5.5f}); + TEST_MATH_FUNCTION(trunc)({-6.6, 7.7, -8.8, 9.9}); +#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(trunc)({12.3l, 4.56l, 789.l}); +#endif + +#ifndef KOKKOS_ENABLE_SYCL + TEST_MATH_FUNCTION(nearbyint)({-3, -2, -1, 0, 1}); + TEST_MATH_FUNCTION(nearbyint)({-3l, -2l, -1l, 0l, 1l}); + TEST_MATH_FUNCTION(nearbyint)({-3ll, -2ll, -1ll, 0ll, 1ll}); + TEST_MATH_FUNCTION(nearbyint)({2u, 3u, 4u, 5u, 6u}); + TEST_MATH_FUNCTION(nearbyint)({2ul, 3ul, 4ul, 5ul, 6ul}); + TEST_MATH_FUNCTION(nearbyint)({2ull, 3ull, 4ull, 5ull, 6ull}); + TEST_MATH_FUNCTION(nearbyint)({-1.1f, 2.2f, -3.3f, 4.4f, -5.5f}); + TEST_MATH_FUNCTION(nearbyint)({-6.6, 7.7, -8.8, 9.9}); +#ifdef 
MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS + TEST_MATH_FUNCTION(nearbyint)({12.3l, 4.56l, 789.l}); +#endif +#endif +} diff --git a/packages/kokkos/core/unit_test/TestNonTrivialScalarTypes.hpp b/packages/kokkos/core/unit_test/TestNonTrivialScalarTypes.hpp index 3ee4a25ec03b257ad7a13e8045baaa0fd9be1e9f..6c8a47a5861dd361364a94551abcfd50d0e85153 100644 --- a/packages/kokkos/core/unit_test/TestNonTrivialScalarTypes.hpp +++ b/packages/kokkos/core/unit_test/TestNonTrivialScalarTypes.hpp @@ -173,17 +173,17 @@ struct my_complex { } KOKKOS_INLINE_FUNCTION - bool operator==(const my_complex &src) { + bool operator==(const my_complex &src) const { return (re == src.re) && (im == src.im) && (dummy == src.dummy); } KOKKOS_INLINE_FUNCTION - bool operator!=(const my_complex &src) { + bool operator!=(const my_complex &src) const { return (re != src.re) || (im != src.im) || (dummy != src.dummy); } KOKKOS_INLINE_FUNCTION - bool operator!=(const double &val) { + bool operator!=(const double &val) const { return (re != val) || (im != 0) || (dummy != 0); } diff --git a/packages/kokkos/core/unit_test/TestNumericTraits.hpp b/packages/kokkos/core/unit_test/TestNumericTraits.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fe01b83834f26eddc15e71360d77e85452ef0238 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestNumericTraits.hpp @@ -0,0 +1,336 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <type_traits> +#include "Kokkos_NumericTraits.hpp" +#include "Kokkos_ExecPolicy.hpp" + +struct extrema { +#define DEFINE_EXTREMA(T, m, M) \ + KOKKOS_FUNCTION static T min(T) { return m; } \ + KOKKOS_FUNCTION static T max(T) { return M; } + + DEFINE_EXTREMA(char, CHAR_MIN, CHAR_MAX); + DEFINE_EXTREMA(signed char, SCHAR_MIN, SCHAR_MAX); + DEFINE_EXTREMA(unsigned char, 0, UCHAR_MAX); + DEFINE_EXTREMA(short, SHRT_MIN, SHRT_MAX); + DEFINE_EXTREMA(unsigned short, 0, USHRT_MAX); + DEFINE_EXTREMA(int, INT_MIN, INT_MAX); + DEFINE_EXTREMA(unsigned, 0U, UINT_MAX); + DEFINE_EXTREMA(long, LONG_MIN, LONG_MAX); + DEFINE_EXTREMA(unsigned long, 0UL, ULONG_MAX); + DEFINE_EXTREMA(long long, LLONG_MIN, LLONG_MAX); + DEFINE_EXTREMA(unsigned long long, 0ULL, ULLONG_MAX); + + DEFINE_EXTREMA(float, -FLT_MAX, FLT_MAX); + DEFINE_EXTREMA(double, -DBL_MAX, DBL_MAX); + DEFINE_EXTREMA(long double, -LDBL_MAX, LDBL_MAX); + +#undef DEFINE_EXTREMA +}; + +// clang-format off +struct Infinity { template <class T> using trait = Kokkos::Experimental::infinity<T>; }; +struct Epsilon { template <class T> using trait = Kokkos::Experimental::epsilon<T>; }; +struct FiniteMin { template <class T> using trait = Kokkos::Experimental::finite_min<T>; }; +struct FiniteMax { template <class T> using trait = Kokkos::Experimental::finite_max<T>; }; +struct RoundError { template <class T> using trait = Kokkos::Experimental::round_error<T>; }; +struct NormMin { template <class T> using trait = Kokkos::Experimental::norm_min<T>; }; +struct Digits { template <class T> using trait = Kokkos::Experimental::digits<T>; }; +struct Digits10 { template <class T> using trait = Kokkos::Experimental::digits10<T>; }; +struct MaxDigits10 { template <class T> using trait = Kokkos::Experimental::max_digits10<T>; }; +struct Radix { template <class 
T> using trait = Kokkos::Experimental::radix<T>; }; +struct MinExponent { template <class T> using trait = Kokkos::Experimental::min_exponent<T>; }; +struct MaxExponent { template <class T> using trait = Kokkos::Experimental::max_exponent<T>; }; +struct MinExponent10 { template <class T> using trait = Kokkos::Experimental::min_exponent10<T>; }; +struct MaxExponent10 { template <class T> using trait = Kokkos::Experimental::max_exponent10<T>; }; +// clang-format on + +template <class T> +KOKKOS_FUNCTION T* take_address_of(T& arg) { + return &arg; +} + +template <class T> +KOKKOS_FUNCTION void take_by_value(T) {} + +template <class Space, class T, class Tag> +struct TestNumericTraits { + template <class U> + using trait = typename Tag::template trait<U>; + + Kokkos::View<T, Space> compare; + TestNumericTraits() { + compare = Kokkos::View<T, Space>("C"); + run(); + } + + void run() const { + int errors = 0; + Kokkos::parallel_reduce(Kokkos::RangePolicy<Space, Tag>(0, 1), *this, + errors); + ASSERT_EQ(errors, 0); + (void)take_address_of(trait<T>::value); // use on host + } + + KOKKOS_FUNCTION void operator()(Infinity, int, int& e) const { + using Kokkos::Experimental::infinity; + auto const inf = infinity<T>::value; + auto const zero = T(0); + e += (int)!(inf + inf == inf); + e += (int)!(inf != zero); + use_on_device(); + } + + KOKKOS_FUNCTION void operator()(Epsilon, int, int& e) const { + using Kokkos::Experimental::epsilon; + auto const eps = epsilon<T>::value; + auto const one = T(1); + // Avoid higher precision intermediate representation + compare() = one + eps; + e += (int)!(compare() != one); + compare() = one + eps / 2; + e += (int)!(compare() == one); + use_on_device(); + } + + KOKKOS_FUNCTION void operator()(FiniteMin, int, int& e) const { + using Kokkos::Experimental::finite_max; + using Kokkos::Experimental::finite_min; + auto const min = finite_min<T>::value; + auto const max = finite_max<T>::value; + e += (int)!(min == extrema::min(T{})); + e += 
(int)!(max == extrema::max(T{})); + use_on_device(); + } + + // clang-format off + KOKKOS_FUNCTION void operator()(FiniteMax, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(RoundError, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(NormMin, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(Digits, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(Digits10, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(MaxDigits10, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(Radix, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(MinExponent, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(MaxExponent, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(MinExponent10, int, int&) const { use_on_device(); } + KOKKOS_FUNCTION void operator()(MaxExponent10, int, int&) const { use_on_device(); } + // clang-format on + + KOKKOS_FUNCTION void use_on_device() const { +#if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_OPENMPTARGET) + take_by_value(trait<T>::value); +#else + (void)take_address_of(trait<T>::value); +#endif + } +}; + +#if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_SYCL) || \ + defined(KOKKOS_ENABLE_OPENMPTARGET) +template <class Tag> +struct TestNumericTraits< +#if defined(KOKKOS_ENABLE_CUDA) + Kokkos::Cuda, +#elif defined(KOKKOS_ENABLE_SYCL) + Kokkos::Experimental::SYCL, +#else + Kokkos::Experimental::OpenMPTarget, +#endif + long double, Tag> { + template <class T> + using trait = typename Tag::template trait<T>; + TestNumericTraits() { + (void)take_address_of(trait<long double>::value); + // Do nothing on the device. + // According to the doc + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#constexpr-variables + // the traits member constant value cannot be directly used in device code. 
+ } +}; +#endif + +TEST(TEST_CATEGORY, numeric_traits_infinity) { + TestNumericTraits<TEST_EXECSPACE, float, Infinity>(); + TestNumericTraits<TEST_EXECSPACE, double, Infinity>(); + TestNumericTraits<TEST_EXECSPACE, long double, Infinity>(); +} + +TEST(TEST_CATEGORY, numeric_traits_epsilon) { + TestNumericTraits<TEST_EXECSPACE, float, Epsilon>(); + TestNumericTraits<TEST_EXECSPACE, double, Epsilon>(); +#ifndef KOKKOS_COMPILER_IBM // fails with XL 16.1.1 + TestNumericTraits<TEST_EXECSPACE, long double, Epsilon>(); +#endif +} + +TEST(TEST_CATEGORY, numeric_traits_round_error) { + TestNumericTraits<TEST_EXECSPACE, float, RoundError>(); + TestNumericTraits<TEST_EXECSPACE, double, RoundError>(); + TestNumericTraits<TEST_EXECSPACE, long double, RoundError>(); +} + +TEST(TEST_CATEGORY, numeric_traits_norm_min) { + TestNumericTraits<TEST_EXECSPACE, float, NormMin>(); + TestNumericTraits<TEST_EXECSPACE, double, NormMin>(); + TestNumericTraits<TEST_EXECSPACE, long double, NormMin>(); +} + +TEST(TEST_CATEGORY, numeric_traits_finite_min_max) { + TestNumericTraits<TEST_EXECSPACE, char, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, char, FiniteMax>(); + TestNumericTraits<TEST_EXECSPACE, signed char, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, signed char, FiniteMax>(); + TestNumericTraits<TEST_EXECSPACE, unsigned char, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, unsigned char, FiniteMax>(); + + TestNumericTraits<TEST_EXECSPACE, short, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, short, FiniteMax>(); + TestNumericTraits<TEST_EXECSPACE, unsigned short, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, unsigned short, FiniteMax>(); + + TestNumericTraits<TEST_EXECSPACE, int, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, int, FiniteMax>(); + TestNumericTraits<TEST_EXECSPACE, unsigned int, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, unsigned int, FiniteMax>(); + + TestNumericTraits<TEST_EXECSPACE, long, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, 
long, FiniteMax>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long, FiniteMax>(); + + TestNumericTraits<TEST_EXECSPACE, long long, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, long long, FiniteMax>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long long, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long long, FiniteMax>(); + + TestNumericTraits<TEST_EXECSPACE, float, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, float, FiniteMax>(); + TestNumericTraits<TEST_EXECSPACE, double, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, double, FiniteMax>(); + TestNumericTraits<TEST_EXECSPACE, long double, FiniteMin>(); + TestNumericTraits<TEST_EXECSPACE, long double, FiniteMax>(); +} + +TEST(TEST_CATEGORY, numeric_traits_digits) { + TestNumericTraits<TEST_EXECSPACE, bool, Digits>(); + TestNumericTraits<TEST_EXECSPACE, char, Digits>(); + TestNumericTraits<TEST_EXECSPACE, signed char, Digits>(); + TestNumericTraits<TEST_EXECSPACE, unsigned char, Digits>(); + TestNumericTraits<TEST_EXECSPACE, short, Digits>(); + TestNumericTraits<TEST_EXECSPACE, unsigned short, Digits>(); + TestNumericTraits<TEST_EXECSPACE, int, Digits>(); + TestNumericTraits<TEST_EXECSPACE, unsigned int, Digits>(); + TestNumericTraits<TEST_EXECSPACE, long int, Digits>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long int, Digits>(); + TestNumericTraits<TEST_EXECSPACE, long long int, Digits>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long long int, Digits>(); + TestNumericTraits<TEST_EXECSPACE, float, Digits>(); + TestNumericTraits<TEST_EXECSPACE, double, Digits>(); + TestNumericTraits<TEST_EXECSPACE, long double, Digits>(); +} + +TEST(TEST_CATEGORY, numeric_traits_digits10) { + TestNumericTraits<TEST_EXECSPACE, bool, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, char, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, signed char, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, unsigned char, Digits10>(); + 
TestNumericTraits<TEST_EXECSPACE, short, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, unsigned short, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, int, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, unsigned int, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, long int, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long int, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, long long int, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long long int, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, float, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, double, Digits10>(); + TestNumericTraits<TEST_EXECSPACE, long double, Digits10>(); +} + +TEST(TEST_CATEGORY, numeric_traits_max_digits10) { + TestNumericTraits<TEST_EXECSPACE, float, MaxDigits10>(); + TestNumericTraits<TEST_EXECSPACE, double, MaxDigits10>(); + TestNumericTraits<TEST_EXECSPACE, long double, MaxDigits10>(); +} + +TEST(TEST_CATEGORY, numeric_traits_radix) { + TestNumericTraits<TEST_EXECSPACE, bool, Radix>(); + TestNumericTraits<TEST_EXECSPACE, char, Radix>(); + TestNumericTraits<TEST_EXECSPACE, signed char, Radix>(); + TestNumericTraits<TEST_EXECSPACE, unsigned char, Radix>(); + TestNumericTraits<TEST_EXECSPACE, short, Radix>(); + TestNumericTraits<TEST_EXECSPACE, unsigned short, Radix>(); + TestNumericTraits<TEST_EXECSPACE, int, Radix>(); + TestNumericTraits<TEST_EXECSPACE, unsigned int, Radix>(); + TestNumericTraits<TEST_EXECSPACE, long int, Radix>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long int, Radix>(); + TestNumericTraits<TEST_EXECSPACE, long long int, Radix>(); + TestNumericTraits<TEST_EXECSPACE, unsigned long long int, Radix>(); + TestNumericTraits<TEST_EXECSPACE, float, Radix>(); + TestNumericTraits<TEST_EXECSPACE, double, Radix>(); + TestNumericTraits<TEST_EXECSPACE, long double, Radix>(); +} + +TEST(TEST_CATEGORY, numeric_traits_min_max_exponent) { + TestNumericTraits<TEST_EXECSPACE, float, MinExponent>(); + TestNumericTraits<TEST_EXECSPACE, float, 
MaxExponent>(); + TestNumericTraits<TEST_EXECSPACE, double, MinExponent>(); + TestNumericTraits<TEST_EXECSPACE, double, MaxExponent>(); + TestNumericTraits<TEST_EXECSPACE, long double, MinExponent>(); + TestNumericTraits<TEST_EXECSPACE, long double, MaxExponent>(); +} + +TEST(TEST_CATEGORY, numeric_traits_min_max_exponent10) { + TestNumericTraits<TEST_EXECSPACE, float, MinExponent10>(); + TestNumericTraits<TEST_EXECSPACE, float, MaxExponent10>(); + TestNumericTraits<TEST_EXECSPACE, double, MinExponent10>(); + TestNumericTraits<TEST_EXECSPACE, double, MaxExponent10>(); + TestNumericTraits<TEST_EXECSPACE, long double, MinExponent10>(); + TestNumericTraits<TEST_EXECSPACE, long double, MaxExponent10>(); +} diff --git a/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp b/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp index 405782b8f95fb031dd7eb5403c5556661170c6fc..0017c690e75c6e1bde1808e87203d8dbbea754cc 100644 --- a/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp +++ b/packages/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -48,6 +48,7 @@ #include <stdexcept> #include <sstream> #include <iostream> +#include <type_traits> namespace Test { struct SomeTag {}; @@ -579,7 +580,10 @@ class TestTeamPolicyConstruction { policy_t p1(league_size, team_size); ASSERT_EQ(p1.league_size(), league_size); ASSERT_EQ(p1.team_size(), team_size); +// FIXME_SYCL implement chunk_size +#ifndef KOKKOS_ENABLE_SYCL ASSERT_TRUE(p1.chunk_size() > 0); +#endif ASSERT_EQ(p1.scratch_size(0), 0); policy_t p2 = p1.set_chunk_size(chunk_size); @@ -692,10 +696,7 @@ TEST(TEST_CATEGORY, policy_construction) { check_semiregular<Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>>(); TestRangePolicyConstruction<TEST_EXECSPACE>(); - // FIXME_SYCL requires Team policy -#ifndef KOKKOS_ENABLE_SYCL TestTeamPolicyConstruction<TEST_EXECSPACE>(); -#endif } template <template <class...> class Policy, class... 
Args> @@ -709,13 +710,10 @@ void check_converting_constructor_add_work_tag(Policy<Args...> const& policy) { TEST(TEST_CATEGORY, policy_converting_constructor_from_other_policy) { check_converting_constructor_add_work_tag( Kokkos::RangePolicy<TEST_EXECSPACE>{}); - // FIXME_SYCL requires MDRange policy and Team policy -#ifndef KOKKOS_ENABLE_SYCL check_converting_constructor_add_work_tag( Kokkos::TeamPolicy<TEST_EXECSPACE>{}); check_converting_constructor_add_work_tag( Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>{}); -#endif } #ifndef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET @@ -767,32 +765,47 @@ void test_prefer_desired_occupancy(Policy const& policy) { template <class... Args> struct DummyPolicy : Kokkos::Impl::PolicyTraits<Args...> { using execution_policy = DummyPolicy; - using traits = Kokkos::Impl::PolicyTraits<Args...>; - template <class... OtherArgs> - DummyPolicy(DummyPolicy<OtherArgs...> const& p) : traits(p) {} - DummyPolicy() = default; + + using base_t = Kokkos::Impl::PolicyTraits<Args...>; + using base_t::base_t; }; TEST(TEST_CATEGORY, desired_occupancy_prefer) { test_prefer_desired_occupancy(DummyPolicy<TEST_EXECSPACE>{}); test_prefer_desired_occupancy(Kokkos::RangePolicy<TEST_EXECSPACE>{}); - // FIXME_SYCL requires MDRange policy and Team policy -#ifndef KOKKOS_ENABLE_SYCL test_prefer_desired_occupancy( Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>{}); test_prefer_desired_occupancy(Kokkos::TeamPolicy<TEST_EXECSPACE>{}); -#endif } +// For a more informative static assertion: +template <size_t> +struct static_assert_dummy_policy_must_be_size_one; +template <> +struct static_assert_dummy_policy_must_be_size_one<1> {}; +template <size_t, size_t> +struct static_assert_dummy_policy_must_be_size_of_desired_occupancy; +template <> +struct static_assert_dummy_policy_must_be_size_of_desired_occupancy< + sizeof(Kokkos::Experimental::DesiredOccupancy), + sizeof(Kokkos::Experimental::DesiredOccupancy)> {}; + TEST(TEST_CATEGORY, 
desired_occupancy_empty_base_optimization) { DummyPolicy<TEST_EXECSPACE> const policy{}; static_assert(sizeof(decltype(policy)) == 1, ""); + static_assert_dummy_policy_must_be_size_one<sizeof(decltype(policy))> + _assert1{}; + (void)_assert1; // avoid unused variable warning using Kokkos::Experimental::DesiredOccupancy; auto policy_with_occ = Kokkos::Experimental::prefer(policy, DesiredOccupancy{50}); static_assert(sizeof(decltype(policy_with_occ)) == sizeof(DesiredOccupancy), ""); + static_assert_dummy_policy_must_be_size_of_desired_occupancy< + sizeof(decltype(policy_with_occ)), sizeof(DesiredOccupancy)> + _assert2{}; + (void)_assert2; // avoid unused variable warning } template <typename Policy> @@ -809,16 +822,12 @@ void test_desired_occupancy_converting_constructors(Policy const& policy) { TEST(TEST_CATEGORY, desired_occupancy_converting_constructors) { test_desired_occupancy_converting_constructors( Kokkos::RangePolicy<TEST_EXECSPACE>{}); - // FIXME_SYCL requires MDRange policy and Team policy -#ifndef KOKKOS_ENABLE_SYCL test_desired_occupancy_converting_constructors( Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>{}); test_desired_occupancy_converting_constructors( Kokkos::TeamPolicy<TEST_EXECSPACE>{}); -#endif } -#ifndef KOKKOS_ENABLE_SYCL template <class T> void more_md_range_policy_construction_test() { (void)Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>{ @@ -878,6 +887,30 @@ TEST(TEST_CATEGORY, md_range_policy_construction_from_arrays) { more_md_range_policy_construction_test<unsigned long>(); more_md_range_policy_construction_test<std::int64_t>(); } -#endif +template <class WorkTag, class Policy> +constexpr auto set_worktag(Policy const& policy) { + static_assert(Kokkos::is_execution_policy<Policy>::value, ""); + using PolicyWithWorkTag = + Kokkos::Impl::WorkTagTrait::policy_with_trait<Policy, WorkTag>; + return PolicyWithWorkTag{policy}; +} + +TEST(TEST_CATEGORY, policy_set_worktag) { + struct SomeWorkTag {}; + struct OtherWorkTag {}; + 
+ Kokkos::RangePolicy<> p1; + static_assert(std::is_void<decltype(p1)::work_tag>::value, ""); + + auto p2 = set_worktag<SomeWorkTag>(p1); + static_assert(std::is_same<decltype(p2)::work_tag, SomeWorkTag>::value, ""); + + auto p3 = set_worktag<OtherWorkTag>(p2); + static_assert(std::is_same<decltype(p3)::work_tag, OtherWorkTag>::value, ""); + + // NOTE this does not currently compile + // auto p4 = set_worktag<void>(p3); + // static_assert(std::is_void<decltype(p4)::work_tag>::value, ""); +} } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestRange.hpp b/packages/kokkos/core/unit_test/TestRange.hpp index 1f14ae4f30502089a3e90afab96b20ecc78a47e8..a6a6220f2dceea470414fb0d712796689f6d151c 100644 --- a/packages/kokkos/core/unit_test/TestRange.hpp +++ b/packages/kokkos/core/unit_test/TestRange.hpp @@ -162,9 +162,8 @@ struct TestRange { KOKKOS_INLINE_FUNCTION void operator()(const VerifyInitTag &, const int i) const { if (i != m_flags(i)) { -#ifndef __SYCL_DEVICE_ONLY__ - printf("TestRange::test_for_error at %d != %d\n", i, m_flags(i)); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF("TestRange::test_for_error at %d != %d\n", + i, m_flags(i)); } } @@ -176,9 +175,8 @@ struct TestRange { KOKKOS_INLINE_FUNCTION void operator()(const VerifyResetTag &, const int i) const { if (2 * i != m_flags(i)) { -#ifndef __SYCL_DEVICE_ONLY__ - printf("TestRange::test_for_error at %d != %d\n", i, m_flags(i)); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF("TestRange::test_for_error at %d != %d\n", + i, m_flags(i)); } } @@ -190,9 +188,8 @@ struct TestRange { KOKKOS_INLINE_FUNCTION void operator()(const VerifyOffsetTag &, const int i) const { if (i + offset != m_flags(i)) { -#ifndef __SYCL_DEVICE_ONLY__ - printf("TestRange::test_for_error at %d != %d\n", i + offset, m_flags(i)); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF("TestRange::test_for_error at %d != %d\n", + i + offset, m_flags(i)); } } @@ -275,10 +272,9 @@ struct TestRange { if (final) { if (update != (i * (i + 1)) / 2) { -#ifndef 
__SYCL_DEVICE_ONLY__ - printf("TestRange::test_scan error (%d,%d) : %d != %d\n", i, m_flags(i), - (i * (i + 1)) / 2, update); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "TestRange::test_scan error (%d,%d) : %d != %d\n", i, m_flags(i), + (i * (i + 1)) / 2, update); } result_view(i) = update; } diff --git a/packages/kokkos/core/unit_test/TestRangePolicyRequire.hpp b/packages/kokkos/core/unit_test/TestRangePolicyRequire.hpp index 18ff450a1a330c832d2bd3e3598391bfa703fc8e..693f19613db6beb8c1c2a551574808de26633726 100644 --- a/packages/kokkos/core/unit_test/TestRangePolicyRequire.hpp +++ b/packages/kokkos/core/unit_test/TestRangePolicyRequire.hpp @@ -170,9 +170,8 @@ struct TestRangeRequire { KOKKOS_INLINE_FUNCTION void operator()(const VerifyInitTag &, const int i) const { if (i != m_flags(i)) { -#ifndef KOKKOS_ENABLE_SYCL - printf("TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); } } @@ -184,9 +183,8 @@ struct TestRangeRequire { KOKKOS_INLINE_FUNCTION void operator()(const VerifyResetTag &, const int i) const { if (2 * i != m_flags(i)) { -#ifndef KOKKOS_ENABLE_SYCL - printf("TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); } } @@ -198,10 +196,9 @@ struct TestRangeRequire { KOKKOS_INLINE_FUNCTION void operator()(const VerifyOffsetTag &, const int i) const { if (i + offset != m_flags(i)) { -#ifndef KOKKOS_ENABLE_SYCL - printf("TestRangeRequire::test_for error at %d != %d\n", i + offset, - m_flags(i)); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "TestRangeRequire::test_for error at %d != %d\n", i + offset, + m_flags(i)); } } @@ -268,10 +265,9 @@ struct TestRangeRequire { if (final) { if (update != (i * (i + 1)) / 2) { -#ifndef KOKKOS_ENABLE_SYCL - printf("TestRangeRequire::test_scan error %d : %d != %d\n", i, - (i * (i + 1)) / 
2, m_flags(i)); -#endif + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "TestRangeRequire::test_scan error %d : %d != %d\n", i, + (i * (i + 1)) / 2, m_flags(i)); } } } diff --git a/packages/kokkos/core/unit_test/TestReduce.hpp b/packages/kokkos/core/unit_test/TestReduce.hpp index 9fab5b1f0fa5c42b1ebc5de34dd02fd6f0bca361..5f7fbd5623d6e8e4c25c261a0f092d79c1573fba 100644 --- a/packages/kokkos/core/unit_test/TestReduce.hpp +++ b/packages/kokkos/core/unit_test/TestReduce.hpp @@ -51,6 +51,8 @@ namespace Test { +struct ReducerTag {}; + template <typename ScalarType, class DeviceType> class ReduceFunctor { public: @@ -110,6 +112,45 @@ class ReduceFunctorFinal : public ReduceFunctor<int64_t, DeviceType> { } }; +template <class DeviceType> +class ReduceFunctorFinalTag { + public: + using execution_space = DeviceType; + using size_type = typename execution_space::size_type; + using ScalarType = int64_t; + + struct value_type { + ScalarType value[3]; + }; + + const size_type nwork; + + KOKKOS_INLINE_FUNCTION + ReduceFunctorFinalTag(const size_type arg_nwork) : nwork(arg_nwork) {} + + KOKKOS_INLINE_FUNCTION + void join(const ReducerTag, volatile value_type& dst, + const volatile value_type& src) const { + dst.value[0] += src.value[0]; + dst.value[1] += src.value[1]; + dst.value[2] += src.value[2]; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const ReducerTag, size_type iwork, value_type& dst) const { + dst.value[0] -= 1; + dst.value[1] -= iwork + 1; + dst.value[2] -= nwork - iwork; + } + + KOKKOS_INLINE_FUNCTION + void final(const ReducerTag, value_type& dst) const { + ++dst.value[0]; + ++dst.value[1]; + ++dst.value[2]; + } +}; + template <typename ScalarType, class DeviceType> class RuntimeReduceFunctor { public: @@ -141,7 +182,7 @@ class RuntimeReduceFunctor { void operator()(size_type iwork, ScalarType dst[]) const { const size_type tmp[3] = {1, iwork + 1, nwork - iwork}; - for (size_type i = 0; i < value_count; ++i) { + for (size_type i = 0; i < static_cast<size_type>(value_count); 
++i) { dst[i] += tmp[i % 3]; } } @@ -189,7 +230,7 @@ class RuntimeReduceMinMax { const ScalarType tmp[2] = {ScalarType(iwork + 1), ScalarType(nwork - iwork)}; - for (size_type i = 0; i < value_count; ++i) { + for (size_type i = 0; i < static_cast<size_type>(value_count); ++i) { dst[i] = i % 2 ? (dst[i] < tmp[i % 2] ? dst[i] : tmp[i % 2]) : (dst[i] > tmp[i % 2] ? dst[i] : tmp[i % 2]); } @@ -260,6 +301,7 @@ class TestReduce { TestReduce(const size_type& nwork) { run_test(nwork); run_test_final(nwork); + run_test_final_tag(nwork); } void run_test(const size_type& nwork) { @@ -314,6 +356,39 @@ class TestReduce { } } } + + void run_test_final_tag(const size_type& nwork) { + using functor_type = Test::ReduceFunctorFinalTag<execution_space>; + using value_type = typename functor_type::value_type; + + enum { Count = 3 }; + enum { Repeat = 100 }; + + value_type result[Repeat]; + + const uint64_t nw = nwork; + const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); + + for (unsigned i = 0; i < Repeat; ++i) { + if (i % 2 == 0) { + Kokkos::parallel_reduce( + Kokkos::RangePolicy<execution_space, ReducerTag>(0, nwork), + functor_type(nwork), result[i]); + } else { + Kokkos::parallel_reduce( + "Reduce", + Kokkos::RangePolicy<execution_space, ReducerTag>(0, nwork), + functor_type(nwork), result[i]); + } + } + + for (unsigned i = 0; i < Repeat; ++i) { + for (unsigned j = 0; j < Count; ++j) { + const uint64_t correct = 0 == j % 3 ? 
nw : nsum; + ASSERT_EQ((ScalarType)correct, 1 - result[i].value[j]); + } + } + } }; template <typename ScalarType, class DeviceType> diff --git a/packages/kokkos/core/unit_test/TestReduceCombinatorical.hpp b/packages/kokkos/core/unit_test/TestReduceCombinatorical.hpp index f8c693b9602fb5b11cd99999813583c772b6e70b..68e7d746dd91a68046c4d074884ef5aef7519427 100644 --- a/packages/kokkos/core/unit_test/TestReduceCombinatorical.hpp +++ b/packages/kokkos/core/unit_test/TestReduceCombinatorical.hpp @@ -102,8 +102,6 @@ struct FunctorScalar<0> { void operator()(const int& i, double& update) const { update += i; } }; -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL template <> struct FunctorScalar<1> { using team_type = Kokkos::TeamPolicy<>::member_type; @@ -117,7 +115,6 @@ struct FunctorScalar<1> { update += 1.0 / team.team_size() * team.league_rank(); } }; -#endif template <int ISTEAM> struct FunctorScalarInit; @@ -135,8 +132,6 @@ struct FunctorScalarInit<0> { void init(double& update) const { update = 0.0; } }; -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL template <> struct FunctorScalarInit<1> { using team_type = Kokkos::TeamPolicy<>::member_type; @@ -153,7 +148,6 @@ struct FunctorScalarInit<1> { KOKKOS_INLINE_FUNCTION void init(double& update) const { update = 0.0; } }; -#endif template <int ISTEAM> struct FunctorScalarFinal; @@ -171,8 +165,6 @@ struct FunctorScalarFinal<0> { void final(double& update) const { result() = update; } }; -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL template <> struct FunctorScalarFinal<1> { using team_type = Kokkos::TeamPolicy<>::member_type; @@ -189,7 +181,6 @@ struct FunctorScalarFinal<1> { KOKKOS_INLINE_FUNCTION void final(double& update) const { result() = update; } }; -#endif template <int ISTEAM> struct FunctorScalarJoin; @@ -209,8 +200,6 @@ struct FunctorScalarJoin<0> { } }; -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL template <> struct FunctorScalarJoin<1> { using 
team_type = Kokkos::TeamPolicy<>::member_type; @@ -229,7 +218,6 @@ struct FunctorScalarJoin<1> { dst += update; } }; -#endif template <int ISTEAM> struct FunctorScalarJoinFinal; @@ -252,8 +240,6 @@ struct FunctorScalarJoinFinal<0> { void final(double& update) const { result() = update; } }; -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL template <> struct FunctorScalarJoinFinal<1> { using team_type = Kokkos::TeamPolicy<>::member_type; @@ -275,7 +261,6 @@ struct FunctorScalarJoinFinal<1> { KOKKOS_INLINE_FUNCTION void final(double& update) const { result() = update; } }; -#endif template <int ISTEAM> struct FunctorScalarJoinInit; @@ -298,8 +283,6 @@ struct FunctorScalarJoinInit<0> { void init(double& update) const { update = 0.0; } }; -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL template <> struct FunctorScalarJoinInit<1> { using team_type = Kokkos::TeamPolicy<>::member_type; @@ -321,7 +304,6 @@ struct FunctorScalarJoinInit<1> { KOKKOS_INLINE_FUNCTION void init(double& update) const { update = 0.0; } }; -#endif template <int ISTEAM> struct FunctorScalarJoinFinalInit; @@ -347,8 +329,6 @@ struct FunctorScalarJoinFinalInit<0> { void init(double& update) const { update = 0.0; } }; -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL template <> struct FunctorScalarJoinFinalInit<1> { using team_type = Kokkos::TeamPolicy<>::member_type; @@ -373,7 +353,6 @@ struct FunctorScalarJoinFinalInit<1> { KOKKOS_INLINE_FUNCTION void init(double& update) const { update = 0.0; } }; -#endif struct Functor1 { KOKKOS_INLINE_FUNCTION diff --git a/packages/kokkos/core/unit_test/TestReducers.hpp b/packages/kokkos/core/unit_test/TestReducers.hpp index a8ffe3c0c245b90981ccbddca7f5085f7d0d18c0..35f0e231fd2a7b1e88bbf4be568532aa5c219e3f 100644 --- a/packages/kokkos/core/unit_test/TestReducers.hpp +++ b/packages/kokkos/core/unit_test/TestReducers.hpp @@ -1015,7 +1015,12 @@ struct TestReducers { test_minloc(10003); test_max(10007); test_maxloc(10007); + 
// FIXME_OPENMPTARGET - The minmaxloc test fails in the Release and + // RelWithDebInfo builds for the OPENMPTARGET backend but passes in Debug + // mode. +#if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_minmaxloc(10007); +#endif } // NOTE test_prod generates N random numbers between 1 and 4. @@ -1028,7 +1033,12 @@ struct TestReducers { test_minloc(10003); test_max(10007); test_maxloc(10007); + // FIXME_OPENMPTARGET - The minmaxloc test fails in the Release and + // RelWithDebInfo builds for the OPENMPTARGET backend but passes in Debug + // mode. +#if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_minmaxloc(10007); +#endif test_BAnd(35); test_BOr(35); test_LAnd(35); diff --git a/packages/kokkos/core/unit_test/TestReducers_d.hpp b/packages/kokkos/core/unit_test/TestReducers_d.hpp index 44545a89dd93c0b02d6f9130d2fa96a2dcaa93b3..e2254a1c1fe653b22c3e6b9a9ebad50d07a9eb89 100644 --- a/packages/kokkos/core/unit_test/TestReducers_d.hpp +++ b/packages/kokkos/core/unit_test/TestReducers_d.hpp @@ -54,8 +54,14 @@ TEST(TEST_CATEGORY, reducers_complex_double) { TEST(TEST_CATEGORY, reducers_struct) { TestReducers<array_reduce<float, 1>, TEST_EXECSPACE>::test_sum(1031); TestReducers<array_reduce<float, 2>, TEST_EXECSPACE>::test_sum(1031); - TestReducers<array_reduce<float, 3>, TEST_EXECSPACE>::test_sum(1031); TestReducers<array_reduce<float, 4>, TEST_EXECSPACE>::test_sum(1031); + // FIXME_OPENMPTARGET - The size of data in array_reduce has to be a power of + // 2 for OPENMPTARGET backend in Release and RelWithDebInfo builds. 
+#ifdef KOKKOS_ENABLE_OPENMPTARGET + TestReducers<array_reduce<float, 8>, TEST_EXECSPACE>::test_sum(1031); +#else + TestReducers<array_reduce<float, 3>, TEST_EXECSPACE>::test_sum(1031); TestReducers<array_reduce<float, 7>, TEST_EXECSPACE>::test_sum(1031); +#endif } } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestReductions_DeviceView.hpp b/packages/kokkos/core/unit_test/TestReductions_DeviceView.hpp index d82709b300dc52110a73d1850cb5cb6378955224..17563de335e5b6a6170985e392ea8ae0de5ae8c1 100644 --- a/packages/kokkos/core/unit_test/TestReductions_DeviceView.hpp +++ b/packages/kokkos/core/unit_test/TestReductions_DeviceView.hpp @@ -109,7 +109,12 @@ struct TeamPolicyFunctor { } // namespace TEST(TEST_CATEGORY, reduce_device_view_range_policy) { + // Avoid running out of memory +#ifdef KOKKOS_ENABLE_SYCL + int N = 100 * 1024 * 1024; +#else int N = 1000 * 1024 * 1024; +#endif test_reduce_device_view(N, Kokkos::RangePolicy<TEST_EXECSPACE>(0, N), RangePolicyFunctor()); } @@ -126,10 +131,19 @@ TEST(TEST_CATEGORY, reduce_device_view_mdrange_policy) { // FIXME_HIP #ifndef KOKKOS_ENABLE_HIP TEST(TEST_CATEGORY, reduce_device_view_team_policy) { +// FIXME_SYCL The number of workgroups on CUDA devices can not be larger than +// 65535 +#ifdef KOKKOS_ENABLE_SYCL + int N = 63 * 1024 * 1024; + test_reduce_device_view( + N, Kokkos::TeamPolicy<TEST_EXECSPACE>(63 * 1024, Kokkos::AUTO), + TeamPolicyFunctor(1024)); +#else int N = 1000 * 1024 * 1024; test_reduce_device_view( N, Kokkos::TeamPolicy<TEST_EXECSPACE>(1000 * 1024, Kokkos::AUTO), TeamPolicyFunctor(1024)); +#endif } #endif } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestResize.hpp b/packages/kokkos/core/unit_test/TestResize.hpp index 0ab6e10c49e95ec115fb76ed16ac7df10b256534..cf5c0df6f9163039fbd3ca1df8aee2a4b24ac882 100644 --- a/packages/kokkos/core/unit_test/TestResize.hpp +++ b/packages/kokkos/core/unit_test/TestResize.hpp @@ -76,8 +76,6 @@ void impl_testResize() { const int* newPointer = 
view_1d.data(); EXPECT_TRUE(oldPointer == newPointer); } - // FIXME_SYCL needs MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL { using view_type = Kokkos::View<int**, DeviceType>; view_type view_2d("view_2d", sizes[0], sizes[1]); @@ -149,7 +147,6 @@ void impl_testResize() { const int* newPointer = view_8d.data(); EXPECT_TRUE(oldPointer == newPointer); } -#endif // Resize without initialization: check if data preserved { using view_type = Kokkos::View<int*, DeviceType>; @@ -172,8 +169,6 @@ void impl_testResize() { } EXPECT_TRUE(test == true); } - // FIXME_SYCL requires MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL { using view_type = Kokkos::View<int**, DeviceType>; view_type view_2d("view_2d", sizes[0], sizes[1]); @@ -389,7 +384,6 @@ void impl_testResize() { } EXPECT_TRUE(test == true); } -#endif } template <class DeviceType> diff --git a/packages/kokkos/core/unit_test/TestScan.hpp b/packages/kokkos/core/unit_test/TestScan.hpp index 138570f445ea69efcae9311abeabb036d68af45b..67cb85553d6bf7ccd9cc76b85f7bc32bb0e2e5a7 100644 --- a/packages/kokkos/core/unit_test/TestScan.hpp +++ b/packages/kokkos/core/unit_test/TestScan.hpp @@ -75,15 +75,11 @@ struct TestScan { if (answer != update) { int fail = errors()++; - // FIXME_SYCL -#ifndef KOKKOS_ENABLE_SYCL if (fail < 20) { - printf("TestScan(%d,%ld) != %ld\n", iwork, static_cast<long>(update), - static_cast<long>(answer)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF("TestScan(%d,%ld) != %ld\n", iwork, + static_cast<long>(update), + static_cast<long>(answer)); } -#else - (void)fail; -#endif } } } diff --git a/packages/kokkos/core/unit_test/TestSharedAlloc.hpp b/packages/kokkos/core/unit_test/TestSharedAlloc.hpp index bb00a95824909cde25b557f652670753cb8c12f1..b5eb77dc2a964fe1066048b2edfac61d531b4fab 100644 --- a/packages/kokkos/core/unit_test/TestSharedAlloc.hpp +++ b/packages/kokkos/core/unit_test/TestSharedAlloc.hpp @@ -239,6 +239,9 @@ TEST(TEST_CATEGORY, impl_shared_alloc) { #elif (TEST_CATEGORY_NUMBER == 6) // hip 
test_shared_alloc<Kokkos::Experimental::HIPSpace, Kokkos::DefaultHostExecutionSpace>(); +#elif (TEST_CATEGORY_NUMBER == 7) // sycl + test_shared_alloc<Kokkos::Experimental::SYCLDeviceUSMSpace, + Kokkos::DefaultHostExecutionSpace>(); #endif #else test_shared_alloc<TEST_EXECSPACE, Kokkos::DefaultHostExecutionSpace>(); diff --git a/packages/kokkos/core/unit_test/TestCuda_Category.hpp b/packages/kokkos/core/unit_test/TestSubView_c14.hpp similarity index 88% rename from packages/kokkos/core/unit_test/TestCuda_Category.hpp rename to packages/kokkos/core/unit_test/TestSubView_c14.hpp index 7c572e3a0887527e48ae32ea1ca9f641f2fbb22e..e6510c83a603481a9b8de0367894ec98407faba3 100644 --- a/packages/kokkos/core/unit_test/TestCuda_Category.hpp +++ b/packages/kokkos/core/unit_test/TestSubView_c14.hpp @@ -42,14 +42,15 @@ //@HEADER */ -#ifndef KOKKOS_TEST_CUDA_HPP -#define KOKKOS_TEST_CUDA_HPP +#ifndef KOKKOS_TEST_SUBVIEW_C14_HPP +#define KOKKOS_TEST_SUBVIEW_C14_HPP +#include <TestViewSubview.hpp> -#include <gtest/gtest.h> +namespace Test { -#define TEST_CATEGORY cuda -#define TEST_CATEGORY_NUMBER 5 -#define TEST_CATEGORY_DEATH cuda_DeathTest -#define TEST_EXECSPACE Kokkos::Cuda +TEST(TEST_CATEGORY, view_subview_memory_traits_construction) { + TestViewSubview::test_subview_memory_traits_construction(); +} +} // namespace Test #endif diff --git a/packages/kokkos/core/unit_test/TestTeam.hpp b/packages/kokkos/core/unit_test/TestTeam.hpp index 628def9be5b71635a260b6773a6b347858742677..97ddfd4cf58518bfa494eedf4445ba68fdb1132a 100644 --- a/packages/kokkos/core/unit_test/TestTeam.hpp +++ b/packages/kokkos/core/unit_test/TestTeam.hpp @@ -62,10 +62,18 @@ struct TestTeamPolicy { view_type m_flags; TestTeamPolicy(const size_t league_size) - : m_flags(Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"), - Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 1).team_size_max( - *this, Kokkos::ParallelReduceTag()), - league_size) {} + : m_flags( + Kokkos::view_alloc(Kokkos::WithoutInitializing, 
"flags"), + // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 +#ifdef KOKKOS_ENABLE_OPENMPTARGET + Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 32).team_size_max( + *this, Kokkos::ParallelReduceTag()), +#else + Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 1).team_size_max( + *this, Kokkos::ParallelReduceTag()), +#endif + league_size) { + } struct VerifyInitTag {}; @@ -87,9 +95,10 @@ struct TestTeamPolicy { member.team_rank() + member.team_size() * member.league_rank(); if (tid != m_flags(member.team_rank(), member.league_rank())) { - printf("TestTeamPolicy member(%d,%d) error %d != %d\n", - member.league_rank(), member.team_rank(), tid, - m_flags(member.team_rank(), member.league_rank())); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "TestTeamPolicy member(%d,%d) error %d != %d\n", member.league_rank(), + member.team_rank(), tid, + m_flags(member.team_rank(), member.league_rank())); } } @@ -120,31 +129,57 @@ struct TestTeamPolicy { static void test_constructors() { constexpr const int smallest_work = 1; + // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 +#ifdef KOKKOS_ENABLE_OPENMPTARGET + Kokkos::TeamPolicy<ExecSpace, NoOpTag> none_auto(smallest_work, 32, + smallest_work); +#else Kokkos::TeamPolicy<ExecSpace, NoOpTag> none_auto( smallest_work, smallest_work, smallest_work); +#endif Kokkos::TeamPolicy<ExecSpace, NoOpTag> both_auto( smallest_work, Kokkos::AUTO(), Kokkos::AUTO()); + // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 +#ifdef KOKKOS_ENABLE_OPENMPTARGET + Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_vector(smallest_work, 32, + Kokkos::AUTO()); +#else Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_vector( smallest_work, smallest_work, Kokkos::AUTO()); +#endif Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_team( smallest_work, Kokkos::AUTO(), smallest_work); } static void test_for(const size_t league_size) { - TestTeamPolicy functor(league_size); - using policy_type = 
Kokkos::TeamPolicy<ScheduleType, ExecSpace>; - using policy_type_init = - Kokkos::TeamPolicy<ScheduleType, ExecSpace, VerifyInitTag>; - - const int team_size = policy_type(league_size, 1) - .team_size_max(functor, Kokkos::ParallelForTag()); - const int team_size_init = - policy_type_init(league_size, 1) - .team_size_max(functor, Kokkos::ParallelForTag()); + { + TestTeamPolicy functor(league_size); + using policy_type = Kokkos::TeamPolicy<ScheduleType, ExecSpace>; + using policy_type_init = + Kokkos::TeamPolicy<ScheduleType, ExecSpace, VerifyInitTag>; + + // FIXME_OPENMPTARGET temporary restriction for team size to be at least + // 32 +#ifdef KOKKOS_ENABLE_OPENMPTARGET + const int team_size = + policy_type(league_size, 32) + .team_size_max(functor, Kokkos::ParallelForTag()); + const int team_size_init = + policy_type_init(league_size, 32) + .team_size_max(functor, Kokkos::ParallelForTag()); +#else + const int team_size = + policy_type(league_size, 1) + .team_size_max(functor, Kokkos::ParallelForTag()); + const int team_size_init = + policy_type_init(league_size, 1) + .team_size_max(functor, Kokkos::ParallelForTag()); +#endif - Kokkos::parallel_for(policy_type(league_size, team_size), functor); - Kokkos::parallel_for(policy_type_init(league_size, team_size_init), - functor); + Kokkos::parallel_for(policy_type(league_size, team_size), functor); + Kokkos::parallel_for(policy_type_init(league_size, team_size_init), + functor); + } test_small_league_size(); test_constructors(); @@ -173,9 +208,16 @@ struct TestTeamPolicy { using policy_type_reduce = Kokkos::TeamPolicy<ScheduleType, ExecSpace, ReduceTag>; + // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 +#ifdef KOKKOS_ENABLE_OPENMPTARGET + const int team_size = + policy_type_reduce(league_size, 32) + .team_size_max(functor, Kokkos::ParallelReduceTag()); +#else const int team_size = policy_type_reduce(league_size, 1) .team_size_max(functor, Kokkos::ParallelReduceTag()); +#endif const int64_t N = 
team_size * league_size; @@ -353,7 +395,7 @@ class ScanTeamFunctor { ind.team_reduce(Kokkos::Max<int64_t>(m)); if (m != ind.league_rank() + (ind.team_size() - 1)) { - printf( + KOKKOS_IMPL_DO_NOT_USE_PRINTF( "ScanTeamFunctor[%i.%i of %i.%i] reduce_max_answer(%li) != " "reduce_max(%li)\n", static_cast<int>(ind.league_rank()), @@ -375,7 +417,7 @@ class ScanTeamFunctor { ind.team_scan(ind.league_rank() + 1 + ind.team_rank() + 1); if (answer != result || answer != result2) { - printf( + KOKKOS_IMPL_DO_NOT_USE_PRINTF( "ScanTeamFunctor[%i.%i of %i.%i] answer(%li) != scan_first(%li) or " "scan_second(%li)\n", static_cast<int>(ind.league_rank()), @@ -476,7 +518,7 @@ struct SharedTeamFunctor { if ((shared_A.data() == nullptr && SHARED_COUNT > 0) || (shared_B.data() == nullptr && SHARED_COUNT > 0)) { - printf( + KOKKOS_IMPL_DO_NOT_USE_PRINTF( "member( %i/%i , %i/%i ) Failed to allocate shared memory of size " "%lu\n", static_cast<int>(ind.league_rank()), @@ -522,12 +564,21 @@ struct TestSharedTeam { Kokkos::View<typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>; +#ifdef KOKKOS_ENABLE_OPENMPTARGET + const size_t team_size = + Kokkos::TeamPolicy<ScheduleType, ExecSpace>(64, 32).team_size_max( + Functor(), Kokkos::ParallelReduceTag()); + + Kokkos::TeamPolicy<ScheduleType, ExecSpace> team_exec(32 / team_size, + team_size); +#else const size_t team_size = Kokkos::TeamPolicy<ScheduleType, ExecSpace>(8192, 1).team_size_max( Functor(), Kokkos::ParallelReduceTag()); Kokkos::TeamPolicy<ScheduleType, ExecSpace> team_exec(8192 / team_size, team_size); +#endif typename Functor::value_type error_count = 0; @@ -559,7 +610,11 @@ struct TestLambdaSharedTeam { Kokkos::View<int *, shmem_space, Kokkos::MemoryUnmanaged>; const int SHARED_COUNT = 1000; - int team_size = 1; +#ifdef KOKKOS_ENABLE_OPENMPTARGET + int team_size = 32; +#else + int team_size = 1; +#endif #ifdef KOKKOS_ENABLE_CUDA if (std::is_same<ExecSpace, Kokkos::Cuda>::value) team_size = 128; @@ -583,8 +638,9 
@@ struct TestLambdaSharedTeam { if ((shared_A.data() == nullptr && SHARED_COUNT > 0) || (shared_B.data() == nullptr && SHARED_COUNT > 0)) { - printf("Failed to allocate shared memory of size %lu\n", - static_cast<unsigned long>(SHARED_COUNT)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "Failed to allocate shared memory of size %lu\n", + static_cast<unsigned long>(SHARED_COUNT)); ++update; // Failure to allocate is an error. } else { @@ -650,8 +706,9 @@ struct ScratchTeamFunctor { if ((scratch_ptr.data() == nullptr) || (scratch_A.data() == nullptr && SHARED_TEAM_COUNT > 0) || (scratch_B.data() == nullptr && SHARED_THREAD_COUNT > 0)) { - printf("Failed to allocate shared memory of size %lu\n", - static_cast<unsigned long>(SHARED_TEAM_COUNT)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "Failed to allocate shared memory of size %lu\n", + static_cast<unsigned long>(SHARED_TEAM_COUNT)); ++update; // Failure to allocate is an error. } else { @@ -713,11 +770,19 @@ struct TestScratchTeam { int thread_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_THREAD_COUNT); +#ifdef KOKKOS_ENABLE_OPENMPTARGET + p_type team_exec = p_type(64, 32).set_scratch_size( + 1, + Kokkos::PerTeam(Functor::shared_int_array_type::shmem_size( + Functor::SHARED_TEAM_COUNT)), + Kokkos::PerThread(thread_scratch_size + 3 * sizeof(int))); +#else p_type team_exec = p_type(8192, 1).set_scratch_size( 1, Kokkos::PerTeam(Functor::shared_int_array_type::shmem_size( Functor::SHARED_TEAM_COUNT)), Kokkos::PerThread(thread_scratch_size + 3 * sizeof(int))); +#endif const size_t team_size = team_exec.team_size_max(Functor(), Kokkos::ParallelReduceTag()); @@ -726,7 +791,11 @@ struct TestScratchTeam { Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) + Functor::shared_int_array_type::shmem_size(3 * team_size); - team_exec = p_type(8192 / team_size, team_size); +#ifdef KOKKOS_ENABLE_OPENMPTARGET + team_exec = p_type(64 / team_size, team_size); +#else + team_exec = p_type(8192 / team_size, 
team_size); +#endif Kokkos::parallel_reduce( team_exec.set_scratch_size(1, Kokkos::PerTeam(team_scratch_size), @@ -781,16 +850,17 @@ KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body( }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, 16), [&](const int &i) { - a_thread1(i) = 1000000 + 100000 * team.team_rank() + 16 - i + - team.league_rank() * 100000; - a_thread2(i) = 2000000 + 100000 * team.team_rank() + 16 - i + - team.league_rank() * 100000; - a_thread3(i) = 3000000 + 100000 * team.team_rank() + 16 - i + - team.league_rank() * 100000; - }); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, int(0), unsigned(16)), + [&](const int &i) { + a_thread1(i) = 1000000 + 100000 * team.team_rank() + + 16 - i + team.league_rank() * 100000; + a_thread2(i) = 2000000 + 100000 * team.team_rank() + + 16 - i + team.league_rank() * 100000; + a_thread3(i) = 3000000 + 100000 * team.team_rank() + + 16 - i + team.league_rank() * 100000; + }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0, 12800), + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, int(0), unsigned(12800)), [&](const int &i) { b_team1(i) = 1000000 + i + team.league_rank() * 100000; b_team2(i) = 2000000 + i + team.league_rank() * 100000; @@ -1218,8 +1288,16 @@ struct TestTeamBroadcast< using policy_type_f = Kokkos::TeamPolicy<ScheduleType, ExecSpace, BroadcastTag>; + // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 +#ifdef KOKKOS_ENABLE_OPENMPTARGET + int fake_team_size = + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value ? 
32 + : 1; +#else + int fake_team_size = 1; +#endif const int team_size = - policy_type_f(league_size, 1) + policy_type_f(league_size, fake_team_size) .team_size_max( functor, Kokkos:: @@ -1364,13 +1442,20 @@ struct TestTeamBroadcast< using policy_type_f = Kokkos::TeamPolicy<ScheduleType, ExecSpace, BroadcastTag>; + // FIXME_OPENMPTARGET temporary restriction for team size to be at least 32 +#ifdef KOKKOS_ENABLE_OPENMPTARGET + int fake_team_size = + std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value ? 32 + : 1; +#else + int fake_team_size = 1; +#endif const int team_size = - policy_type_f(league_size, 1) + policy_type_f(league_size, fake_team_size) .team_size_max( functor, Kokkos:: ParallelReduceTag()); // printf("team_size=%d\n",team_size); - // team_broadcast with value value_type total = 0; @@ -1422,10 +1507,15 @@ struct TestScratchAlignment { Kokkos::View<int *, typename ExecSpace::scratch_memory_space>; void test(bool allocate_small) { int shmem_size = ScratchView::shmem_size(11); +#ifdef KOKKOS_ENABLE_OPENMPTARGET + int team_size = 32; +#else + int team_size = 1; +#endif if (allocate_small) shmem_size += ScratchViewInt::shmem_size(1); Kokkos::parallel_for( - Kokkos::TeamPolicy<ExecSpace>(1, 1).set_scratch_size( - 0, Kokkos::PerTeam(shmem_size)), + Kokkos::TeamPolicy<ExecSpace>(1, team_size) + .set_scratch_size(0, Kokkos::PerTeam(shmem_size)), KOKKOS_LAMBDA( const typename Kokkos::TeamPolicy<ExecSpace>::member_type &team) { if (allocate_small) ScratchViewInt p(team.team_scratch(0), 1); @@ -1439,6 +1529,38 @@ struct TestScratchAlignment { } // namespace +namespace { + +template <class ExecSpace> +struct TestTeamPolicyHandleByValue { + using scalar = double; + using exec_space = ExecSpace; + using mem_space = typename ExecSpace::memory_space; + + TestTeamPolicyHandleByValue() { test(); } + + void test() { +#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + const int M = 1, N = 1; + Kokkos::View<scalar **, mem_space> a("a", M, N); + 
Kokkos::View<scalar **, mem_space> b("b", M, N); + Kokkos::deep_copy(a, 0.0); + Kokkos::deep_copy(b, 1.0); + Kokkos::parallel_for( + "test_tphandle_by_value", + Kokkos::TeamPolicy<exec_space>(M, Kokkos::AUTO(), 1), + KOKKOS_LAMBDA( + const typename Kokkos::TeamPolicy<exec_space>::member_type team) { + const int i = team.league_rank(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0, N), + [&](const int j) { a(i, j) += b(i, j); }); + }); +#endif + } +}; + +} // namespace + } // namespace Test /*--------------------------------------------------------------------------*/ diff --git a/packages/kokkos/core/unit_test/TestTeamBasic.hpp b/packages/kokkos/core/unit_test/TestTeamBasic.hpp index 1700a74124d377b17944203cad01a8a31c6c16cd..87c010ac2a0c5701916049532a715c6a5addce15 100644 --- a/packages/kokkos/core/unit_test/TestTeamBasic.hpp +++ b/packages/kokkos/core/unit_test/TestTeamBasic.hpp @@ -65,6 +65,8 @@ TEST(TEST_CATEGORY, team_for) { 1000); } +// FIXME_OPENMPTARGET wrong results +#ifndef KOKKOS_ENABLE_OPENMPTARGET TEST(TEST_CATEGORY, team_reduce) { TestTeamPolicy<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); @@ -79,42 +81,31 @@ TEST(TEST_CATEGORY, team_reduce) { TestTeamPolicy<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); } - -TEST(TEST_CATEGORY, team_broadcast_long) { - // FIXME_OPENMPTARGET -#ifdef KOKKOS_ENABLE_OPENMPTARGET - if constexpr (!std::is_same<TEST_EXECSPACE, - Kokkos::Experimental::OpenMPTarget>::value) #endif - { - TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, - long>::test_teambroadcast(0, 1); - TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>, - long>::test_teambroadcast(0, 1); - - TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, - long>::test_teambroadcast(2, 1); - TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>, - long>::test_teambroadcast(2, 1); - - TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, 
- long>::test_teambroadcast(16, 1); - TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>, - long>::test_teambroadcast(16, 1); - TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, - long>::test_teambroadcast(1000, 1); - TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>, - long>::test_teambroadcast(1000, 1); - } +TEST(TEST_CATEGORY, team_broadcast_long) { + TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, + long>::test_teambroadcast(0, 1); + TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>, + long>::test_teambroadcast(0, 1); + + TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, + long>::test_teambroadcast(2, 1); + TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>, + long>::test_teambroadcast(2, 1); + + TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, + long>::test_teambroadcast(16, 1); + TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>, + long>::test_teambroadcast(16, 1); + + TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, + long>::test_teambroadcast(1000, 1); + TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic>, + long>::test_teambroadcast(1000, 1); } TEST(TEST_CATEGORY, team_broadcast_char) { - // FIXME_OPENMPTARGET -#ifdef KOKKOS_ENABLE_OPENMPTARGET - if constexpr (!std::is_same<TEST_EXECSPACE, - Kokkos::Experimental::OpenMPTarget>::value) -#endif { TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, unsigned char>::test_teambroadcast(0, 1); @@ -139,11 +130,6 @@ TEST(TEST_CATEGORY, team_broadcast_char) { } TEST(TEST_CATEGORY, team_broadcast_float) { - // FIXME_OPENMPTARGET -#ifdef KOKKOS_ENABLE_OPENMPTARGET - if constexpr (!std::is_same<TEST_EXECSPACE, - Kokkos::Experimental::OpenMPTarget>::value) -#endif { TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, float>::test_teambroadcast(0, 1.3); @@ -178,11 +164,6 @@ TEST(TEST_CATEGORY, 
team_broadcast_float) { } TEST(TEST_CATEGORY, team_broadcast_double) { - // FIXME_OPENMPTARGET -#ifdef KOKKOS_ENABLE_OPENMPTARGET - if constexpr (!std::is_same<TEST_EXECSPACE, - Kokkos::Experimental::OpenMPTarget>::value) -#endif { TestTeamBroadcast<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static>, double>::test_teambroadcast(0, 1.3); @@ -217,6 +198,10 @@ TEST(TEST_CATEGORY, team_broadcast_double) { } } +TEST(TEST_CATEGORY, team_handle_by_value) { + { TestTeamPolicyHandleByValue<TEST_EXECSPACE>(); } +} + } // namespace Test #ifndef KOKKOS_ENABLE_OPENMPTARGET diff --git a/packages/kokkos/core/unit_test/TestTeamScratch.hpp b/packages/kokkos/core/unit_test/TestTeamScratch.hpp index fd0f052b765253b96fd283861314d68d6f2d81a4..75ca3587629ded5f5cc2dd2f3b8ef6623e8a07f7 100644 --- a/packages/kokkos/core/unit_test/TestTeamScratch.hpp +++ b/packages/kokkos/core/unit_test/TestTeamScratch.hpp @@ -80,6 +80,10 @@ TEST(TEST_CATEGORY, shmem_size) { TestShmemSize<TEST_EXECSPACE>(); } TEST(TEST_CATEGORY, multi_level_scratch) { // FIXME_HIP the parallel_for and the parallel_reduce in this test requires a // team size larger than 256. Fixed In ROCm 3.9 + // FIXME_OPENMPTARGET This unit test needs ~350KB of scratch memory for L0 and + // L1 combined per team. Currently OpenMPTarget cannot allocate this high + // amount of scratch memory. 
+#if !defined(KOKKOS_ENABLE_OPENMPTARGET) #if defined(KOKKOS_ENABLE_HIP) && (HIP_VERSION < 309) if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::HIP>::value) #endif @@ -89,6 +93,7 @@ TEST(TEST_CATEGORY, multi_level_scratch) { TestMultiLevelScratchTeam<TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); } +#endif } } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp index 68f99fa3a0f99a376ff355a4ac9fb5b4f3fefd84..992e80397bacb9b5dc9a0746ca2543a1792cce22 100644 --- a/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp +++ b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp @@ -145,8 +145,14 @@ template <class T, int N, class PolicyType> void test_team_policy_max_recommended(int scratch_size) { test_team_policy_max_recommended_static_size<T, N, PolicyType, 1>( scratch_size); + // FIXME_SYCL prevent running out of total kernel argument size limit +#ifdef KOKKOS_ENABLE_SYCL + test_team_policy_max_recommended_static_size<T, N, PolicyType, 100>( + scratch_size); +#else test_team_policy_max_recommended_static_size<T, N, PolicyType, 1000>( scratch_size); +#endif } TEST(TEST_CATEGORY, team_policy_max_recommended) { @@ -186,7 +192,8 @@ template <typename TeamHandleType, typename ReducerValueType> struct PrintFunctor1 { KOKKOS_INLINE_FUNCTION void operator()(const TeamHandleType& team, ReducerValueType&) const { - printf("Test %i %i\n", int(team.league_rank()), int(team.team_rank())); + KOKKOS_IMPL_DO_NOT_USE_PRINTF("Test %i %i\n", int(team.league_rank()), + int(team.team_rank())); } }; @@ -194,7 +201,8 @@ template <typename TeamHandleType, typename ReducerValueType> struct PrintFunctor2 { KOKKOS_INLINE_FUNCTION void operator()(const TeamHandleType& team, ReducerValueType& teamVal) const { - printf("Test %i %i\n", int(team.league_rank()), int(team.team_rank())); + KOKKOS_IMPL_DO_NOT_USE_PRINTF("Test %i %i\n", int(team.league_rank()), + int(team.team_rank())); teamVal += 1; } }; 
diff --git a/packages/kokkos/core/unit_test/TestTeamVector.hpp b/packages/kokkos/core/unit_test/TestTeamVector.hpp index c2f47c9ec74e3c81e37a824758c3e292c0d58322..ba11dc07a962989f2826a3d0def3649112c00da6 100644 --- a/packages/kokkos/core/unit_test/TestTeamVector.hpp +++ b/packages/kokkos/core/unit_test/TestTeamVector.hpp @@ -76,9 +76,11 @@ struct functor_team_for { const size_type shmemSize = team.team_size() * 13; shared_int values = shared_int(team.team_shmem(), shmemSize); - if (values.data() == nullptr || values.extent(0) < shmemSize) { - printf("FAILED to allocate shared memory of size %u\n", - static_cast<unsigned int>(shmemSize)); + if (values.data() == nullptr || + static_cast<size_type>(values.extent(0)) < shmemSize) { + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED to allocate shared memory of size %u\n", + static_cast<unsigned int>(shmemSize)); } else { // Initialize shared memory. values(team.team_rank()) = 0; @@ -108,9 +110,10 @@ struct functor_team_for { } if (test != value) { - printf("FAILED team_parallel_for %i %i %f %f\n", team.league_rank(), - team.team_rank(), static_cast<double>(test), - static_cast<double>(value)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED team_parallel_for %i %i %f %f\n", team.league_rank(), + team.team_rank(), static_cast<double>(test), + static_cast<double>(value)); flag() = 1; } }); @@ -166,17 +169,18 @@ struct functor_team_reduce { if (test != value) { if (team.league_rank() == 0) { - printf("FAILED team_parallel_reduce %i %i %lf %lf %lu\n", - team.league_rank(), team.team_rank(), - static_cast<double>(test), static_cast<double>(value), - static_cast<unsigned long>(sizeof(Scalar))); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED team_parallel_reduce %i %i %lf %lf %lu\n", + team.league_rank(), team.team_rank(), static_cast<double>(test), + static_cast<double>(value), + static_cast<unsigned long>(sizeof(Scalar))); } flag() = 1; } if (test != shared_value(0)) { if (team.league_rank() == 0) { - printf( + 
KOKKOS_IMPL_DO_NOT_USE_PRINTF( "FAILED team_parallel_reduce with shared result %i %i %lf %lf " "%lu\n", team.league_rank(), team.team_rank(), static_cast<double>(test), @@ -237,14 +241,15 @@ struct functor_team_reduce_reducer { } if (test != value) { - printf("FAILED team_vector_parallel_reduce_reducer %i %i %lf %lf\n", - team.league_rank(), team.team_rank(), static_cast<double>(test), - static_cast<double>(value)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED team_vector_parallel_reduce_reducer %i %i %lf %lf\n", + team.league_rank(), team.team_rank(), static_cast<double>(test), + static_cast<double>(value)); flag() = 1; } if (test != shared_value(0)) { - printf( + KOKKOS_IMPL_DO_NOT_USE_PRINTF( "FAILED team_vector_parallel_reduce_reducer shared value %i %i %lf " "%lf\n", team.league_rank(), team.team_rank(), static_cast<double>(test), @@ -281,9 +286,11 @@ struct functor_team_vector_for { const size_type shmemSize = team.team_size() * 13; shared_int values = shared_int(team.team_shmem(), shmemSize); - if (values.data() == nullptr || values.extent(0) < shmemSize) { - printf("FAILED to allocate shared memory of size %u\n", - static_cast<unsigned int>(shmemSize)); + if (values.data() == nullptr || + static_cast<size_type>(values.extent(0)) < shmemSize) { + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED to allocate shared memory of size %u\n", + static_cast<unsigned int>(shmemSize)); } else { team.team_barrier(); @@ -313,9 +320,10 @@ struct functor_team_vector_for { } if (test != value) { - printf("FAILED team_vector_parallel_for %i %i %f %f\n", - team.league_rank(), team.team_rank(), - static_cast<double>(test), static_cast<double>(value)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED team_vector_parallel_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), static_cast<double>(test), + static_cast<double>(value)); flag() = 1; } @@ -363,10 +371,11 @@ struct functor_team_vector_reduce { if (test != value) { if (team.league_rank() == 0) { - printf("FAILED 
team_vector_parallel_reduce %i %i %f %f %lu\n", - team.league_rank(), team.team_rank(), - static_cast<double>(test), static_cast<double>(value), - static_cast<unsigned long>(sizeof(Scalar))); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED team_vector_parallel_reduce %i %i %f %f %lu\n", + team.league_rank(), team.team_rank(), static_cast<double>(test), + static_cast<double>(value), + static_cast<unsigned long>(sizeof(Scalar))); } flag() = 1; @@ -414,9 +423,10 @@ struct functor_team_vector_reduce_reducer { } if (test != value) { - printf("FAILED team_vector_parallel_reduce_reducer %i %i %f %f\n", - team.league_rank(), team.team_rank(), static_cast<double>(test), - static_cast<double>(value)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED team_vector_parallel_reduce_reducer %i %i %f %f\n", + team.league_rank(), team.team_rank(), static_cast<double>(test), + static_cast<double>(value)); flag() = 1; } @@ -460,8 +470,9 @@ struct functor_vec_single { [&](int /*i*/, Scalar &val) { val += value; }, value2); if (value2 != (value * Scalar(nEnd - nStart))) { - printf("FAILED vector_single broadcast %i %i %f %f\n", team.league_rank(), - team.team_rank(), (double)value2, (double)value); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED vector_single broadcast %i %i %f %f\n", team.league_rank(), + team.team_rank(), (double)value2, (double)value); flag() = 1; } @@ -491,8 +502,8 @@ struct functor_vec_for { if (values.data() == nullptr || values.extent(0) < (unsigned)team.team_size() * 13) { - printf("FAILED to allocate memory of size %i\n", - static_cast<int>(team.team_size() * 13)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF("FAILED to allocate memory of size %i\n", + static_cast<int>(team.team_size() * 13)); flag() = 1; } else { Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, 13), [&](int i) { @@ -512,9 +523,10 @@ struct functor_vec_for { } if (test != value) { - printf("FAILED vector_par_for %i %i %f %f\n", team.league_rank(), - team.team_rank(), static_cast<double>(test), - 
static_cast<double>(value)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF("FAILED vector_par_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast<double>(test), + static_cast<double>(value)); flag() = 1; } @@ -548,8 +560,9 @@ struct functor_vec_red { for (int i = 0; i < 13; i++) test += i; if (test != value) { - printf("FAILED vector_par_reduce %i %i %f %f\n", team.league_rank(), - team.team_rank(), (double)test, (double)value); + KOKKOS_IMPL_DO_NOT_USE_PRINTF("FAILED vector_par_reduce %i %i %f %f\n", + team.league_rank(), team.team_rank(), + (double)test, (double)value); flag() = 1; } @@ -586,9 +599,9 @@ struct functor_vec_red_reducer { for (int i = 0; i < 13; i++) test *= (i % 5 + 1); if (test != value) { - printf("FAILED vector_par_reduce_reducer %i %i %f %f\n", - team.league_rank(), team.team_rank(), (double)test, - (double)value); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED vector_par_reduce_reducer %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double)test, (double)value); flag() = 1; } @@ -616,9 +629,10 @@ struct functor_vec_scan { for (int k = 0; k <= i; k++) test += k; if (test != val) { - printf("FAILED vector_par_scan %i %i %f %f\n", - team.league_rank(), team.team_rank(), - (double)test, (double)val); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED vector_par_scan %i %i %f %f\n", + team.league_rank(), team.team_rank(), + (double)test, (double)val); flag() = 1; } @@ -850,6 +864,120 @@ class TestTripleNestedReduce { #endif +namespace VectorScanReducer { +enum class ScanType : bool { Inclusive, Exclusive }; + +template <typename ExecutionSpace, ScanType scan_type, int n, + int n_vector_range, class Reducer> +struct checkScan { + const int n_team_thread_range = 1000; + const int n_per_team = n_team_thread_range * n_vector_range; + + using size_type = typename ExecutionSpace::size_type; + using value_type = typename Reducer::value_type; + using view_type = Kokkos::View<value_type[n], ExecutionSpace>; + + view_type inputs = view_type{"inputs"}; + 
view_type outputs = view_type{"outputs"}; + + value_type result; + Reducer reducer = {result}; + + struct ThreadVectorFunctor { + KOKKOS_FUNCTION void operator()(const size_type j, value_type &update, + const bool final) const { + const size_type element = j + m_team_offset + m_thread_offset; + const auto tmp = m_inputs(element); + if (scan_type == ScanType::Inclusive) { + m_reducer.join(update, tmp); + if (final) { + m_outputs(element) = update; + } + } else { + if (final) { + m_outputs(element) = update; + } + m_reducer.join(update, tmp); + } + } + + const Reducer &m_reducer; + const size_type &m_team_offset; + const size_type &m_thread_offset; + const view_type &m_outputs; + const view_type &m_inputs; + }; + + struct TeamThreadRangeFunctor { + KOKKOS_FUNCTION void operator()(const size_type i) const { + const size_type thread_offset = i * n_vector_range; + Kokkos::parallel_scan( + Kokkos::ThreadVectorRange(m_team, n_vector_range), + ThreadVectorFunctor{m_reducer, m_team_offset, thread_offset, + m_outputs, m_inputs}, + m_reducer); + } + + const typename Kokkos::TeamPolicy<ExecutionSpace>::member_type &m_team; + const Reducer &m_reducer; + const size_type &m_team_offset; + const view_type &m_outputs; + const view_type &m_inputs; + }; + + KOKKOS_FUNCTION void operator()( + const typename Kokkos::TeamPolicy<ExecutionSpace>::member_type &team) + const { + const size_type iTeam = team.league_rank(); + const size_type iTeamOffset = iTeam * n_per_team; + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, n_team_thread_range), + TeamThreadRangeFunctor{team, reducer, iTeamOffset, outputs, inputs}); + } + + KOKKOS_FUNCTION void operator()(size_type i) const { inputs(i) = i * 1. 
/ n; } + + void run() { + const int n_teams = n / n_per_team; + + Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0, n), *this); + + // run ThreadVectorRange parallel_scan + Kokkos::TeamPolicy<ExecutionSpace> policy(n_teams, Kokkos::AUTO, + Kokkos::AUTO); + const std::string label = + (scan_type == ScanType::Inclusive ? std::string("inclusive") + : std::string("exclusive")) + + "Scan" + typeid(Reducer).name(); + Kokkos::parallel_for(label, policy, *this); + Kokkos::fence(); + + auto host_outputs = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, outputs); + auto host_inputs = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, inputs); + + Kokkos::View<value_type[n], Kokkos::HostSpace> expected("expected"); + { + value_type identity; + reducer.init(identity); + for (int i = 0; i < expected.extent_int(0); ++i) { + const int vector = i % n_vector_range; + const value_type accum = vector == 0 ? identity : expected(i - 1); + const value_type val = + scan_type == ScanType::Inclusive + ? host_inputs(i) + : (vector == 0 ? 
identity : host_inputs(i - 1)); + expected(i) = accum; + reducer.join(expected(i), val); + } + } + for (int i = 0; i < host_outputs.extent_int(0); ++i) + ASSERT_EQ(host_outputs(i), expected(i)); + } +}; +} // namespace VectorScanReducer + #if !(defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) || defined(KOKKOS_ENABLE_HIP)) TEST(TEST_CATEGORY, team_vector) { ASSERT_TRUE((TestTeamVector::Test<TEST_EXECSPACE>(0))); @@ -887,4 +1015,33 @@ TEST(TEST_CATEGORY, triple_nested_parallelism) { } #endif +TEST(TEST_CATEGORY, parallel_scan_with_reducers) { + using T = double; + using namespace VectorScanReducer; + + static constexpr int n = 1000000; + static constexpr int n_vector_range = 100; + + checkScan<TEST_EXECSPACE, ScanType::Exclusive, n, n_vector_range, + Kokkos::Prod<T, TEST_EXECSPACE>>() + .run(); + checkScan<TEST_EXECSPACE, ScanType::Inclusive, n, n_vector_range, + Kokkos::Prod<T, TEST_EXECSPACE>>() + .run(); + + checkScan<TEST_EXECSPACE, ScanType::Exclusive, n, n_vector_range, + Kokkos::Max<T, TEST_EXECSPACE>>() + .run(); + checkScan<TEST_EXECSPACE, ScanType::Inclusive, n, n_vector_range, + Kokkos::Max<T, TEST_EXECSPACE>>() + .run(); + + checkScan<TEST_EXECSPACE, ScanType::Exclusive, n, n_vector_range, + Kokkos::Min<T, TEST_EXECSPACE>>() + .run(); + checkScan<TEST_EXECSPACE, ScanType::Inclusive, n, n_vector_range, + Kokkos::Min<T, TEST_EXECSPACE>>() + .run(); +} + } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestTeamVectorRange.hpp b/packages/kokkos/core/unit_test/TestTeamVectorRange.hpp index 1b64fef0507ac45863ce62e8d2f375a877035891..7342ebad8433526719b52058ff6d6b75e41a107a 100644 --- a/packages/kokkos/core/unit_test/TestTeamVectorRange.hpp +++ b/packages/kokkos/core/unit_test/TestTeamVectorRange.hpp @@ -169,17 +169,17 @@ struct my_complex { } KOKKOS_INLINE_FUNCTION - bool operator==(const my_complex& src) { + bool operator==(const my_complex& src) const { return (re == src.re) && (im == src.im) && (dummy == src.dummy); } KOKKOS_INLINE_FUNCTION - bool 
operator!=(const my_complex& src) { + bool operator!=(const my_complex& src) const { return (re != src.re) || (im != src.im) || (dummy != src.dummy); } KOKKOS_INLINE_FUNCTION - bool operator!=(const double& val) { + bool operator!=(const double& val) const { return (re != val) || (im != 0) || (dummy != 0); } @@ -244,8 +244,9 @@ struct functor_teamvector_for { shared_int values = shared_int(team.team_shmem(), shmemSize); if (values.data() == nullptr || values.extent(0) < shmemSize) { - printf("FAILED to allocate shared memory of size %u\n", - static_cast<unsigned int>(shmemSize)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED to allocate shared memory of size %u\n", + static_cast<unsigned int>(shmemSize)); } else { // Initialize shared memory. Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 131), @@ -278,9 +279,10 @@ struct functor_teamvector_for { } if (test != value) { - printf("FAILED teamvector_parallel_for %i %i %f %f\n", - team.league_rank(), team.team_rank(), - static_cast<double>(test), static_cast<double>(value)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED teamvector_parallel_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), static_cast<double>(test), + static_cast<double>(value)); flag() = 1; } }); @@ -344,17 +346,18 @@ struct functor_teamvector_reduce { if (test != value) { if (team.league_rank() == 0) { - printf("FAILED teamvector_parallel_reduce %i %i %lf %lf %lu\n", - (int)team.league_rank(), (int)team.team_rank(), - static_cast<double>(test), static_cast<double>(value), - static_cast<unsigned long>(sizeof(Scalar))); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED teamvector_parallel_reduce %i %i %lf %lf %lu\n", + (int)team.league_rank(), (int)team.team_rank(), + static_cast<double>(test), static_cast<double>(value), + static_cast<unsigned long>(sizeof(Scalar))); } flag() = 1; } if (test != shared_value(0)) { if (team.league_rank() == 0) { - printf( + KOKKOS_IMPL_DO_NOT_USE_PRINTF( "FAILED teamvector_parallel_reduce with shared result %i %i %lf " 
"%lf %lu\n", static_cast<int>(team.league_rank()), @@ -416,14 +419,15 @@ struct functor_teamvector_reduce_reducer { } if (test != value) { - printf("FAILED teamvector_parallel_reduce_reducer %i %i %lf %lf\n", - team.league_rank(), team.team_rank(), static_cast<double>(test), - static_cast<double>(value)); + KOKKOS_IMPL_DO_NOT_USE_PRINTF( + "FAILED teamvector_parallel_reduce_reducer %i %i %lf %lf\n", + team.league_rank(), team.team_rank(), static_cast<double>(test), + static_cast<double>(value)); flag() = 1; } if (test != shared_value(0)) { - printf( + KOKKOS_IMPL_DO_NOT_USE_PRINTF( "FAILED teamvector_parallel_reduce_reducer shared value %i %i %lf " "%lf\n", team.league_rank(), team.team_rank(), static_cast<double>(test), @@ -443,15 +447,35 @@ bool test_scalar(int nteams, int team_size, int test) { h_flag() = 0; Kokkos::deep_copy(d_flag, h_flag); + Kokkos::TeamPolicy<ExecutionSpace> policy(nteams, team_size, 8); + + // FIXME_OPENMPTARGET - Need to allocate scratch space via set_scratch_space + // for the OPENMPTARGET backend. +#ifdef KOKKOS_ENABLE_OPENMPTARGET + using scratch_t = Kokkos::View<Scalar*, ExecutionSpace, + Kokkos::MemoryTraits<Kokkos::Unmanaged> >; + + int scratch_size = 0; + if (test == 0) { + scratch_size = scratch_t::shmem_size(131); + } else { + // FIXME_OPENMPTARGET - Currently allocating more than one team for nested + // reduction leads to runtime errors of illegal memory access, caused mostly + // due to the OpenMP memory allocation constraints. 
+ policy = Kokkos::TeamPolicy<ExecutionSpace>(1, team_size, 8); + scratch_size = scratch_t::shmem_size(1); + } + + policy.set_scratch_size(0, Kokkos::PerTeam(scratch_size)); +#endif + if (test == 0) { Kokkos::parallel_for( - "Test::TeamVectorFor", - Kokkos::TeamPolicy<ExecutionSpace>(nteams, team_size, 8), + "Test::TeamVectorFor", policy, functor_teamvector_for<Scalar, ExecutionSpace>(d_flag)); } else if (test == 1) { Kokkos::parallel_for( - "Test::TeamVectorReduce", - Kokkos::TeamPolicy<ExecutionSpace>(nteams, team_size, 8), + "Test::TeamVectorReduce", policy, functor_teamvector_reduce<Scalar, ExecutionSpace>(d_flag)); } else if (test == 2) { Kokkos::parallel_for( @@ -477,8 +501,12 @@ bool Test(int test) { test_scalar<long long int, ExecutionSpace>(317, team_size, test); passed = passed && test_scalar<float, ExecutionSpace>(317, team_size, test); passed = passed && test_scalar<double, ExecutionSpace>(317, team_size, test); + // FIXME_OPENMPTARGET - Use of custom reducers currently results in runtime + // memory errors. +#if !defined(KOKKOS_ENABLE_OPENMPTARGET) passed = passed && test_scalar<my_complex, ExecutionSpace>(317, team_size, test); +#endif return passed; } @@ -490,6 +518,10 @@ namespace Test { TEST(TEST_CATEGORY, team_teamvector_range) { ASSERT_TRUE((TestTeamVectorRange::Test<TEST_EXECSPACE>(0))); ASSERT_TRUE((TestTeamVectorRange::Test<TEST_EXECSPACE>(1))); + // FIXME_OPENMPTARGET - Use of kokkos reducers currently results in runtime + // memory errors. 
+#if !defined(KOKKOS_ENABLE_OPENMPTARGET) ASSERT_TRUE((TestTeamVectorRange::Test<TEST_EXECSPACE>(2))); +#endif } } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestUniqueToken.hpp b/packages/kokkos/core/unit_test/TestUniqueToken.hpp index d78c35c6815d07bbbe37d83b0798955e07f9ce4a..4ba48bf73f069c6097a079ce1bcde5fd9452155c 100644 --- a/packages/kokkos/core/unit_test/TestUniqueToken.hpp +++ b/packages/kokkos/core/unit_test/TestUniqueToken.hpp @@ -132,6 +132,8 @@ class TestUniqueToken { } } + // FIXME_SYCL wrong result on NVIDIA GPUs but correct on host and Intel GPUs +#ifndef KOKKOS_ENABLE_SYCL // Count test for pull request #3260 { constexpr int N = 1000000; @@ -148,6 +150,7 @@ class TestUniqueToken { self, sum); ASSERT_EQ(sum, int64_t(N) * R); } +#endif std::cout << "TestUniqueToken max reuse = " << max << std::endl; @@ -233,7 +236,13 @@ class TestAcquireTeamUniqueToken { { const int duplicate = 100; - const long n = duplicate * self.tokens.size(); + // FIXME_SYCL The number of workgroups on CUDA devices can not be larger + // than 65535 +#ifdef KOKKOS_ENABLE_SYCL + const long n = std::min(65535, duplicate * self.tokens.size()); +#else + const long n = duplicate * self.tokens.size(); +#endif team_policy_type team_policy(n, team_size); team_policy.set_scratch_size( @@ -271,7 +280,10 @@ class TestAcquireTeamUniqueToken { }; TEST(TEST_CATEGORY, acquire_team_unique_token) { + // FIXME_OPENMPTARGET - Not yet implemented. 
+#if !defined(KOKKOS_ENABLE_OPENMPTARGET) TestAcquireTeamUniqueToken<TEST_EXECSPACE>::run(); +#endif } } // namespace Test diff --git a/packages/kokkos/core/unit_test/TestUtilities.hpp b/packages/kokkos/core/unit_test/TestUtilities.hpp index c9352c0d7fa25a5dda7fc8910609b7158e251f67..1d3e19da105161e0b71c733ad2bb1232add1d8aa 100644 --- a/packages/kokkos/core/unit_test/TestUtilities.hpp +++ b/packages/kokkos/core/unit_test/TestUtilities.hpp @@ -70,307 +70,6 @@ void test_is_specialization_of() { ""); } -inline void test_utilities() { - using namespace Kokkos::Impl; - - { - using i = integer_sequence<int>; - using j = make_integer_sequence<int, 0>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 0u, "Error: integer_sequence.size()"); - } - - { - using i = integer_sequence<int, 0>; - using j = make_integer_sequence<int, 1>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 1u, "Error: integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1>; - using j = make_integer_sequence<int, 2>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 2u, "Error: integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1, 2>; - using j = make_integer_sequence<int, 3>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 3u, "Error: 
integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<2, i>::value == 2, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - static_assert(at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1, 2, 3>; - using j = make_integer_sequence<int, 4>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 4u, "Error: integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<2, i>::value == 2, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<3, i>::value == 3, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - static_assert(at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)"); - static_assert(at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1, 2, 3, 4>; - using j = make_integer_sequence<int, 5>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 5u, "Error: integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<2, i>::value == 2, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<3, i>::value == 3, - 
"Error: integer_sequence_at"); - static_assert(integer_sequence_at<4, i>::value == 4, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - static_assert(at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)"); - static_assert(at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)"); - static_assert(at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1, 2, 3, 4, 5>; - using j = make_integer_sequence<int, 6>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 6u, "Error: integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<2, i>::value == 2, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<3, i>::value == 3, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<4, i>::value == 4, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<5, i>::value == 5, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - static_assert(at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)"); - static_assert(at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)"); - static_assert(at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)"); - static_assert(at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1, 2, 3, 4, 5, 6>; - using j = make_integer_sequence<int, 7>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 7u, "Error: integer_sequence.size()"); - - 
static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<2, i>::value == 2, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<3, i>::value == 3, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<4, i>::value == 4, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<5, i>::value == 5, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<6, i>::value == 6, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - static_assert(at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)"); - static_assert(at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)"); - static_assert(at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)"); - static_assert(at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)"); - static_assert(at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1, 2, 3, 4, 5, 6, 7>; - using j = make_integer_sequence<int, 8>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 8u, "Error: integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<2, i>::value == 2, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<3, i>::value == 3, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<4, i>::value == 4, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<5, i>::value == 5, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<6, i>::value == 6, - "Error: 
integer_sequence_at"); - static_assert(integer_sequence_at<7, i>::value == 7, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - static_assert(at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)"); - static_assert(at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)"); - static_assert(at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)"); - static_assert(at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)"); - static_assert(at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)"); - static_assert(at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1, 2, 3, 4, 5, 6, 7, 8>; - using j = make_integer_sequence<int, 9>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 9u, "Error: integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<2, i>::value == 2, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<3, i>::value == 3, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<4, i>::value == 4, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<5, i>::value == 5, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<6, i>::value == 6, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<7, i>::value == 7, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<8, i>::value == 8, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - static_assert(at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)"); - 
static_assert(at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)"); - static_assert(at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)"); - static_assert(at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)"); - static_assert(at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)"); - static_assert(at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)"); - static_assert(at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = integer_sequence<int, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9>; - using j = make_integer_sequence<int, 10>; - - static_assert(std::is_same<i, j>::value, "Error: make_integer_sequence"); - static_assert(i::size() == 10u, "Error: integer_sequence.size()"); - - static_assert(integer_sequence_at<0, i>::value == 0, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<1, i>::value == 1, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<2, i>::value == 2, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<3, i>::value == 3, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<4, i>::value == 4, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<5, i>::value == 5, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<6, i>::value == 6, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<7, i>::value == 7, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<8, i>::value == 8, - "Error: integer_sequence_at"); - static_assert(integer_sequence_at<9, i>::value == 9, - "Error: integer_sequence_at"); - - static_assert(at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)"); - static_assert(at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)"); - static_assert(at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)"); - static_assert(at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)"); - static_assert(at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)"); - static_assert(at(5, i{}) 
== 5, "Error: at(unsigned, integer_sequence)"); - static_assert(at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)"); - static_assert(at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)"); - static_assert(at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)"); - static_assert(at(9, i{}) == 9, "Error: at(unsigned, integer_sequence)"); - } - - { - using i = make_integer_sequence<int, 5>; - using r = reverse_integer_sequence<i>; - using gr = integer_sequence<int, 4, 3, 2, 1, 0>; - - static_assert(std::is_same<r, gr>::value, - "Error: reverse_integer_sequence"); - } - - { - using s = make_integer_sequence<int, 10>; - using e = exclusive_scan_integer_sequence<s>; - using i = inclusive_scan_integer_sequence<s>; - - using ge = integer_sequence<int, 0, 0, 1, 3, 6, 10, 15, 21, 28, 36>; - using gi = integer_sequence<int, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45>; - - static_assert(e::value == 45, "Error: scan value"); - static_assert(i::value == 45, "Error: scan value"); - - static_assert(std::is_same<e::type, ge>::value, "Error: exclusive_scan"); - static_assert(std::is_same<i::type, gi>::value, "Error: inclusive_scan"); - } -} - template <std::size_t... Idxs, class... Args> std::size_t do_comma_emulation_test(std::integer_sequence<std::size_t, Idxs...>, Args... args) { diff --git a/packages/kokkos/core/unit_test/TestViewAPI.hpp b/packages/kokkos/core/unit_test/TestViewAPI.hpp index e85942a8de54a8068937c6ba9280a8c045e2902e..570281f9fd66a230e69b9bb924a84a0078e12168 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI.hpp @@ -1145,8 +1145,6 @@ class TestViewAPI { // T v2 = hx( 0, 0 ); // Generates compile error as intended. // hx( 0, 0 ) = v2; // Generates compile error as intended. 
- // FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL // Testing with asynchronous deep copy with respect to device { size_t count = 0; @@ -1249,7 +1247,6 @@ class TestViewAPI { ASSERT_EQ(hx(ip, i1, i2, i3), T(0)); } } -#endif dz = dx; ASSERT_EQ(dx, dz); @@ -1481,12 +1478,6 @@ class TestViewAPI { if (std::is_same<typename dView1::memory_space, Kokkos::Experimental::OpenMPTargetSpace>::value) return; -#endif -// FIXME_SYCL -#ifdef KOKKOS_ENABLE_SYCL - if (std::is_same<typename dView1::memory_space, - Kokkos::Experimental::SYCLDeviceUSMSpace>::value) - return; #endif auto alloc_size = std::numeric_limits<size_t>::max() - 42; try { @@ -1504,10 +1495,21 @@ class TestViewAPI { // quickly. if (msg.find("is not a valid size") != std::string::npos) { ASSERT_PRED_FORMAT2(::testing::IsSubstring, "is not a valid size", msg); - } else { - // Otherwise, there has to be some sort of "insufficient memory" error + } else +#ifdef KOKKOS_ENABLE_SYCL + if (msg.find("insufficient memory") != std::string::npos) +#endif + { ASSERT_PRED_FORMAT2(::testing::IsSubstring, "insufficient memory", msg); } + // SYCL cannot tell the reason why a memory allocation failed +#ifdef KOKKOS_ENABLE_SYCL + else { + // Otherwise, there has to be some sort of "unknown error" error + ASSERT_PRED_FORMAT2(::testing::IsSubstring, + "because of an unknown error.", msg); + } +#endif } } }; diff --git a/packages/kokkos/core/unit_test/TestViewAPI_c.hpp b/packages/kokkos/core/unit_test/TestViewAPI_c.hpp index 0cfe1b8c0eb5190fbcbae44777dd398319beb900..a70792dc623b63bb8aa1a84fec93ca413ffa94a1 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_c.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_c.hpp @@ -47,10 +47,7 @@ namespace Test { TEST(TEST_CATEGORY, view_api_c) { - // FIXME_SYCL requires deep_copy on the default memory space -#ifndef KOKKOS_ENABLE_SYCL TestViewAPI<double, TEST_EXECSPACE>::run_test_deep_copy_empty(); -#endif TestViewAPI<double, TEST_EXECSPACE>::run_test_view_operator_b(); } 
diff --git a/packages/kokkos/core/unit_test/TestViewAPI_e.hpp b/packages/kokkos/core/unit_test/TestViewAPI_e.hpp index cb586c76a700e1acad69c185e13f496c61a9f6c2..a5dc6cf29a467bd576bd96bca52f90b3db26324b 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_e.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_e.hpp @@ -98,8 +98,6 @@ TEST(TEST_CATEGORY, view_remap) { Kokkos::fence(); // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. - // FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL Kokkos::deep_copy(output, input); Kokkos::fence(); @@ -112,7 +110,6 @@ TEST(TEST_CATEGORY, view_remap) { ++value; ASSERT_EQ(value, ((int)output(i0, i1, i2, i3))); } -#endif } TEST(TEST_CATEGORY, view_mirror_nonconst) { diff --git a/packages/kokkos/core/unit_test/TestViewCopy_a.hpp b/packages/kokkos/core/unit_test/TestViewCopy_a.hpp index f0b5b8ff9f9ce750fe2476037c67de1a3541578f..e25cb9e39ca6fd4c3cd45ef2b60b404ed82c03e7 100644 --- a/packages/kokkos/core/unit_test/TestViewCopy_a.hpp +++ b/packages/kokkos/core/unit_test/TestViewCopy_a.hpp @@ -104,8 +104,6 @@ TEST(TEST_CATEGORY, view_copy_tests) { typename TEST_EXECSPACE::memory_space>::accessible; // Contiguous copies - // FIXME_SYCL requires MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL { Kokkos::deep_copy(defaulted, defaulted); } { Kokkos::deep_copy(a, 1); @@ -151,7 +149,6 @@ TEST(TEST_CATEGORY, view_copy_tests) { Kokkos::deep_copy(b, h_b); ASSERT_TRUE(run_check(b, 4)); } -#endif // Non contiguous copies { Kokkos::deep_copy(s_a, 5); @@ -180,8 +177,6 @@ TEST(TEST_CATEGORY, view_copy_tests) { } } - // FIXME_SYCL requires MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL // Contiguous copies { Kokkos::deep_copy(dev, defaulted, defaulted); } { @@ -228,9 +223,6 @@ TEST(TEST_CATEGORY, view_copy_tests) { Kokkos::deep_copy(dev, b, h_b); ASSERT_TRUE(run_check(b, 4)); } -#endif - - // WORKS if commenting out below stuff // Non contiguous copies { Kokkos::deep_copy(dev, s_a, 5); @@ -259,8 +251,6 @@ TEST(TEST_CATEGORY, 
view_copy_tests) { } } - // FIXME_SYCL requires MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL // Contiguous copies { Kokkos::deep_copy(host, defaulted, defaulted); } { @@ -307,7 +297,6 @@ TEST(TEST_CATEGORY, view_copy_tests) { Kokkos::deep_copy(host, b, h_b); ASSERT_TRUE(run_check(b, 4)); } -#endif // Non contiguous copies { Kokkos::deep_copy(host, s_a, 5); diff --git a/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp index 31108af38e0c8ceed163cee5db77220fdbe855f5..9ce3a34236956572b5a63c38765c05564a536140 100644 --- a/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp +++ b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp @@ -95,8 +95,6 @@ TEST(TEST_CATEGORY, view_layoutstride_left_to_layoutleft_assignment) { ASSERT_EQ(dst.span(), src.span()); ASSERT_EQ(test, true); } - // FIXME_SYCL requires MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL { // Assignment of rank-2 LayoutLeft = LayoutStride int ndims = 2; int dims[] = {10, 9}; @@ -335,7 +333,6 @@ TEST(TEST_CATEGORY, view_layoutstride_left_to_layoutleft_assignment) { ASSERT_EQ(dst.span(), src.span()); ASSERT_EQ(test, true); } -#endif } TEST(TEST_CATEGORY, view_layoutstride_right_to_layoutright_assignment) { @@ -380,8 +377,6 @@ TEST(TEST_CATEGORY, view_layoutstride_right_to_layoutright_assignment) { ASSERT_EQ(dst.span(), src.span()); ASSERT_EQ(test, true); } - // FIXME_SYCL requires MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL { // Assignment of rank-2 LayoutRight = LayoutStride int ndims = 2; int dims[] = {10, 9}; @@ -620,7 +615,6 @@ TEST(TEST_CATEGORY, view_layoutstride_right_to_layoutright_assignment) { ASSERT_EQ(dst.span(), src.span()); ASSERT_EQ(test, true); } -#endif } TEST(TEST_CATEGORY_DEATH, view_layoutstride_right_to_layoutleft_assignment) { @@ -667,9 +661,8 @@ TEST(TEST_CATEGORY_DEATH, view_layoutstride_right_to_layoutleft_assignment) { ASSERT_EQ(dst.span(), src.span()); ASSERT_EQ(test, true); } -// 
FIXME_SYCL deadlocks // WORKAROUND OPENMPTARGET : death tests don't seem to work ... -#if defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_SYCL) +#if defined(KOKKOS_ENABLE_OPENMPTARGET) return; #endif { // Assignment of rank-2 LayoutLeft = LayoutStride (LayoutRight compatible) @@ -823,9 +816,8 @@ TEST(TEST_CATEGORY_DEATH, view_layoutstride_left_to_layoutright_assignment) { ASSERT_EQ(dst.span(), src.span()); ASSERT_EQ(test, true); } -// FIXME_SYCL deadlocks // WORKAROUND OPENMPTARGET : death tests don't seem to work ... -#if defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_SYCL) +#if defined(KOKKOS_ENABLE_OPENMPTARGET) return; #endif { // Assignment of rank-2 LayoutRight = LayoutStride (LayoutLeft compatible) diff --git a/packages/kokkos/core/unit_test/TestViewMapping_a.hpp b/packages/kokkos/core/unit_test/TestViewMapping_a.hpp index 15d2976499d4533ce632bfefb654895fc0c7c9eb..fdbda099176c79410c1be6599546f09aba3269dc 100644 --- a/packages/kokkos/core/unit_test/TestViewMapping_a.hpp +++ b/packages/kokkos/core/unit_test/TestViewMapping_a.hpp @@ -905,8 +905,6 @@ void test_view_mapping() { Kokkos::Impl::ViewCtorProp<int*>(nullptr), stride); } - // FIXME_SYCL requires MDRangePolicy -#ifndef KOKKOS_ENABLE_SYCL { using V = Kokkos::View<int**, Space>; using M = typename V::HostMirror; @@ -1033,7 +1031,6 @@ void test_view_mapping() { ASSERT_EQ(d.extent(0), 7); ASSERT_EQ(d.extent(1), 8); } -#endif { using V = Kokkos::View<int*, Space>; diff --git a/packages/kokkos/core/unit_test/TestViewSubview.hpp b/packages/kokkos/core/unit_test/TestViewSubview.hpp index b28f09934d598881e35748ef86a60771cfb42e81..0125017d93786101e2a23a866effe9d8a5e5242d 100644 --- a/packages/kokkos/core/unit_test/TestViewSubview.hpp +++ b/packages/kokkos/core/unit_test/TestViewSubview.hpp @@ -2036,8 +2036,6 @@ template <class Space, class MemTraits = void> void test_layoutleft_to_layoutleft() { Impl::test_subview_legal_args_left(); - // FIXME_SYCL requires MDRange policy -#ifndef 
KOKKOS_ENABLE_SYCL using view3D_t = Kokkos::View<int***, Kokkos::LayoutLeft, Space>; using view4D_t = Kokkos::View<int****, Kokkos::LayoutLeft, Space>; { @@ -2075,15 +2073,12 @@ void test_layoutleft_to_layoutleft() { 1); check.run(); } -#endif } template <class Space, class MemTraits = void> void test_layoutright_to_layoutright() { Impl::test_subview_legal_args_right(); - // FIXME_SYCL requires MDRange policy -#ifndef KOKKOS_ENABLE_SYCL using view3D_t = Kokkos::View<int***, Kokkos::LayoutRight, Space>; using view4D_t = Kokkos::View<int****, Kokkos::LayoutRight, Space>; { @@ -2107,7 +2102,6 @@ void test_layoutright_to_layoutright() { 0); check.run(); } -#endif } //---------------------------------------------------------------------------- @@ -2139,6 +2133,51 @@ void test_unmanaged_subview_reset() { //---------------------------------------------------------------------------- +template <std::underlying_type_t<Kokkos::MemoryTraitsFlags> MTF> +struct TestSubviewMemoryTraitsConstruction { + void operator()() const noexcept { + using view_type = Kokkos::View<double*, Kokkos::HostSpace>; + using size_type = view_type::size_type; + using memory_traits_type = Kokkos::MemoryTraits<MTF>; + + view_type v("v", 7); + for (size_type i = 0; i != v.size(); ++i) v[i] = static_cast<double>(i); + + std::pair<int, int> range(3, 5); + auto sv = Kokkos::subview<memory_traits_type>(v, range); + + ASSERT_EQ(2u, sv.size()); + EXPECT_EQ(3., sv[0]); + EXPECT_EQ(4., sv[1]); + } +}; + +inline void test_subview_memory_traits_construction() { + // Test all combinations of MemoryTraits: + // Unmanaged (1) + // RandomAccess (2) + // Atomic (4) + // Restricted (8) + TestSubviewMemoryTraitsConstruction<0>()(); + TestSubviewMemoryTraitsConstruction<1>()(); + TestSubviewMemoryTraitsConstruction<2>()(); + TestSubviewMemoryTraitsConstruction<3>()(); + TestSubviewMemoryTraitsConstruction<4>()(); + TestSubviewMemoryTraitsConstruction<5>()(); + TestSubviewMemoryTraitsConstruction<6>()(); + 
TestSubviewMemoryTraitsConstruction<7>()(); + TestSubviewMemoryTraitsConstruction<8>()(); + TestSubviewMemoryTraitsConstruction<9>()(); + TestSubviewMemoryTraitsConstruction<10>()(); + TestSubviewMemoryTraitsConstruction<11>()(); + TestSubviewMemoryTraitsConstruction<12>()(); + TestSubviewMemoryTraitsConstruction<13>()(); + TestSubviewMemoryTraitsConstruction<14>()(); + TestSubviewMemoryTraitsConstruction<15>()(); +} + +//---------------------------------------------------------------------------- + template <class T> struct get_view_type; diff --git a/packages/kokkos/core/unit_test/TestView_64bit.hpp b/packages/kokkos/core/unit_test/TestView_64bit.hpp index 7dc47ccb0f299133a30f042626c2f45151de19ee..50626718b5774ddefa03a453402564986e831ed1 100644 --- a/packages/kokkos/core/unit_test/TestView_64bit.hpp +++ b/packages/kokkos/core/unit_test/TestView_64bit.hpp @@ -49,7 +49,12 @@ namespace Test { template <class Device> void test_64bit() { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) - int64_t N = 5000000000; + // FIXME_SYCL The SYCL CUDA backend throws an error +#ifdef KOKKOS_ENABLE_SYCL + int64_t N = 1000000000; +#else + int64_t N = 5000000000; +#endif int64_t sum = 0; { Kokkos::parallel_reduce( diff --git a/packages/kokkos/core/unit_test/Test_InterOp_Streams.hpp b/packages/kokkos/core/unit_test/Test_InterOp_Streams.hpp index 4c16147a360c0f6797cd4d7c3b3b541a75470b03..6af731b9fa3e037598123add65071c1efa341187 100644 --- a/packages/kokkos/core/unit_test/Test_InterOp_Streams.hpp +++ b/packages/kokkos/core/unit_test/Test_InterOp_Streams.hpp @@ -46,12 +46,14 @@ namespace Test { +#ifndef KOKKOS_ENABLE_SYCL __global__ void offset_streams(int* p) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < 100) { p[idx] += idx; } } +#endif template <typename MemorySpace> struct FunctorRange { diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestCudaHostPinned_Category.hpp similarity index 100% 
rename from packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestCudaHostPinned_Category.hpp diff --git a/packages/kokkos/core/unit_test/TestCudaUVM_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestCudaUVM_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/TestCudaUVM_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestCudaUVM_Category.hpp diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestCuda_Category.hpp similarity index 98% rename from packages/kokkos/core/unit_test/cuda/TestCuda_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestCuda_Category.hpp index 6831200df9068a3f93ca54bdd7977d3a621d8b0c..22666dc82fab611ee08aa7555e9b56ae0b2f148a 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestCuda_Category.hpp @@ -48,6 +48,7 @@ #include <gtest/gtest.h> #define TEST_CATEGORY cuda +#define TEST_CATEGORY_NUMBER 5 #define TEST_CATEGORY_DEATH cuda_DeathTest #define TEST_EXECSPACE Kokkos::Cuda #define TEST_CATEGORY_FIXTURE(name) cuda_##name diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestDefaultDeviceType_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/default/TestDefaultDeviceType_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestDefaultDeviceType_Category.hpp diff --git a/packages/kokkos/core/unit_test/TestHIPHostPinned_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestHIPHostPinned_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/TestHIPHostPinned_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestHIPHostPinned_Category.hpp diff --git 
a/packages/kokkos/core/unit_test/TestHIP_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestHIP_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/TestHIP_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestHIP_Category.hpp diff --git a/packages/kokkos/core/unit_test/TestHPX_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestHPX_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/TestHPX_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestHPX_Category.hpp diff --git a/packages/kokkos/core/unit_test/TestOpenMPTarget_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/TestOpenMPTarget_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp diff --git a/packages/kokkos/core/unit_test/TestOpenMP_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestOpenMP_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/TestOpenMP_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestOpenMP_Category.hpp diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestSYCLSharedUSMSpace_Category.hpp similarity index 91% rename from packages/kokkos/core/unit_test/hip/TestHIP_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestSYCLSharedUSMSpace_Category.hpp index 8cae165c3374330148023ac58d832b317692a752..1ec89fc61a594989f58b5076af6477be051183e8 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestSYCLSharedUSMSpace_Category.hpp @@ -42,12 +42,12 @@ //@HEADER */ -#ifndef KOKKOS_TEST_HIP_HPP -#define KOKKOS_TEST_HIP_HPP +#ifndef KOKKOS_TEST_SYCL_SHARED_USM_SPACE_HPP +#define KOKKOS_TEST_SYCL_SHARED_USM_SPACE_HPP #include 
<gtest/gtest.h> -#define TEST_CATEGORY hip -#define TEST_EXECSPACE Kokkos::Experimental::HIP +#define TEST_CATEGORY sycl_shared_usm +#define TEST_EXECSPACE Kokkos::Experimental::SYCLSharedUSMSpace #endif diff --git a/packages/kokkos/core/unit_test/TestSYCL_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp similarity index 98% rename from packages/kokkos/core/unit_test/TestSYCL_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp index cd4c0ed22aac43d594df917de084bac9869bf737..345f40d1c39f403dd62369c8cfa668ed1c75a951 100644 --- a/packages/kokkos/core/unit_test/TestSYCL_Category.hpp +++ b/packages/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp @@ -48,6 +48,7 @@ #include <gtest/gtest.h> #define TEST_CATEGORY sycl +#define TEST_CATEGORY_NUMBER 7 #define TEST_EXECSPACE Kokkos::Experimental::SYCL #endif diff --git a/packages/kokkos/core/unit_test/TestSerial_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestSerial_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/TestSerial_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestSerial_Category.hpp diff --git a/packages/kokkos/core/unit_test/TestThreads_Category.hpp b/packages/kokkos/core/unit_test/category_files/TestThreads_Category.hpp similarity index 100% rename from packages/kokkos/core/unit_test/TestThreads_Category.hpp rename to packages/kokkos/core/unit_test/category_files/TestThreads_Category.hpp diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_SharedAlloc.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_SharedAlloc.cpp index cbc8894203b8d0bfde1783fd19b0cd153a7be33c..4228b5181a0ccd68dfde87f71f92fd0a471a8e96 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_SharedAlloc.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_SharedAlloc.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include 
<cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestSharedAlloc.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_a.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_a.cpp index eeb9f3fa3a44ac2f992f95477098fb938cbaea1b..316a2b5d0fe0dba2c9b74f3f6f7a6d61342d2c4c 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_a.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewAPI_a.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_b.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_b.cpp index 04949cf5739e9ebff78acf5ed025adb8eaea1679..5eed2ca0d77b828b2431bfce0fe69c4da457bb95 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_b.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewAPI_b.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_c.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_c.cpp index bf259ef734130780ab84b874fec1ea927582559d..26dc9b0e000096ab1809412c4a29fc563844cbd1 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_c.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_c.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewAPI_c.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_d.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_d.cpp index 
84d81e3b41fc843522ef4f5a1969d9a4ae7b5131..bab29610a3d4ad2e812405ba96ed06c7e2dfb3b8 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_d.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_d.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewAPI_d.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_e.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_e.cpp index db9e990ae5a6738cbcfb100670709ee7ef65254b..fd227186d5668239b9d9fe3f6a1ae2b3d5510b32 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_e.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI_e.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewAPI_e.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy_a.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy_a.cpp index 8f5fd4e3e89f257abfb2190934bdd46da1602646..669761df979cfd1458f1d5ea78acfb5738af0d38 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy_a.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewCopy_a.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy_b.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy_b.cpp index 8d288cf71538d8f9cefe9f81f11ccf329de22519..d367fd7e051f49495ce747f6f490bad795f94d86 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy_b.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include 
<TestCudaHostPinned_Category.hpp> #include <TestViewCopy_b.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_a.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_a.cpp index 923f4df965c597d87b3f92abfa26b69dcd9a7f8b..01b284b2f562299b4f23cc197693c2baad40f38e 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_a.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewMapping_a.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_b.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_b.cpp index 1efe65b21db58392bad6c7ef36b3808c763d3f81..e15228b1d772a5dba97ee434e17fdb18188a709a 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_b.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewMapping_b.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_subview.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_subview.cpp index 9e1034c5b72b80592d647aae17fd35dd77faf8ba..52bbd42f292f4b865def36856913dfc6bbe0028f 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_subview.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_subview.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestCudaHostPinned_Category.hpp> #include <TestViewMapping_subview.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_SharedAlloc.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_SharedAlloc.cpp index 
5024b30f90cb559d5278e7e217ace4d84da928ef..6602d7396a7c2fdec7e16e83079764962dbeab75 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_SharedAlloc.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_SharedAlloc.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestSharedAlloc.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_a.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_a.cpp index 3a48b2495e716bfd4b92bbf9a76ce91be399b4ea..4aeac8f13f4d28672c671a51c1eacfedbf0e92fd 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_a.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewAPI_a.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_b.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_b.cpp index 7f0effb5e8d8a1538f656798a956f6edf967d5bd..e5cb0103424fd022290998307f086aedaea0cb29 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_b.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewAPI_b.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_c.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_c.cpp index 9d8cbf0c773f8c5ff293178cf7f9b3fc092a6fe1..a52fcb833ed2a0e959a25e36195460c1ed914a78 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_c.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_c.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewAPI_c.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_d.cpp 
b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_d.cpp index 8d2b5268bf5394e76e0ea8db7e0bce9b04fbb383..e345cd9667526671ef898a0d1247343b47f6296c 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_d.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_d.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewAPI_d.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_e.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_e.cpp index 5c11b2a322ca10a0251476babb82a7a1207253e4..61547df4f523969f8c93da8315fddb4467e5ade9 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_e.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI_e.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewAPI_e.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy_a.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy_a.cpp index 3344c71136195386679c151aab13fb53715172f4..75a769bb947485e6e7459c1cb95b7b3b1c26f9b1 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy_a.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewCopy_a.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy_b.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy_b.cpp index 1234a4b649474ea13ce756e5a49e7bd8dc47c722..7d09f5c9f397b3723599aec64c3c50a6aa77a769 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy_b.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewCopy_b.hpp> diff --git 
a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_a.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_a.cpp index aa9b513d7aab8033dfbe8595f8ae3ebe8c9ad2dc..ea03f43bd69a318095e6277f4db226241fc9a482 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_a.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewMapping_a.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_b.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_b.cpp index f24e9ba4f1f0432eace053ca72b5ecf577c2d231..1f754e8f4996cbc3c0fbefd7000bff65451b19f0 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_b.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewMapping_b.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_subview.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_subview.cpp index 47b7d1f46cbab75ae8a6215fd72094472e7240c5..4af7057d2aa47db99a8325159e0ee737feff7767 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_subview.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_subview.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <cuda/TestCudaUVM_Category.hpp> +#include <TestCudaUVM_Category.hpp> #include <TestViewMapping_subview.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_DebugPinUVMSpace.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_DebugPinUVMSpace.cpp index 4313cece0ca18ef120966230666eb16c847dc986..5b6fccdbd0a500cbb0d45574879a797c866d1b55 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_DebugPinUVMSpace.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_DebugPinUVMSpace.cpp @@ 
-43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_DebugSerialExecution.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_DebugSerialExecution.cpp index 5472ef9fd169a5a43347e98519c8ab32e04ca5eb..f1d3dfc5245d971b6b90ca3ef11731e34b538f67 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_DebugSerialExecution.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_DebugSerialExecution.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp index 38f4336a86c499723c21a985f7a8445a81df74d1..77b1e58a1586482b029f89298c7273cfccc95a7d 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> #include <TestGraph.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Init.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Init.cpp index 6073b9cd7eff97cf67122086031ff95c00304a8c..ee7181e1180fdb887a87190605565e42e897409c 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Init.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Init.cpp @@ -43,7 +43,9 @@ */ #include <Kokkos_Core.hpp> -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> + +#include <array> namespace Test { @@ -58,7 +60,7 @@ __global__ void offset(int* p) { // Cuda. 
TEST(cuda, raw_cuda_interop) { int* p; - cudaMalloc(&p, sizeof(int) * 100); + CUDA_SAFE_CALL(cudaMalloc(&p, sizeof(int) * 100)); Kokkos::InitArguments arguments{-1, -1, -1, false}; Kokkos::initialize(arguments); @@ -70,8 +72,8 @@ TEST(cuda, raw_cuda_interop) { offset<<<100, 64>>>(p); CUDA_SAFE_CALL(cudaDeviceSynchronize()); - int* h_p = new int[100]; - cudaMemcpy(h_p, p, sizeof(int) * 100, cudaMemcpyDefault); + std::array<int, 100> h_p; + cudaMemcpy(h_p.data(), p, sizeof(int) * 100, cudaMemcpyDefault); CUDA_SAFE_CALL(cudaDeviceSynchronize()); int64_t sum = 0; int64_t sum_expect = 0; @@ -81,5 +83,6 @@ TEST(cuda, raw_cuda_interop) { } ASSERT_EQ(sum, sum_expect); + CUDA_SAFE_CALL(cudaFree(p)); } } // namespace Test diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp index 57c0e454d32ee89f03484ba95f46f7bcc1b5d190..526b985c00f2eec2eab6cafb8e862eff5024d575 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp @@ -42,7 +42,7 @@ //@HEADER */ -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> #include <Test_InterOp_Streams.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp index d68ffb0865507c356247a8b3a5fcad6f9090b019..646b37908654d2af6327158cb49f7d4257e8f8bf 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Task.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Task.cpp index e2591c9b127518b21e96be95a02297fa5696294a..42fa615bc6f65f0661ceaad12c3613781a133a52 100644 --- 
a/packages/kokkos/core/unit_test/cuda/TestCuda_Task.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Task.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> #include <TestTaskScheduler.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratchStreams.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratchStreams.cpp index 93225377d341721ba6d700cc741a4bb280822af0..eb9077aaf423b2bf9bdfa919d4d45cd18805d069 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratchStreams.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratchStreams.cpp @@ -42,7 +42,7 @@ //@HEADER */ -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> #include <Kokkos_Core.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceDevelop.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceDevelop.cpp index a80aded124fac55b6324697e7d043cb5b085c35e..b312f42b24369a725a44bdd1de1a2771e794959f 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceDevelop.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceDevelop.cpp @@ -47,7 +47,7 @@ #include <Kokkos_Core.hpp> -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType.cpp index c4320a137e5fc7651aab44cc4fe73d8e171f8561..5dcbe566e299c0f013843216b0854dc51582dd6d 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType.cpp @@ -45,7 +45,7 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestHalfConversion.hpp> #include <TestHalfOperators.hpp> diff --git 
a/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeResize.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeResize.cpp index bc048596a07fed0e0c8c9fb9e8a30fd231228c19..7f53034557dca1c06bcbc6588ff7fdce6ddbb4c4 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeResize.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceTypeResize.cpp @@ -45,8 +45,6 @@ #include <gtest/gtest.h> #include "TestResize.hpp" -// FIXME_SYCL requires parallel_for -#ifndef KOKKOS_ENABLE_SYCL namespace Test { TEST(kokkosresize, host_space_access) { @@ -57,4 +55,3 @@ TEST(kokkosresize, host_space_access) { } } // namespace Test -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a1.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a1.cpp index 248b074fd502a18be61e606180ba3df42388c130..9b57de712183a7ff1fd72533f578c25947901f39 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a1.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a1.cpp @@ -48,16 +48,9 @@ #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> -// FIXME_SYCL -// C++ exception with description "Global_work_size not evenly divisible by -// local_work_size. Non-uniform work-groups are not allowed by default. -// Underlying OpenCL 2.x implementation supports this feature and to enable it, -// build device program with -cl-std=CL2.0 -54 (CL_INVALID_WORK_GROUP_SIZE)" -// thrown in the test body. 
-#ifndef KOKKOS_ENABLE_SYCL namespace Test { TEST(defaultdevicetype, reduce_instantiation_a1) { @@ -66,4 +59,3 @@ TEST(defaultdevicetype, reduce_instantiation_a1) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a2.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a2.cpp index 663371a765a1b8d9f9cbc29a462c5bbb9e6038dc..314891433693df21689fedb2827dc8d614896383 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a2.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a2.cpp @@ -48,16 +48,9 @@ #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> -// FIXME_SYCL -// C++ exception with description "Global_work_size not evenly divisible by -// local_work_size. Non-uniform work-groups are not allowed by default. -// Underlying OpenCL 2.x implementation supports this feature and to enable it, -// build device program with -cl-std=CL2.0 -54 (CL_INVALID_WORK_GROUP_SIZE)" -// thrown in the test body. 
-#ifndef KOKKOS_ENABLE_SYCL namespace Test { TEST(defaultdevicetype, reduce_instantiation_a2) { @@ -67,4 +60,3 @@ TEST(defaultdevicetype, reduce_instantiation_a2) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a3.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a3.cpp index 948d2586bd6043b7ae17bb51c6ef1467e3f55d2c..f9e36e298a227281683ceae0bf5bfe9179a6b8d5 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a3.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_a3.cpp @@ -46,11 +46,9 @@ #include <Kokkos_Core.hpp> -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> namespace Test { @@ -62,4 +60,3 @@ TEST(defaultdevicetype, reduce_instantiation_a3) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b1.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b1.cpp index 449c8cd5622eb1fb55f1bf230c9dbdedbd028561..1a34bef874f75ecf4a75a99166f7abc66ec1804c 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b1.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b1.cpp @@ -48,16 +48,9 @@ #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> -// FIXME_SYCL -// C++ exception with description "Global_work_size not evenly divisible by -// local_work_size. Non-uniform work-groups are not allowed by default. -// Underlying OpenCL 2.x implementation supports this feature and to enable it, -// build device program with -cl-std=CL2.0 -54 (CL_INVALID_WORK_GROUP_SIZE)" -// thrown in the test body. 
-#ifndef KOKKOS_ENABLE_SYCL namespace Test { TEST(defaultdevicetype, reduce_instantiation_b1) { @@ -66,4 +59,3 @@ TEST(defaultdevicetype, reduce_instantiation_b1) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b2.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b2.cpp index 551bd6c20739bfd6bcd536506fab2bd8f214dc5d..8bd7628243ab0e3c073d797cc2817ec2a4ba1185 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b2.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b2.cpp @@ -48,16 +48,9 @@ #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> -// FIXME_SYCL -// C++ exception with description "Global_work_size not evenly divisible by -// local_work_size. Non-uniform work-groups are not allowed by default. -// Underlying OpenCL 2.x implementation supports this feature and to enable it, -// build device program with -cl-std=CL2.0 -54 (CL_INVALID_WORK_GROUP_SIZE)" -// thrown in the test body. 
-#ifndef KOKKOS_ENABLE_SYCL namespace Test { TEST(defaultdevicetype, reduce_instantiation_b2) { @@ -67,4 +60,3 @@ TEST(defaultdevicetype, reduce_instantiation_b2) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b3.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b3.cpp index e1c5b16fee2e28a32e3404eab029fd4a46ff7ed9..bc1d763437d1f4d23ae688fe94c16fcd7f9367f9 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b3.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_b3.cpp @@ -46,11 +46,9 @@ #include <Kokkos_Core.hpp> -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> namespace Test { @@ -62,4 +60,3 @@ TEST(defaultdevicetype, reduce_instantiation_b3) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c1.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c1.cpp index 6e7587781f023d282bfdef4d769ecd7867d3beb8..ba4cca46fbb9d0883691a40ee53b8a11c739b4c5 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c1.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c1.cpp @@ -48,16 +48,9 @@ #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> -// FIXME_SYCL -// C++ exception with description "Global_work_size not evenly divisible by -// local_work_size. Non-uniform work-groups are not allowed by default. -// Underlying OpenCL 2.x implementation supports this feature and to enable it, -// build device program with -cl-std=CL2.0 -54 (CL_INVALID_WORK_GROUP_SIZE)" -// thrown in the test body. 
-#ifndef KOKKOS_ENABLE_SYCL namespace Test { TEST(defaultdevicetype, reduce_instantiation_c1) { @@ -66,4 +59,3 @@ TEST(defaultdevicetype, reduce_instantiation_c1) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c2.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c2.cpp index ae4f1082a2ab27eebe04afd405843dc770eec152..0459f98dddb20e7cae811502e4ebf5518b011c6b 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c2.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c2.cpp @@ -48,16 +48,9 @@ #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> -// FIXME_SYCL -// C++ exception with description "Global_work_size not evenly divisible by -// local_work_size. Non-uniform work-groups are not allowed by default. -// Underlying OpenCL 2.x implementation supports this feature and to enable it, -// build device program with -cl-std=CL2.0 -54 (CL_INVALID_WORK_GROUP_SIZE)" -// thrown in the test body. 
-#ifndef KOKKOS_ENABLE_SYCL namespace Test { TEST(defaultdevicetype, reduce_instantiation_c2) { @@ -67,4 +60,3 @@ TEST(defaultdevicetype, reduce_instantiation_c2) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c3.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c3.cpp index 92a956bee7f39046851d4752f580fe56295d50df..801dee83bbe16b6b25398b27068e5d8a3b3d29e2 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c3.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_c3.cpp @@ -46,11 +46,9 @@ #include <Kokkos_Core.hpp> -// FIXME_SYCL requires TeamPolicy -#ifndef KOKKOS_ENABLE_SYCL #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestReduceCombinatorical.hpp> namespace Test { @@ -62,4 +60,3 @@ TEST(defaultdevicetype, reduce_instantiation_c3) { } // namespace Test #endif -#endif diff --git a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_d.cpp b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_d.cpp index ff87b7802cfdababbd3ce6994f8f794d44bb4d24..bcd49e69bd3af022ede0ca0a188066288c9b1d35 100644 --- a/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_d.cpp +++ b/packages/kokkos/core/unit_test/default/TestDefaultDeviceType_d.cpp @@ -48,13 +48,11 @@ #if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestDefaultDeviceType_Category.hpp> #include <TestUtilities.hpp> namespace Test { -TEST(defaultdevicetype, test_utilities) { test_utilities(); } - TEST(defaultdevicetype, malloc) { int* data = (int*)Kokkos::kokkos_malloc(100 * sizeof(int)); ASSERT_NO_THROW(data = (int*)Kokkos::kokkos_realloc(data, 120 * sizeof(int))); diff --git a/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt 
b/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt index 485dd4d112271bc1897ab96f31c291dde1f4f96c..20b295650a610a601d73e88b2b116e5dda34c324 100644 --- a/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt +++ b/packages/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt @@ -1,25 +1,19 @@ # Create tests that contain each header separately. We do not run these tests # but we just try to compile them. -if(NOT KOKKOS_HAS_TRILINOS) - # Globbing all the header filenames to test for self-containment and presence of header guards - KOKKOS_OPTION(DEFAULT_DEVICE_MEMORY_SPACE "" STRING "Override default device memory space") - KOKKOS_OPTION(DEFAULT_HOST_MEMORY_SPACE "" STRING "Override default host memory space") - KOKKOS_OPTION(DEFAULT_DEVICE_EXECUTION_SPACE "" STRING "Override default device execution space") - KOKKOS_OPTION(DEFAULT_HOST_PARALLEL_EXECUTION_SPACE "" STRING "Override default host parallel execution space") - # Globbing all the header filenames to test for self-containment and presence of header guards - SET(BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../") - file(GLOB KOKKOS_CORE_HEADERS RELATIVE ${BASE_DIR}/core/src - ${BASE_DIR}/core/src/*.hpp ${BASE_DIR}/core/src/*.h) - file(GLOB KOKKOS_CONTAINERS_HEADERS RELATIVE ${BASE_DIR}/containers/src - ${BASE_DIR}/containers/src/*.hpp) - file(GLOB KOKKOS_ALGORITHMS_HEADERS RELATIVE ${BASE_DIR}/algorithms/src - ${BASE_DIR}/algorithms/src/*.hpp) - foreach (_header ${KOKKOS_CORE_HEADERS} ${KOKKOS_CONTAINERS_HEADERS} ${KOKKOS_ALGORITHMS_HEADERS}) - string(REGEX REPLACE "[\./]" "_" header_test_name ${_header}) - set(header_test_name Kokkos_HeaderSelfContained_${header_test_name}) - add_executable(${header_test_name} tstHeader.cpp) - target_link_libraries(${header_test_name} PRIVATE Kokkos::kokkos) - target_compile_definitions(${header_test_name} PRIVATE KOKKOS_HEADER_TEST_NAME=${_header}) - endforeach() -endif() +# Globbing all the header filenames to test for 
self-containment and presence of header guards +SET(BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../") +file(GLOB KOKKOS_CORE_HEADERS RELATIVE ${BASE_DIR}/core/src + ${BASE_DIR}/core/src/*.hpp ${BASE_DIR}/core/src/*.h) +file(GLOB KOKKOS_CONTAINERS_HEADERS RELATIVE ${BASE_DIR}/containers/src + ${BASE_DIR}/containers/src/*.hpp) +file(GLOB KOKKOS_ALGORITHMS_HEADERS RELATIVE ${BASE_DIR}/algorithms/src + ${BASE_DIR}/algorithms/src/*.hpp) + +foreach (_header ${KOKKOS_CORE_HEADERS} ${KOKKOS_CONTAINERS_HEADERS} ${KOKKOS_ALGORITHMS_HEADERS}) + string(REGEX REPLACE "[\./]" "_" header_test_name ${_header}) + set(header_test_name Kokkos_HeaderSelfContained_${header_test_name}) + add_executable(${header_test_name} tstHeader.cpp) + target_link_libraries(${header_test_name} PRIVATE Kokkos::kokkos) + target_compile_definitions(${header_test_name} PRIVATE KOKKOS_HEADER_TEST_NAME=${_header}) +endforeach() diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_Category.hpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_Category.hpp deleted file mode 100644 index 12c69926c7bfc10ec7fef02d9e96c39691c557d6..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_Category.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_HIPHOSTPINNED_HPP -#define KOKKOS_TEST_HIPHOSTPINNED_HPP - -#include <gtest/gtest.h> - -#define TEST_CATEGORY hip_hostpinned -#define TEST_EXECSPACE Kokkos::Experimental::HIPHostPinnedSpace - -#endif diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_a.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_a.cpp index 53f0371b23dfd0cd32044ba70a4b106f2f7b9a45..02157836b3f6075c6c18e2919d93ed4b541dbab8 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_a.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewAPI_a.hpp> diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_b.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_b.cpp index 2e3685d6102d75b8516846f80bc4c3fb959a93b7..80e2fe3f93716c23979ede23aa81de9b2f694c9e 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_b.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewAPI_b.hpp> diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_c.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_c.cpp index 079a244d43ee4e2570dc85cdb6bc6d8957769d55..9694e33ca0ce0f5c2fc6214613f4ae2f03c9750d 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_c.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_c.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewAPI_c.hpp> diff --git 
a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_d.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_d.cpp index bc3843b2c1dcc0eff3282c40426af11c1a7e8098..0d773494ac6236ce0274cc844fb3369aec81d51d 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_d.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_d.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewAPI_d.hpp> diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_e.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_e.cpp index 1c80e05fe0f9f5671f6c394f2ab37ba9fece0d48..cbbbc810b0e8e588be2892b83279a4137675de66 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_e.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewAPI_e.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewAPI_e.hpp> diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewCopy_a.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewCopy_a.cpp index b630ad7016a194e5f9d290e0a5e04e66da571b21..444a3e6e95d2a62c1ad0e8bedba3767503dd4687 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewCopy_a.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewCopy_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewCopy_a.hpp> diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewCopy_b.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewCopy_b.cpp index 1a88e7ebc6dec2bf9d23fee4b210eaea8a9d6869..f1f90e7acf13c7aaa4820f5bd50ecc403f2d6f5f 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewCopy_b.cpp +++ 
b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewCopy_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewCopy_b.hpp> diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_a.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_a.cpp index f9b5608d1b2b7a7a413d24b31e5f2f09e832fc54..5e83121e341db1da440c65cd5dce84dc1a6f6259 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_a.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewMapping_a.hpp> diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_b.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_b.cpp index bff68fc07253e3602412a133abc61ee5f4a4e062..c024143d6c7b735dfa3b897e0a4503ee50e4caec 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_b.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewMapping_b.hpp> diff --git a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_subview.cpp b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_subview.cpp index cfa9da549593e817b3eb79cd33fd75ce87953e73..dcd6c1dc435982fdf44950c3b606847c29c30b37 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_subview.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIPHostPinned_ViewMapping_subview.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include <hip/TestHIPHostPinned_Category.hpp> +#include <TestHIPHostPinned_Category.hpp> #include <TestViewMapping_subview.hpp> diff --git 
a/packages/kokkos/algorithms/unit_tests/TestThreads.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_AsyncLauncher.cpp similarity index 61% rename from packages/kokkos/algorithms/unit_tests/TestThreads.cpp rename to packages/kokkos/core/unit_test/hip/TestHIP_AsyncLauncher.cpp index c75e6e8dfba9f8d69617b8ff44b4c095a9e55537..0a243e0e8e89c0ef5a7cec6195837909d092bc2a 100644 --- a/packages/kokkos/algorithms/unit_tests/TestThreads.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_AsyncLauncher.cpp @@ -42,47 +42,48 @@ //@HEADER */ -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_ENABLE_THREADS - -#include <gtest/gtest.h> - #include <Kokkos_Core.hpp> - -#include <TestRandom.hpp> -#include <TestSort.hpp> -#include <iomanip> - -//---------------------------------------------------------------------------- +#include <TestHIP_Category.hpp> namespace Test { -#define THREADS_RANDOM_XORSHIFT64(num_draws) \ - TEST(threads, Random_XorShift64) { \ - Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Threads> >( \ - num_draws); \ - } +struct TestAsyncLauncher { + size_t *m_flag; + size_t m_value; -#define THREADS_RANDOM_XORSHIFT1024(num_draws) \ - TEST(threads, Random_XorShift1024) { \ - Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Threads> >( \ - num_draws); \ + KOKKOS_INLINE_FUNCTION + void operator()(const int /*i*/) const { + // and update flag + Kokkos::atomic_add(m_flag, m_value); } -#define THREADS_SORT_UNSIGNED(size) \ - TEST(threads, SortUnsigned) { \ - Impl::test_sort<Kokkos::Threads, double>(size); \ - } + TestAsyncLauncher(size_t *flag, int value) : m_flag(flag), m_value(value) {} -THREADS_RANDOM_XORSHIFT64(10240000) -THREADS_RANDOM_XORSHIFT1024(10130144) -THREADS_SORT_UNSIGNED(171) + void run() { + Kokkos::parallel_for(Kokkos::RangePolicy<TEST_EXECSPACE>(0, 1), *this); + } +}; -#undef THREADS_RANDOM_XORSHIFT64 -#undef THREADS_RANDOM_XORSHIFT1024 -#undef THREADS_SORT_UNSIGNED +TEST(hip, async_launcher) { + size_t *flag; + HIP_SAFE_CALL(hipMalloc(&flag, 
sizeof(size_t))); + HIP_SAFE_CALL(hipMemset(flag, 0, sizeof(size_t))); + // launch # of cycles * 1000 kernels w/ distinct values + auto space = Kokkos::Experimental::HIP(); + auto instance = space.impl_internal_space_instance(); + size_t max_cycles = instance->m_maxDriverCycles; + size_t nkernels = max_cycles * 1000; + for (size_t i = 0; i < nkernels; ++i) { + TestAsyncLauncher(flag, i).run(); + } + // and check results -- if any of the driver types were overwritten + // the sum below should fail + instance->fence(); + size_t h_flag; + HIP_SAFE_CALL( + hipMemcpy(&h_flag, flag, sizeof(size_t), hipMemcpyDeviceToHost)); + ASSERT_EQ(h_flag, (nkernels * (nkernels - 1)) / 2); + HIP_SAFE_CALL(hipFree(flag)); +} } // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTTHREADS_PREVENT_LINK_ERROR() {} -#endif diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Init.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Init.cpp index 9a58c99d2db39af91a8d61ec28be65f19fa9c9f2..3a76ca148cf683a83b84d351e4ebd8b2f7cdec94 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Init.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Init.cpp @@ -43,7 +43,9 @@ */ #include <Kokkos_Core.hpp> -#include <hip/TestHIP_Category.hpp> +#include <TestHIP_Category.hpp> + +#include <array> namespace Test { @@ -70,8 +72,8 @@ TEST(hip, raw_hip_interop) { offset<<<dim3(100), dim3(100), 0, nullptr>>>(p); HIP_SAFE_CALL(hipDeviceSynchronize()); - int* h_p = new int[100]; - HIP_SAFE_CALL(hipMemcpy(h_p, p, sizeof(int) * 100, hipMemcpyDefault)); + std::array<int, 100> h_p; + HIP_SAFE_CALL(hipMemcpy(h_p.data(), p, sizeof(int) * 100, hipMemcpyDefault)); HIP_SAFE_CALL(hipDeviceSynchronize()); int64_t sum = 0; int64_t sum_expect = 0; @@ -81,5 +83,6 @@ TEST(hip, raw_hip_interop) { } ASSERT_EQ(sum, sum_expect); + HIP_SAFE_CALL(hipFree(p)); } } // namespace Test diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Streams.cpp 
b/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Streams.cpp index 8363765e36af1c1a73f8ae4c6b50ec1da712b6b8..8e0880ddbd0b15524be75ab97b90044e5315a8ff 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Streams.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_InterOp_Streams.cpp @@ -42,7 +42,7 @@ //@HEADER */ -#include <hip/TestHIP_Category.hpp> +#include <TestHIP_Category.hpp> #include <Test_InterOp_Streams.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_ScanUnit.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_ScanUnit.cpp index 73e9dec10f3d70fdb7a88caa262165f5182ce52e..b759d6f407a791fb3b88b86f502cc956780294f3 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_ScanUnit.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_ScanUnit.cpp @@ -44,7 +44,7 @@ */ #include <Kokkos_Core.hpp> -#include <hip/TestHIP_Category.hpp> +#include <TestHIP_Category.hpp> struct DummyFunctor { using value_type = int; diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp index f13400d096db8682083293ad56c08c57f1cbbea0..ae1de8ea2d304e41d672ff2e136d16c86cbb8068 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_Spaces.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <hip/TestHIP_Category.hpp> +#include <TestHIP_Category.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/hip/TestHIP_TeamScratchStreams.cpp b/packages/kokkos/core/unit_test/hip/TestHIP_TeamScratchStreams.cpp index ac729dbc055a52b7a431f92ff66a4493f8690ba0..db360a99d3d60977cf06479e7662e21350dd5f99 100644 --- a/packages/kokkos/core/unit_test/hip/TestHIP_TeamScratchStreams.cpp +++ b/packages/kokkos/core/unit_test/hip/TestHIP_TeamScratchStreams.cpp @@ -42,7 +42,7 @@ //@HEADER */ -#include <hip/TestHIP_Category.hpp> +#include <TestHIP_Category.hpp> #include <Kokkos_Core.hpp> namespace Test { diff --git 
a/packages/kokkos/core/unit_test/hpx/TestHPX_Category.hpp b/packages/kokkos/core/unit_test/hpx/TestHPX_Category.hpp deleted file mode 100644 index bbdcfba5c7af2e7cd5c1734b8427bd724f5e240b..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_Category.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_HPX_HPP -#define KOKKOS_TEST_HPX_HPP - -#include <gtest/gtest.h> - -#define TEST_CATEGORY hpx -#define TEST_CATEGORY_DEATH hpx_DeathTest -#define TEST_EXECSPACE Kokkos::Experimental::HPX - -#endif diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp index a235e86ba49ec345587d4a6d849b100aedca98af..8e89d6d6a5da981b33eea9349ae3ace63ec3f684 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstances.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <hpx/TestHPX_Category.hpp> +#include <TestHPX_Category.hpp> #include <hpx/config.hpp> #include <hpx/include/lcos.hpp> diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp index 4f5569fc6b91192bf995ee0a7225ac0ab809e45f..0cedc068e594e70d750c9b515c4e08cbe527a1f4 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesDelayedExecution.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> 
-#include <hpx/TestHPX_Category.hpp> +#include <TestHPX_Category.hpp> #include <hpx/include/lcos.hpp> diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp index 26f419db86cf14560b7c3444c12e5abf8b5b26a0..de4cb01a7835d8b4e3d29920ed572edeeb9ef3fb 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesInstanceIds.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <hpx/TestHPX_Category.hpp> +#include <TestHPX_Category.hpp> #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesRefCounting.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesRefCounting.cpp index 89b03dc3677365dc321334667abf1ebd22df1678..a98c8b0d62339fa5c2e68124984d5b790b14f692 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesRefCounting.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_IndependentInstancesRefCounting.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <hpx/TestHPX_Category.hpp> +#include <TestHPX_Category.hpp> #ifdef KOKKOS_ENABLE_HPX_ASYNC_DISPATCH diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp index 872d1a0383802c90f99557c8ef51c57dd19be918..31c35ac9a7f0a3425948157cb7f2d3a4239691ad 100644 --- a/packages/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <hpx/TestHPX_Category.hpp> +#include <TestHPX_Category.hpp> namespace Test { diff --git a/packages/kokkos/core/unit_test/hpx/TestHPX_Task.cpp b/packages/kokkos/core/unit_test/hpx/TestHPX_Task.cpp index 4e059beef41dec1286db5062dd031e9d5e4084ef..57d0ac803bcd86b5499dd6c29348d88138088c15 100644 --- 
a/packages/kokkos/core/unit_test/hpx/TestHPX_Task.cpp +++ b/packages/kokkos/core/unit_test/hpx/TestHPX_Task.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include <hpx/TestHPX_Category.hpp> +#include <TestHPX_Category.hpp> #include <TestTaskScheduler.hpp> diff --git a/packages/kokkos/core/unit_test/incremental/Test12a_ThreadScratch.hpp b/packages/kokkos/core/unit_test/incremental/Test12a_ThreadScratch.hpp index 449b450a7c5508392cf0316689fd9c6a844d950d..5bf1860d8e4a6bcf739656bdc7e1f790ebf60512 100644 --- a/packages/kokkos/core/unit_test/incremental/Test12a_ThreadScratch.hpp +++ b/packages/kokkos/core/unit_test/incremental/Test12a_ThreadScratch.hpp @@ -42,7 +42,7 @@ //@HEADER */ -// @Kokkos_Feature_Level_Required:13 +// @Kokkos_Feature_Level_Required:12 // Unit test for hierarchical parallelism // Create concurrent work hierarchically and verify if // contributions of paticipating processing units corresponds to expected value @@ -63,10 +63,12 @@ struct ThreadScratch { int sX, sY; data_t v; + + const int scratch_level = 1; KOKKOS_FUNCTION void operator()(const team_t &team) const { // Allocate and use scratch pad memory - scratch_t v_S(team.thread_scratch(1), sY); + scratch_t v_S(team.thread_scratch(scratch_level), sY); int n = team.league_rank(); for (int i = 0; i < sY; ++i) v_S(i) = 0; @@ -90,8 +92,9 @@ struct ThreadScratch { int scratchSize = scratch_t::shmem_size(sY); // So this works with deprecated code enabled: - policy_t policy = policy_t(pN, Kokkos::AUTO) - .set_scratch_size(1, Kokkos::PerThread(scratchSize)); + policy_t policy = + policy_t(pN, Kokkos::AUTO) + .set_scratch_size(scratch_level, Kokkos::PerThread(scratchSize)); int max_team_size = policy.team_size_max(*this, Kokkos::ParallelForTag()); v = data_t("Matrix", pN, max_team_size); @@ -99,7 +102,7 @@ struct ThreadScratch { Kokkos::parallel_for( "Test12a_ThreadScratch", policy_t(pN, max_team_size) - .set_scratch_size(1, Kokkos::PerThread(scratchSize)), + .set_scratch_size(scratch_level, 
Kokkos::PerThread(scratchSize)), *this); Kokkos::fence(); @@ -117,9 +120,18 @@ struct ThreadScratch { TEST(TEST_CATEGORY, IncrTest_12a_ThreadScratch) { ThreadScratch<TEST_EXECSPACE> test; + // FIXME_OPENMPTARGET - team_size has to be a multiple of 32 for the tests to + // pass in the Release and RelWithDebInfo builds. Does not need the team_size + // to be a multiple of 32 for the Debug builds. +#ifdef KOKKOS_ENABLE_OPENMPTARGET + test.run(1, 32, 9); + test.run(2, 64, 22); + test.run(14, 128, 321); +#else test.run(1, 55, 9); test.run(2, 4, 22); test.run(14, 277, 321); +#endif } } // namespace Test diff --git a/packages/kokkos/core/unit_test/incremental/Test12b_TeamScratch.hpp b/packages/kokkos/core/unit_test/incremental/Test12b_TeamScratch.hpp index 913dce9995d1f0998c85f23ab174a6b8e8cdae80..b34f652e76d919f14c3afed0656b8bcd86dbc27f 100644 --- a/packages/kokkos/core/unit_test/incremental/Test12b_TeamScratch.hpp +++ b/packages/kokkos/core/unit_test/incremental/Test12b_TeamScratch.hpp @@ -42,7 +42,7 @@ //@HEADER */ -// @Kokkos_Feature_Level_Required:13 +// @Kokkos_Feature_Level_Required:12 // Unit test for hierarchical parallelism // Create concurrent work hierarchically and verify if // contributions of paticipating processing units corresponds to expected value @@ -64,13 +64,15 @@ struct TeamScratch { Kokkos::MemoryTraits<Kokkos::Unmanaged> >; int scratchSize = scratch_t::shmem_size(sX, sY); + const int scratch_level = 1; + Kokkos::parallel_for( "Team", policy_t(pN, Kokkos::AUTO) - .set_scratch_size(1, Kokkos::PerTeam(scratchSize)), + .set_scratch_size(scratch_level, Kokkos::PerTeam(scratchSize)), KOKKOS_LAMBDA(const team_t &team) { // Allocate and use scratch pad memory - scratch_t v_S(team.team_scratch(1), sX, sY); + scratch_t v_S(team.team_scratch(scratch_level), sX, sY); int n = team.league_rank(); Kokkos::parallel_for( @@ -105,9 +107,18 @@ struct TeamScratch { TEST(TEST_CATEGORY, IncrTest_12b_TeamScratch) { TeamScratch<TEST_EXECSPACE> test; + // 
FIXME_OPENMPTARGET - team_size has to be a multiple of 32 for the tests to + // pass in the Release and RelWithDebInfo builds. Does not need the team_size + // to be a multiple of 32 for the Debug builds. +#ifdef KOKKOS_ENABLE_OPENMPTARGET + test.run(1, 32, 4); + test.run(4, 64, 10); + test.run(14, 128, 20); +#else test.run(1, 4, 4); test.run(4, 7, 10); test.run(14, 277, 321); +#endif } } // namespace Test diff --git a/packages/kokkos/core/unit_test/incremental/Test13a_ParallelRed_TeamThreadRange.hpp b/packages/kokkos/core/unit_test/incremental/Test13a_ParallelRed_TeamThreadRange.hpp index 20f0b7884ed1b0f5063ef9623377eabb97722c1c..e32b0ed0fc92684072cf004b64240093e1b981fd 100644 --- a/packages/kokkos/core/unit_test/incremental/Test13a_ParallelRed_TeamThreadRange.hpp +++ b/packages/kokkos/core/unit_test/incremental/Test13a_ParallelRed_TeamThreadRange.hpp @@ -42,7 +42,7 @@ //@HEADER */ -// @Kokkos_Feature_Level_Required:12 +// @Kokkos_Feature_Level_Required:13 // Unit test for hierarchical parallelism // Create concurrent work hierarchically and verify if // sum of created processing units corresponds to expected value diff --git a/packages/kokkos/core/unit_test/incremental/Test13b_ParallelRed_TeamVectorRange.hpp b/packages/kokkos/core/unit_test/incremental/Test13b_ParallelRed_TeamVectorRange.hpp index 6b640632399cb9d2a23c68c491532e84c7b3afd3..0d37703e2b73d5ca22e73f2bfbd2f553e1fe0225 100644 --- a/packages/kokkos/core/unit_test/incremental/Test13b_ParallelRed_TeamVectorRange.hpp +++ b/packages/kokkos/core/unit_test/incremental/Test13b_ParallelRed_TeamVectorRange.hpp @@ -42,7 +42,7 @@ //@HEADER */ -// @Kokkos_Feature_Level_Required:12 +// @Kokkos_Feature_Level_Required:13 // Unit test for hierarchical parallelism // Create concurrent work hierarchically and verify if // sum of created processing units corresponds to expected value diff --git a/packages/kokkos/core/unit_test/incremental/Test13c_ParallelRed_ThreadVectorRange.hpp 
b/packages/kokkos/core/unit_test/incremental/Test13c_ParallelRed_ThreadVectorRange.hpp index aa82dd1f3996e2786ab253b59f9b356e5f0e3ef3..26f9d000914393a8af86d9ba1bc4bb5658a7244e 100644 --- a/packages/kokkos/core/unit_test/incremental/Test13c_ParallelRed_ThreadVectorRange.hpp +++ b/packages/kokkos/core/unit_test/incremental/Test13c_ParallelRed_ThreadVectorRange.hpp @@ -42,7 +42,7 @@ //@HEADER */ -// @Kokkos_Feature_Level_Required:12 +// @Kokkos_Feature_Level_Required:13 // Unit test for hierarchical parallelism // Create concurrent work hierarchically and verify if // sum of created processing units corresponds to expected value diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Category.hpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Category.hpp deleted file mode 100644 index 65efbc9b9713f8db87a36069c2416b0da4ee2554..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Category.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_OMP_HPP -#define KOKKOS_TEST_OMP_HPP - -#include <gtest/gtest.h> - -#define TEST_CATEGORY openmp -#define TEST_CATEGORY_DEATH openmp_DeathTest -#define TEST_EXECSPACE Kokkos::OpenMP -#define TEST_CATEGORY_FIXTURE(name) openmp_##name - -#endif diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp index 9ed647c287db884e0b5a058e8adbd10625c71cb2..e5ba9e8738275b4163a787518678c6615f91f0f7 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include <openmp/TestOpenMP_Category.hpp> +#include <TestOpenMP_Category.hpp> #include <TestGraph.hpp> diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp 
b/packages/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp index 083c94860ba7018f4418a8c2eff7112afbaaf063..c3ee67673912bb8c8f022d03322d6e8b69adfd72 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp @@ -43,7 +43,7 @@ */ #include <Kokkos_Core.hpp> -#include <openmp/TestOpenMP_Category.hpp> +#include <TestOpenMP_Category.hpp> #include <omp.h> namespace Test { diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_PartitionMaster.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_PartitionMaster.cpp index ea1472b7575e9e135f098640269d1638f3a412f1..902150da5806d27768603ac71207ce2aaef5551f 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_PartitionMaster.cpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_PartitionMaster.cpp @@ -43,7 +43,7 @@ //@HEADER */ -#include <openmp/TestOpenMP_Category.hpp> +#include <TestOpenMP_Category.hpp> #include <Kokkos_Core.hpp> #include <mutex> diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp index 5e67a34710f1219eb0a13d90b686faa7c6938321..2ddc6a58419040f912ebbd0f9d4f60ae113b9368 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include <openmp/TestOpenMP_Category.hpp> +#include <TestOpenMP_Category.hpp> #include <TestTaskScheduler.hpp> diff --git a/packages/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Category.hpp b/packages/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Category.hpp deleted file mode 100644 index 58aa0cc782080e255264d57b5f9838fb60ab83cd..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Category.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos 
v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_OMPTARGET_HPP -#define KOKKOS_TEST_OMPTARGET_HPP - -#include <gtest/gtest.h> - -#define TEST_CATEGORY openmptarget -#define TEST_CATEGORY_DEATH openmptarget_DeathTest -#define TEST_EXECSPACE Kokkos::Experimental::OpenMPTarget - -#endif diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Category.hpp b/packages/kokkos/core/unit_test/serial/TestSerial_Category.hpp deleted file mode 100644 index d7ae8a9f48afbae46a90cfaad26fff6621d45570..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/unit_test/serial/TestSerial_Category.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_SERIAL_HPP -#define KOKKOS_TEST_SERIAL_HPP - -#include <gtest/gtest.h> - -#define TEST_CATEGORY serial -#define TEST_CATEGORY_DEATH serial_DeathTest -#define TEST_EXECSPACE Kokkos::Serial -#define TEST_CATEGORY_FIXTURE(name) serial_##name - -#endif diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Graph.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Graph.cpp index 5933b1b86f9b782f4c42d850af5c60b95186133e..b2dba1c265cab5cfa4b982bf43f920ec666fcaa5 100644 --- a/packages/kokkos/core/unit_test/serial/TestSerial_Graph.cpp +++ b/packages/kokkos/core/unit_test/serial/TestSerial_Graph.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include <serial/TestSerial_Category.hpp> +#include <TestSerial_Category.hpp> #include <TestGraph.hpp> diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Task.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Task.cpp index 02f686e069dcd09e52c3a56cdd3d8cb8b0cde3d1..c08efbf447b6fe055f7f01e619b2a0b02de0cdf8 100644 --- a/packages/kokkos/core/unit_test/serial/TestSerial_Task.cpp +++ 
b/packages/kokkos/core/unit_test/serial/TestSerial_Task.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include <serial/TestSerial_Category.hpp> +#include <TestSerial_Category.hpp> #include <TestTaskScheduler.hpp> diff --git a/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp b/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp index 74e28a17de71d49f0287a4460e1238a23798d2e7..c1f7398c166bcf738111b1674a83a919293faf6d 100644 --- a/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp +++ b/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp @@ -48,23 +48,29 @@ #include <Kokkos_Core.hpp> #ifdef KOKKOS_ENABLE_CUDA -#include <cuda/TestCuda_Category.hpp> +#include <TestCuda_Category.hpp> +#endif +#ifdef KOKKOS_ENABLE_HIP +#include <TestHIP_Category.hpp> +#endif +#ifdef KOKKOS_ENABLE_SYCL +#include <TestSYCL_Category.hpp> #endif #ifdef KOKKOS_ENABLE_OPENMP -#include <openmp/TestOpenMP_Category.hpp> +#include <TestOpenMP_Category.hpp> #endif #ifdef KOKKOS_ENABLE_THREADS -#include <threads/TestThreads_Category.hpp> +#include <TestThreads_Category.hpp> #endif #ifdef KOKKOS_ENABLE_HPX -#include <hpx/TestHPX_Category.hpp> +#include <TestHPX_Category.hpp> #endif #ifdef KOKKOS_ENABLE_OPENMPTARGET -#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestOpenMPTarget_Category.hpp> #endif #ifndef TEST_EXECSPACE #ifdef KOKKOS_ENABLE_SERIAL -#include <serial/TestSerial_Category.hpp> +#include <TestSerial_Category.hpp> #endif #endif #include <TestReducers_d.hpp> diff --git a/packages/kokkos/algorithms/unit_tests/TestOpenMP_Random.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init.cpp similarity index 66% rename from packages/kokkos/algorithms/unit_tests/TestOpenMP_Random.cpp rename to packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init.cpp index 1ca8e0a828f06176c0959c744fe20045856534b2..018855963d35f8fef81a93985811dcc3d9b239fc 100644 --- a/packages/kokkos/algorithms/unit_tests/TestOpenMP_Random.cpp +++ 
b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init.cpp @@ -42,36 +42,47 @@ //@HEADER */ -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_ENABLE_OPENMP - -#include <gtest/gtest.h> #include <Kokkos_Core.hpp> +#include <TestSYCL_Category.hpp> -//---------------------------------------------------------------------------- -#include <TestRandom.hpp> -#include <iomanip> +#include <array> namespace Test { -#define OPENMP_RANDOM_XORSHIFT64(num_draws) \ - TEST(openmp, Random_XorShift64) { \ - Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >( \ - num_draws); \ - } +// Test whether allocations survive Kokkos initialize/finalize if done via Raw +// SYCL. +TEST(sycl, raw_sycl_interop) { + sycl::default_selector device_selector; + sycl::queue queue(device_selector); + constexpr int n = 100; + int* p = sycl::malloc_device<int>(n, queue); -#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \ - TEST(openmp, Random_XorShift1024) { \ - Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >( \ - num_draws); \ + Kokkos::InitArguments arguments{-1, -1, -1, false}; + Kokkos::initialize(arguments); + { + TEST_EXECSPACE space(queue); + Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v(p, n); + Kokkos::deep_copy(space, v, 5); } + Kokkos::finalize(); + + queue.submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(n), [=](int idx) { p[idx] += idx; }); + }); + queue.wait_and_throw(); -OPENMP_RANDOM_XORSHIFT64(10240000) -OPENMP_RANDOM_XORSHIFT1024(10130144) + std::array<int, n> h_p; + queue.memcpy(h_p.data(), p, sizeof(int) * n); + queue.wait_and_throw(); + sycl::free(p, queue); + + int64_t sum = 0; + int64_t sum_expect = 0; + for (int i = 0; i < n; i++) { + sum += h_p[i]; + sum_expect += 5 + i; + } -#undef OPENMP_RANDOM_XORSHIFT64 -#undef OPENMP_RANDOM_XORSHIFT1024 + ASSERT_EQ(sum, sum_expect); +} } // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} -#endif diff --git 
a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init_Context.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init_Context.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c12c5c07295d73ddb0600d366f9c50faa6ba96df --- /dev/null +++ b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Init_Context.cpp @@ -0,0 +1,120 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <TestSYCL_Category.hpp> + +#include <array> + +namespace Test { + +// Test whether external allocations can be accessed by the default queue. +TEST(sycl, raw_sycl_interop_context_1) { + Kokkos::Experimental::SYCL default_space; + sycl::context default_context = default_space.sycl_context(); + + sycl::default_selector device_selector; + sycl::queue queue(default_context, device_selector); + constexpr int n = 100; + int* p = sycl::malloc_device<int>(n, queue); + + Kokkos::Experimental::SYCL space(queue); + Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v(p, n); + Kokkos::deep_copy(v, 5); + + queue.submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(n), [=](int idx) { p[idx] += idx; }); + }); + queue.wait_and_throw(); + + std::array<int, n> h_p; + queue.memcpy(h_p.data(), p, sizeof(int) * n); + queue.wait_and_throw(); + sycl::free(p, queue); + + int64_t sum = 0; + int64_t sum_expect = 0; + for (int i = 0; i < n; i++) { + sum += h_p[i]; + sum_expect += 5 + i; + } + + ASSERT_EQ(sum, sum_expect); +} + +// Test whether regular View allocations can be accessed by non-default queues. 
+TEST(sycl, raw_sycl_interop_context_2) { + Kokkos::Experimental::SYCL default_space; + sycl::context default_context = default_space.sycl_context(); + + sycl::default_selector device_selector; + sycl::queue queue(default_context, device_selector); + constexpr int n = 100; + + Kokkos::Experimental::SYCL space(queue); + Kokkos::View<int*, Kokkos::Experimental::SYCLDeviceUSMSpace> v("default_view", + n); + Kokkos::deep_copy(space, v, 5); + + auto* v_ptr = v.data(); + queue.submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(n), [=](int idx) { v_ptr[idx] += idx; }); + }); + queue.wait_and_throw(); + + std::array<int, n> h_p; + queue.memcpy(h_p.data(), v_ptr, sizeof(int) * n); + queue.wait_and_throw(); + + int64_t sum = 0; + int64_t sum_expect = 0; + for (int i = 0; i < n; i++) { + sum += h_p[i]; + sum_expect += 5 + i; + } + + ASSERT_EQ(sum, sum_expect); +} + +} // namespace Test diff --git a/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f81b7073392cc192318187e2ac31aa632f428489 --- /dev/null +++ b/packages/kokkos/core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp @@ -0,0 +1,118 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <TestSYCL_Category.hpp> +#include <Test_InterOp_Streams.hpp> + +namespace Test { +// Test Interoperability with SYCL Streams +TEST(sycl, raw_sycl_queues) { + sycl::default_selector device_selector; + sycl::queue queue(device_selector); + Kokkos::InitArguments arguments{-1, -1, -1, false}; + Kokkos::initialize(arguments); + int* p = sycl::malloc_device<int>(100, queue); + using MemorySpace = typename TEST_EXECSPACE::memory_space; + + { + TEST_EXECSPACE space0(queue); + Kokkos::View<int*, TEST_EXECSPACE> v(p, 100); + Kokkos::deep_copy(space0, v, 5); + int sum = 0; + + Kokkos::parallel_for("Test::sycl::raw_sycl_queue::Range", + Kokkos::RangePolicy<TEST_EXECSPACE>(space0, 0, 100), + FunctorRange<MemorySpace>(v)); + Kokkos::parallel_reduce("Test::sycl::raw_sycl_queue::RangeReduce", + Kokkos::RangePolicy<TEST_EXECSPACE>(space0, 0, 100), + FunctorRangeReduce<MemorySpace>(v), sum); + space0.fence(); + ASSERT_EQ(6 * 100, sum); + + Kokkos::parallel_for("Test::sycl::raw_sycl_queue::MDRange", + Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>( + space0, {0, 0}, {10, 10}), + FunctorMDRange<MemorySpace>(v)); + space0.fence(); + Kokkos::parallel_reduce( + "Test::sycl::raw_sycl_queue::MDRangeReduce", + Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>(space0, {0, 0}, + {10, 10}), + FunctorMDRangeReduce<MemorySpace>(v), sum); + space0.fence(); + ASSERT_EQ(7 * 100, sum); + + Kokkos::parallel_for("Test::sycl::raw_sycl_queue::Team", + Kokkos::TeamPolicy<TEST_EXECSPACE>(space0, 10, 10), + FunctorTeam<MemorySpace, TEST_EXECSPACE>(v)); + space0.fence(); + Kokkos::parallel_reduce("Test::sycl::raw_sycl_queue::Team", + Kokkos::TeamPolicy<TEST_EXECSPACE>(space0, 10, 10), + FunctorTeamReduce<MemorySpace, TEST_EXECSPACE>(v), + sum); + space0.fence(); + ASSERT_EQ(8 * 100, sum); + } + Kokkos::finalize(); + + // Try to use the queue after Kokkos' copy 
got out-of-scope. + // This kernel corresponds to "offset_streams" in the HIP and CUDA tests. + queue.submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(100), [=](int idx) { p[idx] += idx; }); + }); + queue.wait_and_throw(); + + int h_p[100]; + queue.memcpy(h_p, p, sizeof(int) * 100); + queue.wait_and_throw(); + int64_t sum = 0; + int64_t sum_expect = 0; + for (int i = 0; i < 100; i++) { + sum += h_p[i]; + sum_expect += 8 + i; + } + + ASSERT_EQ(sum, sum_expect); +} +} // namespace Test diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_Category.hpp b/packages/kokkos/core/unit_test/threads/TestThreads_Category.hpp deleted file mode 100644 index 800772b42dd0d3523d3e9243917ae005665e0bed..0000000000000000000000000000000000000000 --- a/packages/kokkos/core/unit_test/threads/TestThreads_Category.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_TEST_THREADS_HPP -#define KOKKOS_TEST_THREADS_HPP - -#include <gtest/gtest.h> - -#define TEST_CATEGORY threads -#define TEST_CATEGORY_DEATH threads_DeathTest -#define TEST_EXECSPACE Kokkos::Threads - -#endif diff --git a/packages/kokkos/core/unit_test/tools/TestAllCalls.cpp b/packages/kokkos/core/unit_test/tools/TestAllCalls.cpp index 7e37816c5dc26286cb267dba327de205a75e3ecf..7ee8d68e30dd1de252866ff83c4aed8e07bd2ab5 100644 --- a/packages/kokkos/core/unit_test/tools/TestAllCalls.cpp +++ b/packages/kokkos/core/unit_test/tools/TestAllCalls.cpp @@ -46,11 +46,12 @@ // testing library this tests that our shared-library loading based profiling // mechanisms work -#include <iostream> #include <Kokkos_Core.hpp> +#include <iostream> +#include <sstream> -int main() { - Kokkos::initialize(); +int main(int argc, char** argv) { + Kokkos::initialize(argc, argv); { // This test only uses host kernel launch mechanisms. This is to allow for // the test to run on platforms where CUDA lambda launch isn't supported. 
@@ -84,6 +85,7 @@ int main() { Kokkos::Profiling::stopSection(sectionId); Kokkos::Profiling::destroyProfileSection(sectionId); Kokkos::Profiling::markEvent("profiling_event"); + Kokkos::Tools::declareMetadata("dogs", "good"); } Kokkos::finalize(); } diff --git a/packages/kokkos/core/unit_test/tools/printing-tool.cpp b/packages/kokkos/core/unit_test/tools/printing-tool.cpp index c2abada0a921b4a7b403fdd49ef3a6837cc47b58..76b7837d0365306201c83eb8e2ae92523d3a6670 100644 --- a/packages/kokkos/core/unit_test/tools/printing-tool.cpp +++ b/packages/kokkos/core/unit_test/tools/printing-tool.cpp @@ -4,6 +4,15 @@ struct Kokkos_Profiling_KokkosPDeviceInfo; +// just get the basename for print_help/parse_args +std::string get_basename(char* cmd, int idx = 0) { + if (idx > 0) return cmd; + std::string _cmd = cmd; + auto _pos = _cmd.find_last_of('/'); + if (_pos != std::string::npos) return _cmd.substr(_pos + 1); + return _cmd; +} + struct SpaceHandle { char name[64]; }; @@ -23,6 +32,16 @@ extern "C" void kokkosp_finalize_library() { std::cout << "kokkosp_finalize_library::"; } +extern "C" void kokkosp_print_help(char* exe) { + std::cout << "kokkosp_print_help:" << get_basename(exe) << "::"; +} + +extern "C" void kokkosp_parse_args(int argc, char** argv) { + std::cout << "kokkosp_parse_args:" << argc; + for (int i = 0; i < argc; ++i) std::cout << ":" << get_basename(argv[i], i); + std::cout << "::"; +} + extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { @@ -116,3 +135,6 @@ extern "C" void kokkosp_destroy_profile_section(uint32_t sec_id) { extern "C" void kokkosp_profile_event(const char* name) { std::cout << "kokkosp_profile_event:" << name << "::"; } +extern "C" void kokkosp_declare_metadata(const char* key, const char* value) { + std::cout << "kokkosp_declare_metadata:" << key << ":" << value << "::"; +} diff --git a/packages/kokkos/example/build_cmake_installed_different_compiler/CMakeLists.txt 
b/packages/kokkos/example/build_cmake_installed_different_compiler/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..df16774e742e9f60a116a5a8dcdf93bcc17b0606 --- /dev/null +++ b/packages/kokkos/example/build_cmake_installed_different_compiler/CMakeLists.txt @@ -0,0 +1,29 @@ +# Kokkos minimally requires 3.16 right now, +# but your project can set it higher +cmake_minimum_required(VERSION 3.16) + +# Projects can safely mix languages - must have C++ support +# Kokkos flags will only apply to C++ files +project(Example CXX Fortran) + +# You need this for using Kokkos_ROOT variable +message(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables") +cmake_policy(SET CMP0074 NEW) + +# Look for an installed Kokkos but force using the compiler launcher +# to ensure that targets depending on Kokkos use the same compiler +# as when kokkos was installed, e.g. if kokkos was built with +# g++ and the CMAKE_CXX_COMPILER=clang++ then example_with_kokkos +# will be compiled and linked with g++ whereas example_no_kokkos +# will be compiled and linked with clang++ +find_package(Kokkos REQUIRED COMPONENTS launch_compiler) + +add_executable(example_no_kokkos bar.cpp) +add_executable(example_with_kokkos foo.cpp) + +# This is the only thing required to set up compiler/linker flags +target_link_libraries(example_with_kokkos Kokkos::kokkos) + +enable_testing() +add_test(NAME KokkosLauncher_NoKokkos_Verify COMMAND example_no_kokkos 10) +add_test(NAME KokkosLauncher_WithKokkos_Verify COMMAND example_with_kokkos 10) diff --git a/packages/kokkos/example/build_cmake_installed_different_compiler/bar.cpp b/packages/kokkos/example/build_cmake_installed_different_compiler/bar.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e02c2b8c688650fe3c5e0beefb5ea1ce01de2fa8 --- /dev/null +++ b/packages/kokkos/example/build_cmake_installed_different_compiler/bar.cpp @@ -0,0 +1,7 @@ + +#include <cstdio> + +int main() { + puts("hello world!"); + 
return 0; +} diff --git a/packages/kokkos/algorithms/unit_tests/TestHIP.cpp b/packages/kokkos/example/build_cmake_installed_different_compiler/foo.cpp similarity index 63% rename from packages/kokkos/algorithms/unit_tests/TestHIP.cpp rename to packages/kokkos/example/build_cmake_installed_different_compiler/foo.cpp index 5e5ccb6a2eb8e988986eaa4ce06e34cf028bf6ed..fc10366f71bd9b0d421b18e935c2cea86925904b 100644 --- a/packages/kokkos/algorithms/unit_tests/TestHIP.cpp +++ b/packages/kokkos/example/build_cmake_installed_different_compiler/foo.cpp @@ -42,42 +42,52 @@ //@HEADER */ -#include <Kokkos_Macros.hpp> -#ifdef KOKKOS_ENABLE_HIP +#include <Kokkos_Core.hpp> +#include <cstdio> -#include <cstdint> -#include <iostream> -#include <iomanip> +struct CountFunctor { + KOKKOS_FUNCTION void operator()(const long i, long& lcount) const { + lcount += (i % 2) == 0; + } +}; -#include <gtest/gtest.h> +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); + Kokkos::DefaultExecutionSpace::print_configuration(std::cout); -#include <Kokkos_Core.hpp> + if (argc < 2) { + fprintf(stderr, "Usage: %s [<kokkos_options>] <size>\n", argv[0]); + Kokkos::finalize(); + exit(1); + } -#include <TestRandom.hpp> -#include <TestSort.hpp> + const long n = strtol(argv[1], nullptr, 10); -namespace Test { + printf("Number of even integers from 0 to %ld\n", n - 1); -void hip_test_random_xorshift64(size_t num_draws) { - Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Experimental::HIP>>( - num_draws); - Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Device< - Kokkos::Experimental::HIP, Kokkos::Experimental::HIPSpace>>>(num_draws); -} + Kokkos::Timer timer; + timer.reset(); -void hip_test_random_xorshift1024(size_t num_draws) { - Impl::test_random< - Kokkos::Random_XorShift1024_Pool<Kokkos::Experimental::HIP>>(num_draws); - Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Device< - Kokkos::Experimental::HIP, Kokkos::Experimental::HIPSpace>>>(num_draws); -} + // 
Compute the number of even integers from 0 to n-1, in parallel. + long count = 0; + CountFunctor functor; + Kokkos::parallel_reduce(n, functor, count); + + double count_time = timer.seconds(); + printf(" Parallel: %ld %10.6f\n", count, count_time); + + timer.reset(); + + // Compare to a sequential loop. + long seq_count = 0; + for (long i = 0; i < n; ++i) { + seq_count += (i % 2) == 0; + } + + count_time = timer.seconds(); + printf("Sequential: %ld %10.6f\n", seq_count, count_time); + + Kokkos::finalize(); -TEST(hip, Random_XorShift64) { hip_test_random_xorshift64(132141141); } -TEST(hip, Random_XorShift1024_0) { hip_test_random_xorshift1024(52428813); } -TEST(hip, SortUnsigned) { - Impl::test_sort<Kokkos::Experimental::HIP, unsigned>(171); + return (count == seq_count) ? 0 : -1; } -} // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTHIP_PREVENT_LINK_ERROR() {} -#endif /* #ifdef KOKKOS_ENABLE_HIP */ diff --git a/packages/kokkos/example/tutorial/01_hello_world/hello_world.cpp b/packages/kokkos/example/tutorial/01_hello_world/hello_world.cpp index bdb630a1ad9a1b77c91989272c65ab84218afcdf..5810e0ee7a267c79a823d00516937b8ccd43c0d9 100644 --- a/packages/kokkos/example/tutorial/01_hello_world/hello_world.cpp +++ b/packages/kokkos/example/tutorial/01_hello_world/hello_world.cpp @@ -85,7 +85,14 @@ struct hello_world { // (as well as on the host). If not building with CUDA, the macro // is unnecessary but harmless. 
KOKKOS_INLINE_FUNCTION - void operator()(const int i) const { printf("Hello from i = %i\n", i); } + void operator()(const int i) const { + // FIXME_SYCL needs workaround for printf +#ifndef __SYCL_DEVICE_ONLY__ + printf("Hello from i = %i\n", i); +#else + (void)i; +#endif + } }; int main(int argc, char* argv[]) { diff --git a/packages/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp b/packages/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp index c3bd00e7d40adbfd808643e939d58647a346b3b0..06f209774eae10a4a11161d17aae979450d6e850 100644 --- a/packages/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp +++ b/packages/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp @@ -104,8 +104,13 @@ int main(int argc, char* argv[]) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_for( 15, KOKKOS_LAMBDA(const int i) { + // FIXME_SYCL needs workaround for printf +#ifndef __SYCL_DEVICE_ONLY__ // printf works in a CUDA parallel kernel; std::ostream does not. printf("Hello from i = %i\n", i); +#else + (void)i; +#endif }); #endif // You must call finalize() after you are done using Kokkos. diff --git a/packages/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp b/packages/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp index caacc828e5075fa6a179f8d9b0a99a31a29fc8aa..32b18e4d2047c5f3dcc23614109f4440b4686549 100644 --- a/packages/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp +++ b/packages/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp @@ -122,7 +122,7 @@ int main() { // Fill the 'data' array on the host with random numbers. We assume // that they come from some process which is only implemented on the // host, via some library. (That's true in this case.) 
- for (size_type i = 0; i < data.extent(0); ++i) { + for (size_type i = 0; i < static_cast<size_type>(data.extent(0)); ++i) { h_data(i) = rand() % nnumbers; } Kokkos::deep_copy(data, h_data); // copy from host to device diff --git a/packages/kokkos/example/tutorial/06_simple_mdrangepolicy/CMakeLists.txt b/packages/kokkos/example/tutorial/06_simple_mdrangepolicy/CMakeLists.txt index ca9f0bf8da93f918cb07e825fb36f35c97619c11..2a6c3f6c27a3699d0715c8e5ab41448221432aaf 100644 --- a/packages/kokkos/example/tutorial/06_simple_mdrangepolicy/CMakeLists.txt +++ b/packages/kokkos/example/tutorial/06_simple_mdrangepolicy/CMakeLists.txt @@ -7,4 +7,3 @@ KOKKOS_ADD_EXECUTABLE( tutorial_06_simple_mdrangepolicy SOURCES simple_mdrangepolicy.cpp ) - diff --git a/packages/kokkos/example/tutorial/Advanced_Views/01_data_layouts/data_layouts.cpp b/packages/kokkos/example/tutorial/Advanced_Views/01_data_layouts/data_layouts.cpp index 643ac87a86168e29d11332251a53efbc817ea9f5..597d1e3056ece9ef5865a3fb79dfef09ccf50a6a 100644 --- a/packages/kokkos/example/tutorial/Advanced_Views/01_data_layouts/data_layouts.cpp +++ b/packages/kokkos/example/tutorial/Advanced_Views/01_data_layouts/data_layouts.cpp @@ -68,13 +68,15 @@ struct init_view { ViewType a; init_view(ViewType a_) : a(a_) {} + using size_type = typename ViewType::size_type; + KOKKOS_INLINE_FUNCTION void operator()(const typename ViewType::size_type i) const { // On CPUs this loop could be vectorized so j should do stride 1 // access on a for optimal performance. I.e. a should be LayoutRight. // On GPUs threads should do coalesced loads and stores. That means // that i should be the stride one access for optimal performance. 
- for (typename ViewType::size_type j = 0; j < a.extent(1); ++j) { + for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) { a(i, j) = 1.0 * a.extent(0) * i + 1.0 * j; } } @@ -95,6 +97,8 @@ struct contraction { contraction(view_type a_, ViewType1 v1_, ViewType2 v2_) : a(a_), v1(v1_), v2(v2_) {} + using size_type = typename view_type::size_type; + // As with the initialization functor the performance of this operator // depends on the architecture and the chosen data layouts. // On CPUs optimal would be to vectorize the inner loop, so j should be the @@ -104,7 +108,7 @@ struct contraction { // LayoutLeft and v2 LayoutRight. KOKKOS_INLINE_FUNCTION void operator()(const view_type::size_type i) const { - for (view_type::size_type j = 0; j < v1.extent(1); ++j) { + for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) { a(i) = v1(i, j) * v2(j, i); } } diff --git a/packages/kokkos/example/tutorial/Advanced_Views/02_memory_traits/memory_traits.cpp b/packages/kokkos/example/tutorial/Advanced_Views/02_memory_traits/memory_traits.cpp index cff215d0ebf9581cc3de646287bad432046e75f7..00bfeea36b972e6ea08ab8c82ec5aaca1a4e2af5 100644 --- a/packages/kokkos/example/tutorial/Advanced_Views/02_memory_traits/memory_traits.cpp +++ b/packages/kokkos/example/tutorial/Advanced_Views/02_memory_traits/memory_traits.cpp @@ -113,8 +113,9 @@ int main(int narg, char* arg[]) { srand(134231); + using size_type = view_type::size_type; for (int i = 0; i < size; i++) { - for (view_type::size_type j = 0; j < h_idx.extent(1); ++j) { + for (size_type j = 0; j < static_cast<size_type>(h_idx.extent(1)); ++j) { h_idx(i, j) = (size + i + (rand() % 500 - 250)) % size; } } diff --git a/packages/kokkos/example/tutorial/Advanced_Views/03_subviews/subviews.cpp b/packages/kokkos/example/tutorial/Advanced_Views/03_subviews/subviews.cpp index ca2eeac41682a5629d3e66903474b78fa96d851c..20e5c5a284f415e7627fd07df20ffbe5856f3428 100644 --- 
a/packages/kokkos/example/tutorial/Advanced_Views/03_subviews/subviews.cpp +++ b/packages/kokkos/example/tutorial/Advanced_Views/03_subviews/subviews.cpp @@ -78,9 +78,11 @@ struct set_boundary { set_boundary(ViewType a_, double value_) : a(a_), value(value_) {} + using size_type = typename ViewType::size_type; + KOKKOS_INLINE_FUNCTION - void operator()(const typename ViewType::size_type i) const { - for (typename ViewType::size_type j = 0; j < a.extent(1); ++j) { + void operator()(const size_type i) const { + for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) { a(i, j) = value; } } @@ -96,11 +98,12 @@ struct set_inner { set_inner(ViewType a_, double value_) : a(a_), value(value_) {} + using size_type = typename ViewType::size_type; + KOKKOS_INLINE_FUNCTION - void operator()(const typename ViewType::size_type i) const { - using size_type = typename ViewType::size_type; - for (size_type j = 0; j < a.extent(1); ++j) { - for (size_type k = 0; k < a.extent(2); ++k) { + void operator()(const size_type i) const { + for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) { + for (size_type k = 0; k < static_cast<size_type>(a.extent(2)); ++k) { a(i, j, k) = value; } } @@ -116,12 +119,13 @@ struct update { update(ViewType a_, const double dt_) : a(a_), dt(dt_) {} + using size_type = typename ViewType::size_type; + KOKKOS_INLINE_FUNCTION - void operator()(typename ViewType::size_type i) const { - using size_type = typename ViewType::size_type; + void operator()(size_type i) const { i++; - for (size_type j = 1; j < a.extent(1) - 1; j++) { - for (size_type k = 1; k < a.extent(2) - 1; k++) { + for (size_type j = 1; j < static_cast<size_type>(a.extent(1) - 1); j++) { + for (size_type k = 1; k < static_cast<size_type>(a.extent(2) - 1); k++) { a(i, j, k) += dt * (a(i, j, k + 1) - a(i, j, k - 1) + a(i, j + 1, k) - a(i, j - 1, k) + a(i + 1, j, k) - a(i - 1, j, k)); } diff --git a/packages/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp 
b/packages/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp index 174d13d102a337bda707accaa915547aa97d488d..3c0fcd085c7c2afe29a328dfa3f574ab9ac81276 100644 --- a/packages/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp +++ b/packages/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp @@ -175,8 +175,9 @@ int main(int narg, char* arg[]) { // Get a reference to the host view of idx directly (equivalent to // idx.view<idx_type::host_mirror_space>() ) idx_type::t_host h_idx = idx.h_view; + using size_type = view_type::size_type; for (int i = 0; i < size; ++i) { - for (view_type::size_type j = 0; j < h_idx.extent(1); ++j) { + for (size_type j = 0; j < static_cast<size_type>(h_idx.extent(1)); ++j) { h_idx(i, j) = (size + i + (rand() % 500 - 250)) % size; } } diff --git a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp index 9afc144752284288704ff9223c52a9261ba7a0df..735de65e056c84a5290105db39d5369a50f16ec7 100644 --- a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp +++ b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp @@ -75,8 +75,13 @@ struct hello_world { // The TeamPolicy<>::member_type provides functions to query the multi // dimensional index of a thread as well as the number of thread-teams and // the size of each team. 
+#ifndef __SYCL_DEVICE_ONLY__ + // FIXME_SYCL needs printf workaround printf("Hello World: %i %i // %i %i\n", thread.league_rank(), thread.team_rank(), thread.league_size(), thread.team_size()); +#else + (void)thread; +#endif } }; diff --git a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp index a182b08b8439d61d1a25fee5e8798ea56f761c0c..dcb1e0561bca8b096b528d61128f85c6254c221c 100644 --- a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp +++ b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp @@ -85,11 +85,16 @@ int main(int narg, char* args[]) { policy, KOKKOS_LAMBDA(const team_member& thread, int& lsum) { lsum += 1; - // TeamPolicy<>::member_type provides functions to query the - // multidimensional index of a thread, as well as the number of - // thread teams and the size of each team. + // TeamPolicy<>::member_type provides functions to query the + // multidimensional index of a thread, as well as the number of + // thread teams and the size of each team. 
+#ifndef __SYCL_DEVICE_ONLY__ + // FIXME_SYCL needs workaround for printf printf("Hello World: %i %i // %i %i\n", thread.league_rank(), thread.team_rank(), thread.league_size(), thread.team_size()); +#else + (void)thread; +#endif }, sum); #endif diff --git a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp index 29e23e904c545e2f4258cf0e462d4315ff9edfdd..a528b71fe33f817b03dc32bacdbe8cd96271eab7 100644 --- a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp +++ b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp @@ -73,8 +73,13 @@ struct hello_world { // also executed by all threads of the team. Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, 31), [&](const int& i) { +#ifndef __SYCL_DEVICE_ONLY__ + // FIXME_SYCL needs printf workaround printf("Hello World: (%i , %i) executed loop %i \n", thread.league_rank(), thread.team_rank(), i); +#else + (void) i; +#endif }); } }; diff --git a/packages/kokkos/generate_makefile.bash b/packages/kokkos/generate_makefile.bash index 144ed92608f2574ed067abb92d6bdbaf3a89b751..e9871b436971a551c82751756b2b18de9175839a 100755 --- a/packages/kokkos/generate_makefile.bash +++ b/packages/kokkos/generate_makefile.bash @@ -146,6 +146,7 @@ display_help_text() { echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit." echo "--with-hip[=/Path/To/Hip]: Enable Hip and set path to ROCM Toolkit." echo "--with-openmptarget: Enable OpenMPTarget backend." + echo "--with-sycl: Enable Sycl backend." echo "--with-openmp: Enable OpenMP backend." echo "--with-pthread: Enable Pthreads backend." echo "--with-serial: Enable Serial backend." 
@@ -159,7 +160,7 @@ display_help_text() { echo " [AMD: GPU]" echo " VEGA900 = AMD GPU MI25 GFX900" echo " VEGA906 = AMD GPU MI50/MI60 GFX906" - echo " VEGA908 = AMD GPU" + echo " VEGA908 = AMD GPU MI100 GFX908" echo " [ARM]" echo " ARMV80 = ARMv8.0 Compatible CPU" echo " ARMV81 = ARMv8.1 Compatible CPU" @@ -199,7 +200,7 @@ display_help_text() { echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" echo " build. This will still set certain required" echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," - echo " --std=c++14, etc.)." + echo " -std=c++14, etc.)." echo "--cxxstandard=[FLAGS] Set CMAKE_CXX_STANDARD for library build and test" echo " c++14 (default), c++17, c++1y, c++1z, c++2a" echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" @@ -285,6 +286,9 @@ do --with-openmp) update_kokkos_devices OpenMP ;; + --with-sycl) + update_kokkos_devices Sycl + ;; --with-pthread) update_kokkos_devices Pthread ;; @@ -356,7 +360,7 @@ do ;; --compiler*) COMPILER="${key#*=}" - CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l) + CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep -c "no ${COMPILER}") if [ ${CNUM} -gt 0 ]; then echo "Invalid compiler by --compiler command: '${COMPILER}'" exit @@ -365,7 +369,7 @@ do echo "Empty compiler specified by --compiler command." 
exit fi - CNUM=$(command -v ${COMPILER} | grep ${COMPILER} | wc -l) + CNUM=$(command -v ${COMPILER} | grep -c ${COMPILER}) if [ ${CNUM} -eq 0 ]; then echo "Invalid compiler by --compiler command: '${COMPILER}'" exit diff --git a/packages/kokkos/gnu_generate_makefile.bash b/packages/kokkos/gnu_generate_makefile.bash index 20ad18bd29a0a1c5529a572daf7490a08e63b320..ea509669f068d677a0354c83891d7caf298b1e34 100755 --- a/packages/kokkos/gnu_generate_makefile.bash +++ b/packages/kokkos/gnu_generate_makefile.bash @@ -94,7 +94,7 @@ do ;; --compiler*) COMPILER="${key#*=}" - CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l) + CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep -c "no ${COMPILER}") if [ ${CNUM} -gt 0 ]; then echo "Invalid compiler by --compiler command: '${COMPILER}'" exit @@ -103,7 +103,7 @@ do echo "Empty compiler specified by --compiler command." exit fi - CNUM=$(command -v ${COMPILER} | grep ${COMPILER} | wc -l) + CNUM=$(command -v ${COMPILER} | grep -c ${COMPILER}) if [ ${CNUM} -eq 0 ]; then echo "Invalid compiler by --compiler command: '${COMPILER}'" exit @@ -174,7 +174,7 @@ do echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" echo " build. This will still set certain required" echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," - echo " --std=c++14, etc.)." + echo " -std=c++14, etc.)." 
echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test" echo " c++14 (default), c++17, c++1y, c++1z, c++2a" echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index e746bd7d0103b0cb2e813290304f8f90c9b35f72..7a58f593d00e424b7d7dcbda226f5c4c6d7ccd3c 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -23,3 +23,4 @@ tag: 3.1.01 date: 05:04:2020 master: 785d19f2 release: 2be028bc tag: 3.2.00 date: 08:19:2020 master: 3b2fdc7e release: 5dc6d303 tag: 3.3.00 date: 12:16:2020 master: 734f577a release: 1535ba5c tag: 3.3.01 date: 01:06:2021 master: 6d65b5a3 release: 4d23839c +tag: 3.4.00 date: 04:26:2021 master: 1fb0c284 release: 5d7738d6 diff --git a/packages/kokkos/scripts/docker/Dockerfile.clang b/packages/kokkos/scripts/docker/Dockerfile.clang index 8d1a95b8bafe5b04188b0c77192060fca2aa0e5f..6aaf75fae55ff975df5045bb73a0813236871d89 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.clang +++ b/packages/kokkos/scripts/docker/Dockerfile.clang @@ -9,7 +9,7 @@ RUN apt-get update && apt-get install -y \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -ARG CMAKE_VERSION=3.10.3 +ARG CMAKE_VERSION=3.16.8 ENV CMAKE_DIR=/opt/cmake RUN CMAKE_KEY=2D2CEF1034921684 && \ CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.gcc b/packages/kokkos/scripts/docker/Dockerfile.gcc index fd37305f9c20d69c0dfe319f0365db81dd03cf16..56972d3185d0f62e6b9effb64e8f2cedefe25c66 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.gcc +++ b/packages/kokkos/scripts/docker/Dockerfile.gcc @@ -1,6 +1,6 @@ FROM gcc:5.3.0 -ARG CMAKE_VERSION=3.10.3 +ARG CMAKE_VERSION=3.16.8 ENV CMAKE_DIR=/opt/cmake RUN CMAKE_KEY=2D2CEF1034921684 && \ CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \ diff --git 
a/packages/kokkos/scripts/docker/Dockerfile.hipcc b/packages/kokkos/scripts/docker/Dockerfile.hipcc index dddd09ae44c00514c0f1955e9b432f4be8c813f9..d3b6b93a023396aa785703a5aeec0c4001af34e8 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.hipcc +++ b/packages/kokkos/scripts/docker/Dockerfile.hipcc @@ -2,6 +2,7 @@ ARG BASE=rocm/dev-ubuntu-20.04:3.8 FROM $BASE RUN apt-get update && apt-get install -y \ + git \ kmod \ wget \ ccache \ @@ -12,7 +13,7 @@ RUN apt-get update && apt-get install -y \ ENV PATH=/opt/rocm/bin:$PATH -ARG CMAKE_VERSION=3.10.3 +ARG CMAKE_VERSION=3.16.8 ENV CMAKE_DIR=/opt/cmake RUN CMAKE_KEY=2D2CEF1034921684 && \ CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject index ce4ffaa0429021803e9653504205c9d8e33b9c86..5d53a645e4bc7c551698719d3edb1c3768467ca7 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject +++ b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject @@ -11,7 +11,7 @@ RUN apt-get update && apt-get install -y \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -ARG CMAKE_VERSION=3.10.3 +ARG CMAKE_VERSION=3.16.8 ENV CMAKE_DIR=/opt/cmake RUN CMAKE_KEY=2D2CEF1034921684 && \ CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvcc b/packages/kokkos/scripts/docker/Dockerfile.nvcc index 868f9be1c4d7afbf3bbb18994eadd1650517dff8..e17accc0663980694821b8002b976277fcd9ca42 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.nvcc +++ b/packages/kokkos/scripts/docker/Dockerfile.nvcc @@ -5,6 +5,7 @@ ARG ADDITIONAL_PACKAGES RUN apt-get update && apt-get install -y \ bc \ + git \ wget \ ccache \ $ADDITIONAL_PACKAGES \ @@ -12,7 +13,7 @@ RUN apt-get update && apt-get install -y \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -ARG CMAKE_VERSION=3.10.3 +ARG 
CMAKE_VERSION=3.16.8 ENV CMAKE_DIR=/opt/cmake RUN CMAKE_KEY=2D2CEF1034921684 && \ CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.sycl b/packages/kokkos/scripts/docker/Dockerfile.sycl index 331270491f9150fab8d31ec02f26a84193e5cd2f..fdcd6d01fb8e3158000aa1507bb5bfcf7e0d9b4e 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.sycl +++ b/packages/kokkos/scripts/docker/Dockerfile.sycl @@ -1,16 +1,18 @@ -ARG BASE=intel/oneapi-basekit:devel-ubuntu18.04 +ARG BASE=nvidia/cuda:10.2-devel FROM $BASE RUN apt-get update && apt-get install -y \ + bc \ + git \ wget \ ccache \ + ninja-build \ + python3 \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -ENV PATH=/opt/intel/oneapi/compiler/latest/linux/bin/:$PATH - -ARG CMAKE_VERSION=3.10.3 +ARG CMAKE_VERSION=3.18.5 ENV CMAKE_DIR=/opt/cmake RUN CMAKE_KEY=2D2CEF1034921684 && \ CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \ @@ -26,3 +28,20 @@ RUN CMAKE_KEY=2D2CEF1034921684 && \ sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \ rm cmake* ENV PATH=${CMAKE_DIR}/bin:$PATH + +ENV SYCL_DIR=/opt/sycl +RUN SYCL_VERSION=20210311 && \ + SYCL_URL=https://github.com/intel/llvm/archive/sycl-nightly && \ + SYCL_ARCHIVE=${SYCL_VERSION}.tar.gz && \ + SCRATCH_DIR=/scratch && mkdir -p ${SCRATCH_DIR} && cd ${SCRATCH_DIR} && \ + wget --quiet ${SYCL_URL}/${SYCL_ARCHIVE} && \ + mkdir llvm && \ + tar -xf ${SYCL_ARCHIVE} -C llvm --strip-components=1 && \ + cd llvm && \ + python3 buildbot/configure.py --cuda && \ + python3 buildbot/compile.py && \ + mkdir -p ${SYCL_DIR} && \ + mv ${SCRATCH_DIR}/llvm/build/install/* ${SYCL_DIR} && \ + echo "${SYCL_DIR}/lib" > /etc/ld.so.conf.d/sycl.conf && ldconfig && \ + rm -rf ${SCRATCH_DIR} +ENV PATH=${SYCL_DIR}/bin:$PATH diff --git a/packages/kokkos/scripts/spack_test/CMakeLists.txt b/packages/kokkos/scripts/spack_test/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..4c28bd0b8eccff2487ae1388960bbbc6b8504a34 --- /dev/null +++ b/packages/kokkos/scripts/spack_test/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.16) +project(SpackTestGen) +set(TEST_LIST_DEF ${CMAKE_CURRENT_SOURCE_DIR}/test_list.def) +file(STRINGS ${TEST_LIST_DEF} TEST_FILES) + +#Copy test source to Spack test directory +foreach (TEST_FILE ${TEST_FILES}) + set(TEST_FILE_LOCATION ${SPACK_PACKAGE_SOURCE_DIR}/${TEST_FILE}) + file(COPY ${TEST_FILE_LOCATION} DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/out) +endforeach() + +#Clean up names +foreach(TEST_FILE ${TEST_FILES} ) + string( REGEX REPLACE ".+\/" "" TEST_FILE ${TEST_FILE} ) + list(APPEND SRC_NAME_LIST ${TEST_FILE}) + string( REPLACE ".cpp" "" TEST_FILE ${TEST_FILE} ) + list(APPEND BIN_NAME_LIST ${TEST_FILE}) +endforeach() + +#Configure test cmake script and run script +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt.in ${CMAKE_CURRENT_SOURCE_DIR}/out/CMakeLists.txt @ONLY) diff --git a/packages/kokkos/scripts/spack_test/CMakeLists.txt.in b/packages/kokkos/scripts/spack_test/CMakeLists.txt.in new file mode 100644 index 0000000000000000000000000000000000000000..4a216df4aab7b326efc94866b0f943af7c42d29f --- /dev/null +++ b/packages/kokkos/scripts/spack_test/CMakeLists.txt.in @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.16) +project(kokkos_spack_test CXX) +find_package(Kokkos REQUIRED) + +set(SRC_NAME_LIST "@SRC_NAME_LIST@") +set(BIN_NAME_LIST "@BIN_NAME_LIST@") + +enable_testing() +list(LENGTH SRC_NAME_LIST LEN) +math(EXPR LEN "${LEN}-1") + +set(CMAKE_CXX_COMPILER ${Kokkos_CXX_COMPILER}) + +foreach (it RANGE ${LEN}) + list(GET SRC_NAME_LIST ${it} src) + list(GET BIN_NAME_LIST ${it} bin) + add_executable(${bin} ${src}) + target_link_libraries(${bin} Kokkos::kokkos) + add_test(NAME ${bin} COMMAND ${bin}) + set_tests_properties(${bin} PROPERTIES + LABELS "Kokkos" + PROCESSORS 1 + TIMEOUT 60) +endforeach() diff --git 
a/packages/kokkos/scripts/spack_test/test_list.def b/packages/kokkos/scripts/spack_test/test_list.def new file mode 100644 index 0000000000000000000000000000000000000000..8703ccb9854140245f5ff684b85eb32c6881b207 --- /dev/null +++ b/packages/kokkos/scripts/spack_test/test_list.def @@ -0,0 +1,4 @@ +example/tutorial/01_hello_world/hello_world.cpp +example/tutorial/02_simple_reduce/simple_reduce.cpp +example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp +example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp diff --git a/packages/kokkos/scripts/testing_scripts/TestEXEC_TEST.cpp b/packages/kokkos/scripts/testing_scripts/TestEXEC_TEST.cpp index f2d33eb26cb8e131974cab8ce6693bc00f0dda10..883e88b51b7dd6c3f116ea8731934db5b7dde72a 100644 --- a/packages/kokkos/scripts/testing_scripts/TestEXEC_TEST.cpp +++ b/packages/kokkos/scripts/testing_scripts/TestEXEC_TEST.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include <exec/TestEXEC_Category.hpp> +#include <TestEXEC_Category.hpp> #include <TestTEST.hpp> diff --git a/packages/kokkos/scripts/testing_scripts/generate_makefile.bash b/packages/kokkos/scripts/testing_scripts/generate_makefile.bash index cd767975115684e19db41f3af5392eed566be4ec..f21124ed6e716844e876cf209ee2af5cb9a7dbbd 100755 --- a/packages/kokkos/scripts/testing_scripts/generate_makefile.bash +++ b/packages/kokkos/scripts/testing_scripts/generate_makefile.bash @@ -86,7 +86,7 @@ do ;; --compiler*) COMPILER="${key#*=}" - CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l) + CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep -c "no ${COMPILER}") if [ ${CNUM} -gt 0 ]; then echo "Invalid compiler by --compiler command: '${COMPILER}'" exit @@ -95,7 +95,7 @@ do echo "Empty compiler specified by --compiler command." 
exit fi - CNUM=$(command -v ${COMPILER} | grep ${COMPILER} | wc -l) + CNUM=$(command -v ${COMPILER} | grep -c ${COMPILER}) if [ ${CNUM} -eq 0 ]; then echo "Invalid compiler by --compiler command: '${COMPILER}'" exit @@ -166,7 +166,7 @@ do echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" echo " build. This will still set certain required" echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," - echo " --std=c++14, etc.)." + echo " -std=c++14, etc.)." echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test" echo " c++14 (default), c++17, c++1y, c++1z, c++2a" echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" diff --git a/packages/kokkos/scripts/testing_scripts/test_all_sandia b/packages/kokkos/scripts/testing_scripts/test_all_sandia index 578d2992a3840fa7a87d22c29603246061c150a6..877b35b73e1aef7c64cdb2d7e5f00f7bc235781c 100755 --- a/packages/kokkos/scripts/testing_scripts/test_all_sandia +++ b/packages/kokkos/scripts/testing_scripts/test_all_sandia @@ -112,6 +112,10 @@ if [[ "$HOSTNAME" == kokkos-dev\.sandia\.gov* ]]; then MACHINE=kokkos-dev fi +if [[ "$HOSTNAME" == sogpu01* ]]; then + MACHINE=sogpu +fi + if [ ! 
-z "$SEMS_MODULEFILES_ROOT" ]; then if [[ "$MACHINE" = "" ]]; then MACHINE=sems @@ -269,9 +273,9 @@ if [ "$MACHINE" = "sems" ]; then # On rhel7 sems machines gcc/7.3.0, clang/4.0.1, and intel/16.0.3 are missing # Remove kokkkos-env module use - module load sems-cmake/3.12.2 - BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>" - CUDA9_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.2.0" + module load sems-cmake/3.17.1 + BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>" + CUDA9_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.2.0" SKIP_HWLOC=True # No sems hwloc module @@ -304,15 +308,47 @@ if [ "$MACHINE" = "sems" ]; then "cuda/9.2 $CUDA9_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi +elif [ "$MACHINE" = "sogpu" ]; then + source /projects/sems/modulefiles/utils/sems-modules-init.sh + + module load sems-cmake/3.17.1 sems-git + BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>" + CUDA_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.2.0" + CUDA11_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/8.3.0" + SKIP_HWLOC=True + # No sems hwloc module + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=Volta70" + fi + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.4.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/8.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/9.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ 
$GCC_WARNING_FLAGS" + "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/7.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/9.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/10.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/18.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/19.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "cuda/10.1 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/11.1 $CUDA11_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) elif [ "$MACHINE" = "kokkos-dev" ]; then source /projects/sems/modulefiles/utils/sems-modules-init.sh - module load sems-cmake/3.12.2 - BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>" - CUDA9_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/6.1.0" - CUDA10_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.2.0" - CUDA11_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/9.2.0" - CLANG7_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-cuda/9.2" + module load sems-cmake/3.17.1 + BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>" + CUDA9_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/6.1.0" + CUDA10_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.2.0" + CUDA11_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/9.2.0" + CLANG7_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-cuda/9.2" SKIP_HWLOC=True if [ -z 
"$ARCH_FLAG" ]; then @@ -354,10 +390,10 @@ elif [ "$MACHINE" = "white" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/<COMPILER_VERSION>" - IBM_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/xl/<COMPILER_VERSION>,gcc/7.2.0" - CUDA_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.2.0,ibm/xl/16.1.1" - CUDA10_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.4.0,ibm/xl/16.1.1" + BASE_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>" + IBM_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/xl/<COMPILER_VERSION>,gcc/7.2.0" + CUDA_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.2.0,ibm/xl/16.1.1" + CUDA10_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.4.0,ibm/xl/16.1.1" # Don't do pthread with Power GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" @@ -372,7 +408,8 @@ elif [ "$MACHINE" = "white" ]; then ) else # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.4.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.4.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/9.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" @@ -391,10 +428,10 @@ elif [ "$MACHINE" = "weaver" ]; then source /etc/profile.d/modules.sh SKIP_HWLOC=True - BASE_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/<COMPILER_VERSION>" - IBM_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/xl/<COMPILER_VERSION>,gcc/7.2.0" - CUDA_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.2.0,ibm/xl/16.1.1" - CUDA10_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.4.0,ibm/xl/16.1.1" + BASE_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>" + 
IBM_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/xl/<COMPILER_VERSION>,gcc/7.2.0" + CUDA_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.2.0,ibm/xl/16.1.1" + CUDA10_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.4.0,ibm/xl/16.1.1" # Don't do pthread with Power GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" @@ -430,7 +467,7 @@ elif [ "$MACHINE" = "voltrino" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="PrgEnv-intel,craype-mic-knl,cmake/3.16.2,slurm/19.05.5a,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/9.3.0" + BASE_MODULE_LIST="PrgEnv-intel,craype-mic-knl,cmake/3.16.2,slurm/20.11.4a,<COMPILER_NAME>/<COMPILER_VERSION>,gcc/9.3.0" # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("intel/17.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" @@ -446,11 +483,12 @@ elif [ "$MACHINE" = "mayer" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=96 - BASE_MODULE_LIST="cmake/3.14.5,<COMPILER_NAME>/<COMPILER_VERSION>" + BASE_MODULE_LIST="cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>" # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gnu7/7.2.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "arm/20.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS") + "gnu9/9.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "arm/20.1 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS") if [ -z "$ARCH_FLAG" ]; then ARCH_FLAG="--arch=ARMV8_THUNDERX2" @@ -461,10 +499,12 @@ elif [ "$MACHINE" = "blake" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - module load cmake/3.12.3 + module load cmake/3.19.3 - BASE_MODULE_LIST="cmake/3.12.3,<COMPILER_NAME>/<COMPILER_VERSION>" - BASE_MODULE_LIST_INTEL="cmake/3.12.3,<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + BASE_MODULE_LIST="cmake/3.19.3,<COMPILER_NAME>/<COMPILER_VERSION>" + 
BASE_MODULE_LIST_INTEL="cmake/3.19.3,<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + BASE_MODULE_LIST_ONEAPI="cmake/3.19.3,<COMPILER_NAME>/oneAPI/base-toolkit/<COMPILER_VERSION>" + ONEAPI_WARNING_FLAGS="" if [ "$SPOT_CHECK" = "True" ]; then @@ -479,12 +519,14 @@ elif [ "$MACHINE" = "blake" ]; then "intel/19.1.144 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/19.3.199 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/19.5.281 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/2021.1.1 $BASE_MODULE_LIST_ONEAPI $INTEL_BUILD_LIST icpx $ONEAPI_WARNING_FLAGS" "gcc/5.5.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/6.4.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/8.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/8.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/9.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/10.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" ) fi @@ -498,18 +540,18 @@ elif [ "$MACHINE" = "apollo" ]; then module load sems-git module load sems-tex - module load sems-cmake/3.12.2 + module load sems-cmake/3.17.1 module load sems-gdb module load binutils SKIP_HWLOC=True - BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>" - CLANG_MODULE_LIST="sems-env,sems-cmake/3.12.2,<COMPILER_NAME>/<COMPILER_VERSION>" - CUDA10_MODULE_LIST="sems-env,sems-cmake/3.12.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0" - CUDA10X_MODULE_LIST="sems-env,sems-cmake/3.12.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0" + BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>" + CLANG_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>" + 
CUDA10_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0" + CUDA10X_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0" - HPX3_MODULE_LIST="sems-env,sems-cmake/3.12.2,compilers/hpx/1.3.0,sems-gcc/6.1.0,binutils" + HPX3_MODULE_LIST="sems-env,sems-cmake/3.17.1,compilers/hpx/1.3.0,sems-gcc/6.1.0,binutils" BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" @@ -548,19 +590,19 @@ elif [ "$MACHINE" = "kokkos-dev-2" ]; then module load sems-git module load sems-tex - module load sems-cmake/3.12.2 + module load sems-cmake/3.17.1 module load sems-gdb SKIP_HWLOC=True - BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-<COMPILER_NAME>/<COMPILER_VERSION>" - GCC91_MODULE_LIST="sems-env,sems-cmake/3.12.2,<COMPILER_NAME>/<COMPILER_VERSION>" - NVCC9_MODULE_LIST="sems-env,sems-cmake/3.12.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0" - NVCC_MODULE_LIST="sems-env,sems-cmake/3.12.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0" - NVCC11_MODULE_LIST="sems-env,sems-cmake/3.12.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/9.2.0" + BASE_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-<COMPILER_NAME>/<COMPILER_VERSION>" + GCC91_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>" + NVCC9_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0" + NVCC_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0" + NVCC11_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/9.2.0" - CLANG8_MODULE_LIST="sems-env,sems-cmake/3.12.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/10.0" - PGI_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-gcc/7.3.0,<COMPILER_NAME>/<COMPILER_VERSION>" + CLANG8_MODULE_LIST="sems-env,sems-cmake/3.17.1,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/10.0" + 
PGI_MODULE_LIST="sems-env,sems-cmake/3.17.1,sems-gcc/7.3.0,<COMPILER_NAME>/<COMPILER_VERSION>" BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_Pthread" BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_OpenMP" @@ -768,8 +810,8 @@ setup_env() { done if [ -e ${CM_ALL_SCRIPT_PATH}/update_lib.sh ]; then - echo "calling ${CM_ALL_SCRIPT_PATH}/update_lib.sh $MACHINE" - source ${CM_ALL_SCRIPT_PATH}/update_lib.sh $MACHINE + echo "calling ${CM_ALL_SCRIPT_PATH}/update_lib.sh $MACHINE $compiler" + source ${CM_ALL_SCRIPT_PATH}/update_lib.sh $MACHINE $compiler fi return 0 } @@ -851,8 +893,12 @@ single_build_and_test() { echo " \$KOKKOS_PATH/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=\$KOKKOS_PATH --with-options=${KOKKOS_OPTIONS} --with-cuda-options=${KOKKOS_CUDA_OPTIONS} ${KOKKOS_BOUNDS_CHECK} --no-examples $extra_args" &> call_generate_makefile_genericpath.sh run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} ${KOKKOS_BOUNDS_CHECK} --no-examples $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + local make_par_lvl=12 + if [[ "$MACHINE" = white* ]]; then + make_par_lvl=48 + fi local -i build_start_time=$(date +%s) - run_cmd make -j 48 all >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + run_cmd make -j $make_par_lvl all >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } local -i build_end_time=$(date +%s) comment="build_time=$(($build_end_time-$build_start_time))" diff --git a/packages/kokkos/scripts/testing_scripts/update_lib.sh b/packages/kokkos/scripts/testing_scripts/update_lib.sh index 
47f9745759637b5c7fad45af06ce15595bf3afd1..34ab5dd3c9a0afae4b10b70d99772308f35b3f9f 100755 --- a/packages/kokkos/scripts/testing_scripts/update_lib.sh +++ b/packages/kokkos/scripts/testing_scripts/update_lib.sh @@ -1,30 +1,53 @@ #!/bin/bash -if [ "$1" = blake ]; then - ICPCVER="$(icpc --version | grep icpc | cut -d ' ' -f 3)" - if [[ "${ICPCVER}" = 17.* || "${ICPCVER}" = 18.0.128 ]]; then - module swap gcc/4.9.3 gcc/6.4.0 - module list - fi -fi -if [ "$1" = kokkos-dev ]; then +local machine_input="$1" +local compiler_input="$2" + +check_sems_intel() { ICPCVER="$(icpc --version | grep icpc | cut -d ' ' -f 3)" if [[ "${ICPCVER}" = 17.* ]]; then module swap sems-gcc/4.9.3 sems-gcc/6.4.0 module list fi -fi -if [ "$1" = kokkos-dev-2 ]; then - ICPCVER="$(icpc --version | grep icpc | cut -d ' ' -f 3)" - if [[ "${ICPCVER}" = 17.* ]]; then - module swap sems-gcc/4.9.3 sems-gcc/6.4.0 + if [[ "${ICPCVER}" = 19.* ]]; then + # Newer gcc needed for c++ standard beyond c++14 + module swap sems-gcc/6.1.0 sems-gcc/7.2.0 module list fi -fi -if [ "$1" = sems ]; then +} + +check_sems_clang() { + CLANGVER=$(clang --version | grep "clang version" | cut -d " " -f 3) + if [[ "${CLANGVER}" = 9.* ]] || [[ "${CLANGVER}" = 10.* ]]; then + # Newer gcc needed for c++ standard beyond c++14 + module swap sems-gcc/5.3.0 sems-gcc/6.4.0 + module list + fi +} + +check_compiler_modules() { + if [[ "$compiler_input" = clang/* ]]; then + echo " clang compiler - check supporting modules" + check_sems_clang + elif [[ "$compiler_input" = intel/* ]]; then + echo " intel compiler - check supporting modules" + check_sems_intel + fi +} + +if [ "$machine_input" = blake ]; then ICPCVER="$(icpc --version | grep icpc | cut -d ' ' -f 3)" - if [[ "${ICPCVER}" = 17.* ]]; then - module swap sems-gcc/4.9.3 sems-gcc/6.4.0 + if [[ "${ICPCVER}" = 17.* || "${ICPCVER}" = 18.0.128 ]]; then + module swap gcc/4.9.3 gcc/6.4.0 module list fi fi +if [ "$machine_input" = kokkos-dev ]; then + check_compiler_modules +fi +if [ 
"$machine_input" = kokkos-dev-2 ]; then + check_compiler_modules +fi +if [ "$machine_input" = sems ] || [ "$machine_input" = sogpu ]; then + check_compiler_modules +fi