From b9ec061afdb0aefa3489d9beaecf99cd608f3d15 Mon Sep 17 00:00:00 2001 From: Stephane Del Pino <stephane.delpino44@gmail.com> Date: Mon, 12 Nov 2018 22:44:00 +0100 Subject: [PATCH] git subrepo pull packages/kokkos subrepo: subdir: "packages/kokkos" merged: "9614f72c7" upstream: origin: "git@github.com:kokkos/kokkos.git" branch: "master" commit: "9614f72c7" git-subrepo: version: "0.4.0" origin: "git@github.com:ingydotnet/git-subrepo.git" commit: "5d6aba9" --- packages/kokkos/.gitrepo | 7 +- packages/kokkos/.travis.yml | 36 +- packages/kokkos/CHANGELOG.md | 63 + packages/kokkos/CMakeLists.txt | 7 +- packages/kokkos/Makefile.kokkos | 85 +- packages/kokkos/README | 33 +- .../kokkos/algorithms/src/Kokkos_Random.hpp | 6 + .../kokkos/algorithms/src/Kokkos_Sort.hpp | 10 +- packages/kokkos/benchmarks/gups/Makefile | 41 + .../kokkos/benchmarks/gups/gups-kokkos.cc | 199 ++ packages/kokkos/benchmarks/stream/Makefile | 41 + .../kokkos/benchmarks/stream/stream-kokkos.cc | 265 +++ packages/kokkos/bin/hpcbind | 66 +- packages/kokkos/bin/nvcc_wrapper | 111 +- packages/kokkos/cmake/kokkos_build.cmake | 4 + packages/kokkos/cmake/kokkos_functions.cmake | 2 +- packages/kokkos/cmake/kokkos_options.cmake | 13 +- packages/kokkos/cmake/kokkos_settings.cmake | 11 +- packages/kokkos/config/test_all_sandia | 11 +- .../kokkos/containers/src/Kokkos_DualView.hpp | 335 ++- .../containers/src/Kokkos_DynRankView.hpp | 100 +- .../containers/src/Kokkos_OffsetView.hpp | 1895 +++++++++++++++++ .../containers/src/Kokkos_StaticCrsGraph.hpp | 4 +- .../kokkos/containers/src/Kokkos_Vector.hpp | 16 +- .../containers/unit_tests/CMakeLists.txt | 4 + .../kokkos/containers/unit_tests/Makefile | 5 + .../containers/unit_tests/TestDynViewAPI.hpp | 64 + .../containers/unit_tests/TestOffsetView.hpp | 426 ++++ .../containers/unit_tests/TestScatterView.hpp | 27 +- .../unit_tests/TestStaticCrsGraph.hpp | 1 + .../unit_tests/cuda/TestCuda_OffsetView.cpp | 47 + .../openmp/TestOpenMP_OffsetView.cpp | 47 + 
.../unit_tests/rocm/TestROCm_Category.hpp | 2 +- .../serial/TestSerial_OffsetView.cpp | 46 + .../threads/TestThreads_OffsetView.cpp | 47 + packages/kokkos/core/src/CMakeLists.txt | 4 + .../kokkos/core/src/Cuda/Kokkos_CudaExec.hpp | 8 +- .../kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 7 + .../kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp | 6 +- .../core/src/Cuda/Kokkos_Cuda_Internal.hpp | 221 +- .../core/src/Cuda/Kokkos_Cuda_Locks.hpp | 3 + .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 240 ++- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 319 ++- .../kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp | 124 +- .../Kokkos_Cuda_Version_9_8_Compatibility.hpp | 13 +- .../kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 2 + .../core/src/KokkosExp_MDRangePolicy.hpp | 6 +- packages/kokkos/core/src/Kokkos_Array.hpp | 8 +- packages/kokkos/core/src/Kokkos_Concepts.hpp | 5 +- packages/kokkos/core/src/Kokkos_CopyViews.hpp | 252 ++- .../kokkos/core/src/Kokkos_ExecPolicy.hpp | 8 + packages/kokkos/core/src/Kokkos_Layout.hpp | 146 ++ packages/kokkos/core/src/Kokkos_Macros.hpp | 7 +- .../core/src/Kokkos_Parallel_Reduce.hpp | 17 +- .../kokkos/core/src/Kokkos_ScratchSpace.hpp | 49 + packages/kokkos/core/src/Kokkos_Serial.hpp | 22 +- .../kokkos/core/src/Kokkos_TaskScheduler.hpp | 2 - packages/kokkos/core/src/Kokkos_View.hpp | 1065 ++++----- packages/kokkos/core/src/Makefile | 36 +- .../core/src/Makefile.generate_build_files | 34 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 4 - .../core/src/OpenMP/Kokkos_OpenMP_Team.hpp | 55 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 3 +- .../core/src/ROCm/Kokkos_ROCm_Config.hpp | 4 +- .../kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp | 25 +- .../kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp | 21 +- .../core/src/ROCm/Kokkos_ROCm_Parallel.hpp | 393 +++- .../core/src/ROCm/Kokkos_ROCm_Reduce.hpp | 9 +- .../core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp | 224 +- .../kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp | 120 +- .../core/src/ROCm/Kokkos_ROCm_Space.cpp | 4 + 
.../kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp | 5 +- .../core/src/Threads/Kokkos_ThreadsTeam.hpp | 101 +- ...IInst_int64_t_double_LayoutLeft_Rank1.cpp} | 0 ...IInst_int64_t_double_LayoutLeft_Rank2.cpp} | 0 ...IInst_int64_t_double_LayoutLeft_Rank3.cpp} | 0 ...IInst_int64_t_double_LayoutLeft_Rank4.cpp} | 0 ...IInst_int64_t_double_LayoutLeft_Rank5.cpp} | 0 ...IInst_int64_t_double_LayoutLeft_Rank8.cpp} | 0 ...Inst_int64_t_double_LayoutRight_Rank1.cpp} | 0 ...Inst_int64_t_double_LayoutRight_Rank2.cpp} | 0 ...Inst_int64_t_double_LayoutRight_Rank3.cpp} | 0 ...Inst_int64_t_double_LayoutRight_Rank4.cpp} | 0 ...Inst_int64_t_double_LayoutRight_Rank5.cpp} | 0 ...Inst_int64_t_double_LayoutRight_Rank8.cpp} | 0 ...nst_int64_t_double_LayoutStride_Rank1.cpp} | 0 ...nst_int64_t_double_LayoutStride_Rank2.cpp} | 0 ...nst_int64_t_double_LayoutStride_Rank3.cpp} | 0 ...nst_int64_t_double_LayoutStride_Rank4.cpp} | 0 ...nst_int64_t_double_LayoutStride_Rank5.cpp} | 0 ...nst_int64_t_double_LayoutStride_Rank8.cpp} | 0 ...TIInst_int64_t_float_LayoutLeft_Rank1.cpp} | 0 ...TIInst_int64_t_float_LayoutLeft_Rank2.cpp} | 0 ...TIInst_int64_t_float_LayoutLeft_Rank3.cpp} | 0 ...TIInst_int64_t_float_LayoutLeft_Rank4.cpp} | 0 ...TIInst_int64_t_float_LayoutLeft_Rank5.cpp} | 0 ...TIInst_int64_t_float_LayoutLeft_Rank8.cpp} | 0 ...IInst_int64_t_float_LayoutRight_Rank1.cpp} | 0 ...IInst_int64_t_float_LayoutRight_Rank2.cpp} | 0 ...IInst_int64_t_float_LayoutRight_Rank3.cpp} | 0 ...IInst_int64_t_float_LayoutRight_Rank4.cpp} | 0 ...IInst_int64_t_float_LayoutRight_Rank5.cpp} | 0 ...IInst_int64_t_float_LayoutRight_Rank8.cpp} | 0 ...Inst_int64_t_float_LayoutStride_Rank1.cpp} | 0 ...Inst_int64_t_float_LayoutStride_Rank2.cpp} | 0 ...Inst_int64_t_float_LayoutStride_Rank3.cpp} | 0 ...Inst_int64_t_float_LayoutStride_Rank4.cpp} | 0 ...Inst_int64_t_float_LayoutStride_Rank5.cpp} | 0 ...Inst_int64_t_float_LayoutStride_Rank8.cpp} | 0 ...Inst_int64_t_int64_t_LayoutLeft_Rank1.cpp} | 0 
...Inst_int64_t_int64_t_LayoutLeft_Rank2.cpp} | 0 ...Inst_int64_t_int64_t_LayoutLeft_Rank3.cpp} | 0 ...Inst_int64_t_int64_t_LayoutLeft_Rank4.cpp} | 0 ...Inst_int64_t_int64_t_LayoutLeft_Rank5.cpp} | 0 ...Inst_int64_t_int64_t_LayoutLeft_Rank8.cpp} | 0 ...nst_int64_t_int64_t_LayoutRight_Rank1.cpp} | 0 ...nst_int64_t_int64_t_LayoutRight_Rank2.cpp} | 0 ...nst_int64_t_int64_t_LayoutRight_Rank3.cpp} | 0 ...nst_int64_t_int64_t_LayoutRight_Rank4.cpp} | 0 ...nst_int64_t_int64_t_LayoutRight_Rank5.cpp} | 0 ...nst_int64_t_int64_t_LayoutRight_Rank8.cpp} | 0 ...st_int64_t_int64_t_LayoutStride_Rank1.cpp} | 0 ...st_int64_t_int64_t_LayoutStride_Rank2.cpp} | 0 ...st_int64_t_int64_t_LayoutStride_Rank3.cpp} | 0 ...st_int64_t_int64_t_LayoutStride_Rank4.cpp} | 0 ...st_int64_t_int64_t_LayoutStride_Rank5.cpp} | 0 ...st_int64_t_int64_t_LayoutStride_Rank8.cpp} | 0 ...yETIInst_int64_t_int_LayoutLeft_Rank1.cpp} | 0 ...yETIInst_int64_t_int_LayoutLeft_Rank2.cpp} | 0 ...yETIInst_int64_t_int_LayoutLeft_Rank3.cpp} | 0 ...yETIInst_int64_t_int_LayoutLeft_Rank4.cpp} | 0 ...yETIInst_int64_t_int_LayoutLeft_Rank5.cpp} | 0 ...yETIInst_int64_t_int_LayoutLeft_Rank8.cpp} | 0 ...ETIInst_int64_t_int_LayoutRight_Rank1.cpp} | 0 ...ETIInst_int64_t_int_LayoutRight_Rank2.cpp} | 0 ...ETIInst_int64_t_int_LayoutRight_Rank3.cpp} | 0 ...ETIInst_int64_t_int_LayoutRight_Rank4.cpp} | 0 ...ETIInst_int64_t_int_LayoutRight_Rank5.cpp} | 0 ...ETIInst_int64_t_int_LayoutRight_Rank8.cpp} | 0 ...TIInst_int64_t_int_LayoutStride_Rank1.cpp} | 0 ...TIInst_int64_t_int_LayoutStride_Rank2.cpp} | 0 ...TIInst_int64_t_int_LayoutStride_Rank3.cpp} | 0 ...TIInst_int64_t_int_LayoutStride_Rank4.cpp} | 0 ...TIInst_int64_t_int_LayoutStride_Rank5.cpp} | 0 ...TIInst_int64_t_int_LayoutStride_Rank8.cpp} | 0 ...pyETIInst_int_double_LayoutLeft_Rank1.cpp} | 0 ...pyETIInst_int_double_LayoutLeft_Rank2.cpp} | 0 ...pyETIInst_int_double_LayoutLeft_Rank3.cpp} | 0 ...pyETIInst_int_double_LayoutLeft_Rank4.cpp} | 0 ...pyETIInst_int_double_LayoutLeft_Rank5.cpp} | 0 
...pyETIInst_int_double_LayoutLeft_Rank8.cpp} | 0 ...yETIInst_int_double_LayoutRight_Rank1.cpp} | 0 ...yETIInst_int_double_LayoutRight_Rank2.cpp} | 0 ...yETIInst_int_double_LayoutRight_Rank3.cpp} | 0 ...yETIInst_int_double_LayoutRight_Rank4.cpp} | 0 ...yETIInst_int_double_LayoutRight_Rank5.cpp} | 0 ...yETIInst_int_double_LayoutRight_Rank8.cpp} | 0 ...ETIInst_int_double_LayoutStride_Rank1.cpp} | 0 ...ETIInst_int_double_LayoutStride_Rank2.cpp} | 0 ...ETIInst_int_double_LayoutStride_Rank3.cpp} | 0 ...ETIInst_int_double_LayoutStride_Rank4.cpp} | 0 ...ETIInst_int_double_LayoutStride_Rank5.cpp} | 0 ...ETIInst_int_double_LayoutStride_Rank8.cpp} | 0 ...opyETIInst_int_float_LayoutLeft_Rank1.cpp} | 0 ...opyETIInst_int_float_LayoutLeft_Rank2.cpp} | 0 ...opyETIInst_int_float_LayoutLeft_Rank3.cpp} | 0 ...opyETIInst_int_float_LayoutLeft_Rank4.cpp} | 0 ...opyETIInst_int_float_LayoutLeft_Rank5.cpp} | 0 ...opyETIInst_int_float_LayoutLeft_Rank8.cpp} | 0 ...pyETIInst_int_float_LayoutRight_Rank1.cpp} | 0 ...pyETIInst_int_float_LayoutRight_Rank2.cpp} | 0 ...pyETIInst_int_float_LayoutRight_Rank3.cpp} | 0 ...pyETIInst_int_float_LayoutRight_Rank4.cpp} | 0 ...pyETIInst_int_float_LayoutRight_Rank5.cpp} | 0 ...pyETIInst_int_float_LayoutRight_Rank8.cpp} | 0 ...yETIInst_int_float_LayoutStride_Rank1.cpp} | 0 ...yETIInst_int_float_LayoutStride_Rank2.cpp} | 0 ...yETIInst_int_float_LayoutStride_Rank3.cpp} | 0 ...yETIInst_int_float_LayoutStride_Rank4.cpp} | 0 ...yETIInst_int_float_LayoutStride_Rank5.cpp} | 0 ...yETIInst_int_float_LayoutStride_Rank8.cpp} | 0 ...yETIInst_int_int64_t_LayoutLeft_Rank1.cpp} | 0 ...yETIInst_int_int64_t_LayoutLeft_Rank2.cpp} | 0 ...yETIInst_int_int64_t_LayoutLeft_Rank3.cpp} | 0 ...yETIInst_int_int64_t_LayoutLeft_Rank4.cpp} | 0 ...yETIInst_int_int64_t_LayoutLeft_Rank5.cpp} | 0 ...yETIInst_int_int64_t_LayoutLeft_Rank8.cpp} | 0 ...ETIInst_int_int64_t_LayoutRight_Rank1.cpp} | 0 ...ETIInst_int_int64_t_LayoutRight_Rank2.cpp} | 0 ...ETIInst_int_int64_t_LayoutRight_Rank3.cpp} | 0 
...ETIInst_int_int64_t_LayoutRight_Rank4.cpp} | 0 ...ETIInst_int_int64_t_LayoutRight_Rank5.cpp} | 0 ...ETIInst_int_int64_t_LayoutRight_Rank8.cpp} | 0 ...TIInst_int_int64_t_LayoutStride_Rank1.cpp} | 0 ...TIInst_int_int64_t_LayoutStride_Rank2.cpp} | 0 ...TIInst_int_int64_t_LayoutStride_Rank3.cpp} | 0 ...TIInst_int_int64_t_LayoutStride_Rank4.cpp} | 0 ...TIInst_int_int64_t_LayoutStride_Rank5.cpp} | 0 ...TIInst_int_int64_t_LayoutStride_Rank8.cpp} | 0 ...wCopyETIInst_int_int_LayoutLeft_Rank1.cpp} | 0 ...wCopyETIInst_int_int_LayoutLeft_Rank2.cpp} | 0 ...wCopyETIInst_int_int_LayoutLeft_Rank3.cpp} | 0 ...wCopyETIInst_int_int_LayoutLeft_Rank4.cpp} | 0 ...wCopyETIInst_int_int_LayoutLeft_Rank5.cpp} | 0 ...wCopyETIInst_int_int_LayoutLeft_Rank8.cpp} | 0 ...CopyETIInst_int_int_LayoutRight_Rank1.cpp} | 0 ...CopyETIInst_int_int_LayoutRight_Rank2.cpp} | 0 ...CopyETIInst_int_int_LayoutRight_Rank3.cpp} | 0 ...CopyETIInst_int_int_LayoutRight_Rank4.cpp} | 0 ...CopyETIInst_int_int_LayoutRight_Rank5.cpp} | 0 ...CopyETIInst_int_int_LayoutRight_Rank8.cpp} | 0 ...opyETIInst_int_int_LayoutStride_Rank1.cpp} | 0 ...opyETIInst_int_int_LayoutStride_Rank2.cpp} | 0 ...opyETIInst_int_int_LayoutStride_Rank3.cpp} | 0 ...opyETIInst_int_int_LayoutStride_Rank4.cpp} | 0 ...opyETIInst_int_int_LayoutStride_Rank5.cpp} | 0 ...opyETIInst_int_int_LayoutStride_Rank8.cpp} | 0 .../eti/ROCm/Makefile.eti_Experimental::ROCm | 288 --- .../core/src/eti/ROCm/Makefile.eti_ROCm | 288 +++ .../Kokkos_Atomic_Compare_Exchange_Strong.hpp | 13 +- .../core/src/impl/Kokkos_Atomic_Exchange.hpp | 13 +- .../core/src/impl/Kokkos_Atomic_Fetch_Add.hpp | 14 +- .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 13 +- .../core/src/impl/Kokkos_Atomic_Generic.hpp | 26 +- .../core/src/impl/Kokkos_Atomic_Windows.hpp | 12 +- packages/kokkos/core/src/impl/Kokkos_Core.cpp | 16 +- .../kokkos/core/src/impl/Kokkos_HBWSpace.cpp | 2 + .../kokkos/core/src/impl/Kokkos_HostSpace.cpp | 2 + .../core/src/impl/Kokkos_HostThreadTeam.hpp | 33 +- 
.../kokkos/core/src/impl/Kokkos_OldMacros.hpp | 15 +- .../core/src/impl/Kokkos_Serial_Task.cpp | 20 +- .../kokkos/core/src/impl/Kokkos_ViewArray.hpp | 114 +- .../core/src/impl/Kokkos_ViewLayoutTiled.hpp | 945 ++++++++ .../core/src/impl/Kokkos_ViewMapping.hpp | 303 ++- .../kokkos/core/src/impl/Kokkos_ViewTile.hpp | 4 +- .../kokkos/core/src/impl/Kokkos_hwloc.cpp | 26 +- packages/kokkos/core/src/kokkos.pc.in | 71 + packages/kokkos/core/unit_test/CMakeLists.txt | 2 + packages/kokkos/core/unit_test/Makefile | 52 +- packages/kokkos/core/unit_test/TestAtomic.hpp | 11 +- packages/kokkos/core/unit_test/TestCXX11.hpp | 16 +- .../kokkos/core/unit_test/TestComplex.hpp | 2 +- .../kokkos/core/unit_test/TestMDRange.hpp | 212 ++ .../kokkos/core/unit_test/TestMDRange_a.hpp | 4 + .../kokkos/core/unit_test/TestMDRange_b.hpp | 3 +- .../kokkos/core/unit_test/TestMDRange_c.hpp | 4 +- .../kokkos/core/unit_test/TestMDRange_d.hpp | 4 + .../kokkos/core/unit_test/TestMDRange_e.hpp | 4 + .../kokkos/core/unit_test/TestMemoryPool.hpp | 2 + packages/kokkos/core/unit_test/TestReduce.hpp | 3 + .../core/unit_test/TestReduceDeviceView.hpp | 131 ++ .../kokkos/core/unit_test/TestReducers.hpp | 14 +- packages/kokkos/core/unit_test/TestScan.hpp | 12 +- packages/kokkos/core/unit_test/TestTeam.hpp | 169 +- .../core/unit_test/TestTeamTeamSize.hpp | 146 ++ .../kokkos/core/unit_test/TestTeamVector.hpp | 68 +- .../kokkos/core/unit_test/TestViewAPI_a.hpp | 2 - .../kokkos/core/unit_test/TestViewAPI_b.hpp | 2 - .../kokkos/core/unit_test/TestViewAPI_c.hpp | 2 - .../kokkos/core/unit_test/TestViewAPI_d.hpp | 2 - .../kokkos/core/unit_test/TestViewCopy.hpp | 155 ++ .../TestViewLayoutStrideAssignment.hpp | 740 +++++++ .../core/unit_test/TestViewLayoutTiled.hpp | 1215 +++++++++++ .../cuda/TestCudaHostPinned_ViewCopy.cpp | 45 + .../unit_test/cuda/TestCudaUVM_ViewCopy.cpp | 45 + .../core/unit_test/cuda/TestCuda_Other.cpp | 1 + .../cuda/TestCuda_Reductions_DeviceView.cpp | 45 + .../core/unit_test/cuda/TestCuda_Team.cpp | 
16 + .../unit_test/cuda/TestCuda_TeamScratch.cpp | 4 + .../unit_test/cuda/TestCuda_TeamTeamSize.cpp | 45 + .../TestCuda_ViewLayoutStrideAssignment.cpp | 46 + .../unit_test/openmp/TestOpenMP_Other.cpp | 1 + .../TestOpenMP_Reductions_DeviceView.cpp | 45 + .../core/unit_test/openmp/TestOpenMP_Team.cpp | 15 + .../openmp/TestOpenMP_TeamScratch.cpp | 3 + .../openmp/TestOpenMP_TeamTeamSize.cpp | 46 + .../TestOpenMP_ViewLayoutStrideAssignment.cpp | 46 + .../rocm/TestROCmHostPinned_ViewCopy.cpp | 45 + .../core/unit_test/rocm/TestROCm_Crs.cpp | 47 + .../rocm/TestROCm_MDRangeReduce_a.cpp | 54 + .../rocm/TestROCm_MDRangeReduce_b.cpp | 54 + .../rocm/TestROCm_MDRangeReduce_c.cpp | 54 + .../rocm/TestROCm_MDRangeReduce_d.cpp | 54 + .../rocm/TestROCm_MDRangeReduce_e.cpp | 54 + .../unit_test/rocm/TestROCm_SubView_c13.cpp | 54 + .../rocm/TestROCm_TeamReductionScan.cpp | 2 - .../unit_test/rocm/TestROCm_TeamScratch.cpp | 4 + .../unit_test/rocm/TestROCm_TeamTeamSize.cpp | 49 + .../unit_test/serial/TestSerial_Other.cpp | 1 + .../TestSerial_Reductions_DeviceView.cpp | 45 + .../core/unit_test/serial/TestSerial_Team.cpp | 15 + .../serial/TestSerial_TeamScratch.cpp | 4 + .../serial/TestSerial_TeamTeamSize.cpp | 45 + .../TestSerial_ViewLayoutStrideAssignment.cpp | 46 + .../kokkos/core/unit_test/standalone/Makefile | 55 + .../unit_test/standalone/UnitTestMainInit.cpp | 71 + .../unit_test/threads/TestThreads_Other.cpp | 1 + .../TestThreads_Reductions_DeviceView.cpp | 45 + .../unit_test/threads/TestThreads_Team.cpp | 15 + .../threads/TestThreads_TeamScratch.cpp | 4 + .../threads/TestThreads_TeamTeamSize.cpp | 47 + ...TestThreads_ViewLayoutStrideAssignment.cpp | 46 + packages/kokkos/doc/kokkos-promotion.txt | 24 +- .../01_thread_teams/thread_teams.cpp | 19 +- .../nested_parallel_for.cpp | 3 +- .../kokkos/example/virtual_functions/Makefile | 55 + .../example/virtual_functions/classes.cpp | 26 + .../example/virtual_functions/classes.hpp | 39 + .../kokkos/example/virtual_functions/main.cpp | 36 + 
packages/kokkos/generate_makefile.bash | 23 +- packages/kokkos/master_history.txt | 1 + .../eti/generate_view_copy_cpp_files_write | 8 +- .../scripts/testing_scripts/test_all_sandia | 790 +++++++ ...=> blake_jenkins_run_script_pthread_intel} | 11 +- ... => blake_jenkins_run_script_serial_intel} | 11 +- .../white_run_jenkins_script_cuda | 6 +- .../white_run_jenkins_script_omp | 6 +- 317 files changed, 13238 insertions(+), 1978 deletions(-) create mode 100644 packages/kokkos/benchmarks/gups/Makefile create mode 100644 packages/kokkos/benchmarks/gups/gups-kokkos.cc create mode 100644 packages/kokkos/benchmarks/stream/Makefile create mode 100644 packages/kokkos/benchmarks/stream/stream-kokkos.cc create mode 100644 packages/kokkos/containers/src/Kokkos_OffsetView.hpp create mode 100644 packages/kokkos/containers/unit_tests/TestOffsetView.hpp create mode 100644 packages/kokkos/containers/unit_tests/cuda/TestCuda_OffsetView.cpp create mode 100644 packages/kokkos/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp create mode 100644 packages/kokkos/containers/unit_tests/serial/TestSerial_OffsetView.cpp create mode 100644 packages/kokkos/containers/unit_tests/threads/TestThreads_OffsetView.cpp rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp} (100%) 
rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp => 
Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp => 
Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp} (100%) rename 
packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp} (100%) 
rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp} (100%) rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp} (100%) delete mode 100644 packages/kokkos/core/src/eti/ROCm/Makefile.eti_Experimental::ROCm create mode 100644 packages/kokkos/core/src/eti/ROCm/Makefile.eti_ROCm create mode 100644 packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp create mode 100644 packages/kokkos/core/src/kokkos.pc.in create mode 100644 packages/kokkos/core/unit_test/TestReduceDeviceView.hpp create mode 100644 packages/kokkos/core/unit_test/TestTeamTeamSize.hpp create mode 100644 packages/kokkos/core/unit_test/TestViewCopy.hpp create mode 100644 packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp create mode 100644 packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp create mode 100644 
packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy.cpp create mode 100644 packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy.cpp create mode 100644 packages/kokkos/core/unit_test/cuda/TestCuda_Reductions_DeviceView.cpp create mode 100644 packages/kokkos/core/unit_test/cuda/TestCuda_TeamTeamSize.cpp create mode 100644 packages/kokkos/core/unit_test/cuda/TestCuda_ViewLayoutStrideAssignment.cpp create mode 100644 packages/kokkos/core/unit_test/openmp/TestOpenMP_Reductions_DeviceView.cpp create mode 100644 packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamTeamSize.cpp create mode 100644 packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewLayoutStrideAssignment.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewCopy.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_Crs.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_a.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_b.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_c.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_d.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_e.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_SubView_c13.cpp create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_TeamTeamSize.cpp create mode 100644 packages/kokkos/core/unit_test/serial/TestSerial_Reductions_DeviceView.cpp create mode 100644 packages/kokkos/core/unit_test/serial/TestSerial_TeamTeamSize.cpp create mode 100644 packages/kokkos/core/unit_test/serial/TestSerial_ViewLayoutStrideAssignment.cpp create mode 100644 packages/kokkos/core/unit_test/standalone/Makefile create mode 100644 packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp create mode 100644 packages/kokkos/core/unit_test/threads/TestThreads_Reductions_DeviceView.cpp create mode 100644 
packages/kokkos/core/unit_test/threads/TestThreads_TeamTeamSize.cpp create mode 100644 packages/kokkos/core/unit_test/threads/TestThreads_ViewLayoutStrideAssignment.cpp create mode 100644 packages/kokkos/example/virtual_functions/Makefile create mode 100644 packages/kokkos/example/virtual_functions/classes.cpp create mode 100644 packages/kokkos/example/virtual_functions/classes.hpp create mode 100644 packages/kokkos/example/virtual_functions/main.cpp create mode 100755 packages/kokkos/scripts/testing_scripts/test_all_sandia rename packages/kokkos/scripts/trilinos-integration/{shepard_jenkins_run_script_pthread_intel => blake_jenkins_run_script_pthread_intel} (82%) rename packages/kokkos/scripts/trilinos-integration/{shepard_jenkins_run_script_serial_intel => blake_jenkins_run_script_serial_intel} (82%) diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo index e20dc392b..ef0c50a60 100644 --- a/packages/kokkos/.gitrepo +++ b/packages/kokkos/.gitrepo @@ -6,6 +6,7 @@ [subrepo] remote = git@github.com:kokkos/kokkos.git branch = master - commit = d3a941925cbfb71785d8ea68259123ed52d3f9da - parent = e02f01f376b1594c9768e06f70b637965d594da9 - cmdver = 0.3.1 + commit = 9614f72c75aa2131d56900511e5eebae54a7bd8b + parent = 7fc65e3330cc86e88570067a4f99f6d794992ac1 + cmdver = 0.4.0 + method = merge diff --git a/packages/kokkos/.travis.yml b/packages/kokkos/.travis.yml index 2734954ad..bdeaf4762 100644 --- a/packages/kokkos/.travis.yml +++ b/packages/kokkos/.travis.yml @@ -6,18 +6,13 @@ os: - linux - osx -addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - cmake - - clang - compiler: - gcc - clang +cache: + - ccache + env: - THREADING="serial" - THREADING="openmp" @@ -25,22 +20,39 @@ env: # Apple GCC does not support OpenMP. GCC with OpenMP requires Homebrew. # Apple Clang does not support OpenMP. Clang with OpenMP requires Homebrew. -# Clang OpenMP support is not always available. 
matrix: exclude: - - compiler: clang - env: THREADING="openmp" - os: osx env: THREADING="openmp" - os: osx compiler: gcc +before_script: + - if [[ ${TRAVIS_OS_NAME} == "osx" ]]; then + brew update; + export HOMEBREW_NO_AUTO_UPDATE=1; + brew ls --versions ccache > /dev/null || brew install ccache; + export PATH=/usr/local/opt/ccache/libexec:$PATH; + fi + - ccache -z + script: - export OMP_NUM_THREADS=2 - export OMP_PLACES=threads - export OMP_PROC_BIND=spread + # LD_LIBRARY_PATH workaround to find clang's libomp: https://github.com/travis-ci/travis-ci/issues/8613 + - if [[ ${CC} = clang ]]; then export LD_LIBRARY_PATH=/usr/local/clang/lib${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH; fi + # enable ccache for clang on linux and add CCACHE_CPP2 to avoid 'Argument unused during compilation -I...' warning + - if [[ ${TRAVIS_OS_NAME} = linux && ${CC} = clang ]]; then + ln -s /usr/bin/ccache $HOME/bin/clang++; + export CCACHE_CPP2=yes; + GENERATE_OPTS="--gcc-toolchain=/usr"; + fi - mkdir build - cd build - - ../generate_makefile.bash --compiler=$CXX --with-$THREADING --with-options=compiler_warnings + - ../generate_makefile.bash --compiler=$CXX --with-$THREADING --with-options=compiler_warnings ${GENERATE_OPTS} - make - make test + +after_success: + - ccache -s diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index 145cc6270..5564096ea 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,68 @@ # Change Log +## [2.7.24](https://github.com/kokkos/kokkos/tree/2.7.24) (2018-11-04) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.00...2.7.24) + +**Implemented enhancements:** + +- DualView: Add non-templated functions for sync, need\_sync, view, modify [\#1858](https://github.com/kokkos/kokkos/issues/1858) +- DualView: Avoid needlessly allocates and initializes modify\_host and modify\_device flag views [\#1831](https://github.com/kokkos/kokkos/issues/1831) +- DualView: Incorrect deduction of "not device type" 
[\#1659](https://github.com/kokkos/kokkos/issues/1659) +- BuildSystem: Add KOKKOS\_ENABLE\_CXX14 and KOKKOS\_ENABLE\_CXX17 [\#1602](https://github.com/kokkos/kokkos/issues/1602) +- BuildSystem: Installed kokkos\_generated\_settings.cmake contains build directories instead of install directories [\#1838](https://github.com/kokkos/kokkos/issues/1838) +- BuildSystem: KOKKOS\_ARCH: add ticks to printout of improper arch setting [\#1649](https://github.com/kokkos/kokkos/issues/1649) +- BuildSystem: Make core/src/Makefile for Cuda use needed nvcc\_wrapper [\#1296](https://github.com/kokkos/kokkos/issues/1296) +- Build: Support PGI as host compiler for NVCC [\#1828](https://github.com/kokkos/kokkos/issues/1828) +- Build: Many Warnings Fixed e.g.[\#1786](https://github.com/kokkos/kokkos/issues/1786) +- Capability: OffsetView with non-zero begin index [\#567](https://github.com/kokkos/kokkos/issues/567) +- Capability: Reductions into device side view [\#1788](https://github.com/kokkos/kokkos/issues/1788) +- Capability: Add max\_size to Kokkos::Array [\#1760](https://github.com/kokkos/kokkos/issues/1760) +- Capability: View Assignment: LayoutStride -\> LayoutLeft and LayoutStride -\> LayoutRight [\#1594](https://github.com/kokkos/kokkos/issues/1594) +- Capability: Atomic function allow implicit conversion of update argument [\#1571](https://github.com/kokkos/kokkos/issues/1571) +- Capability: Add team\_size\_max with tagged functors [\#663](https://github.com/kokkos/kokkos/issues/663) +- Capability: Fix allignment of views from Kokkos\_ScratchSpace should use different alignment [\#1700](https://github.com/kokkos/kokkos/issues/1700) +- Capabilitiy: create\_mirror\_view\_and\_copy for DynRankView [\#1651](https://github.com/kokkos/kokkos/issues/1651) +- Capability: DeepCopy HBWSpace / HostSpace [\#548](https://github.com/kokkos/kokkos/issues/548) +- ROCm: support team vector scan [\#1645](https://github.com/kokkos/kokkos/issues/1645) +- ROCm: Merge from rocm-hackathon2 
[\#1636](https://github.com/kokkos/kokkos/issues/1636) +- ROCm: Add ParallelScanWithTotal [\#1611](https://github.com/kokkos/kokkos/issues/1611) +- ROCm: Implement MDRange in ROCm [\#1314](https://github.com/kokkos/kokkos/issues/1314) +- ROCm: Implement Reducers for Nested Parallelism Levels [\#963](https://github.com/kokkos/kokkos/issues/963) +- ROCm: Add asynchronous deep copy [\#959](https://github.com/kokkos/kokkos/issues/959) +- Tests: Memory pool test seems to allocate 8GB [\#1830](https://github.com/kokkos/kokkos/issues/1830) +- Tests: Add unit\_test for team\_broadcast [\#734](https://github.com/kokkos/kokkos/issues/734) + +**Fixed bugs:** + +- BuildSystem: Makefile.kokkos gets gcc-toolchain wrong if gcc is cached [\#1841](https://github.com/kokkos/kokkos/issues/1841) +- BuildSystem: kokkos\_generated\_settings.cmake placement is inconsistent [\#1771](https://github.com/kokkos/kokkos/issues/1771) +- BuildSystem: Invalid escape sequence \. in kokkos\_functions.cmake [\#1661](https://github.com/kokkos/kokkos/issues/1661) +- BuildSystem: Problem in Kokkos generated cmake file [\#1770](https://github.com/kokkos/kokkos/issues/1770) +- BuildSystem: invalid file names on windows [\#1671](https://github.com/kokkos/kokkos/issues/1671) +- Tests: reducers min/max\_loc test fails randomly due to multiple min values and thus multiple valid locations [\#1681](https://github.com/kokkos/kokkos/issues/1681) +- Tests: cuda.scatterview unit test causes "Bus error" when force\_uvm and enable\_lambda are enabled [\#1852](https://github.com/kokkos/kokkos/issues/1852) +- Tests: cuda.cxx11 unit test fails when force\_uvm and enable\_lambda are enabled [\#1850](https://github.com/kokkos/kokkos/issues/1850) +- Tests: threads.reduce\_device\_view\_range\_policy failing with Cuda/8.0.44 and RDC [\#1836](https://github.com/kokkos/kokkos/issues/1836) +- Build: compile error when compiling Kokkos with hwloc 2.0.1 \(on OSX 10.12.6, with g++ 7.2.0\) 
[\#1506](https://github.com/kokkos/kokkos/issues/1506) +- Build: dual\_view.view broken with UVM [\#1834](https://github.com/kokkos/kokkos/issues/1834) +- Build: White cuda/9.2 + gcc/7.2 warnings triggering errors [\#1833](https://github.com/kokkos/kokkos/issues/1833) +- Build: warning: enum constant in boolean context [\#1813](https://github.com/kokkos/kokkos/issues/1813) +- Capability: Fix overly conservative max\_team\_size thingy [\#1808](https://github.com/kokkos/kokkos/issues/1808) +- DynRankView: Ctors taking ViewAllocateWithoutInitializing broken [\#1783](https://github.com/kokkos/kokkos/issues/1783) +- Cuda: Apollo cuda.team\_broadcast test fail with clang-6.0 [\#1762](https://github.com/kokkos/kokkos/issues/1762) +- Cuda: Clang spurious test failure in impl\_view\_accessible [\#1753](https://github.com/kokkos/kokkos/issues/1753) +- Cuda: Kokkos::complex\<double\> atomic deadlocks with Clang 6 Cuda build with -O0 [\#1752](https://github.com/kokkos/kokkos/issues/1752) +- Cuda: LayoutStride Test fails for UVM as default memory space [\#1688](https://github.com/kokkos/kokkos/issues/1688) +- Cuda: Scan wrong values on Volta [\#1676](https://github.com/kokkos/kokkos/issues/1676) +- Cuda: Kokkos::deep\_copy error with CudaUVM and Kokkos::Serial spaces [\#1652](https://github.com/kokkos/kokkos/issues/1652) +- Cuda: cudaErrorInvalidConfiguration with debug build [\#1647](https://github.com/kokkos/kokkos/issues/1647) +- Cuda: parallel\_for with TeamPolicy::team\_size\_recommended with launch bounds not working -- reported by Daniel Holladay [\#1283](https://github.com/kokkos/kokkos/issues/1283) +- Cuda: Using KOKKOS\_CLASS\_LAMBDA in a class with Kokkos::Random\_XorShift64\_Pool member data [\#1696](https://github.com/kokkos/kokkos/issues/1696) +- Long Build Times on Darwin [\#1721](https://github.com/kokkos/kokkos/issues/1721) +- Capability: Typo in Kokkos\_Sort.hpp - BinOp3D - wrong comparison [\#1720](https://github.com/kokkos/kokkos/issues/1720) +- Buffer 
overflow in SharedAllocationRecord in Kokkos\_HostSpace.cpp [\#1673](https://github.com/kokkos/kokkos/issues/1673) +- Serial unit test failure [\#1632](https://github.com/kokkos/kokkos/issues/1632) + ## [2.7.00](https://github.com/kokkos/kokkos/tree/2.7.00) (2018-05-24) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.6.00...2.7.00) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index 9e5308f1c..236f523ae 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -11,7 +11,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) # Define Project Name if this is a standalone build IF(NOT DEFINED ${PROJECT_NAME}) - project(Kokkos CXX) + project(Kokkos CXX) ENDIF() # Basic initialization (Used in KOKKOS_SETTINGS) @@ -22,7 +22,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) set_kokkos_cxx_compiler() set_kokkos_cxx_standard() - + #------------ GET OPTIONS AND KOKKOS_SETTINGS -------------------------------- # Add Kokkos' modules to CMake's module path. 
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") @@ -34,7 +34,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) #------------ GENERATE HEADER AND SOURCE FILES ------------------------------- execute_process( - COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings + COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} PREFIX=${CMAKE_INSTALL_PREFIX} generate_build_settings WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out RESULT_VARIABLE GEN_SETTINGS_RESULT @@ -45,6 +45,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) endif() include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos) + install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}") string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}") string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}") diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index 6fc70cc70..05f3cf781 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -15,12 +15,13 @@ KOKKOS_ARCH ?= "" KOKKOS_DEBUG ?= "no" # Options: hwloc,librt,experimental_memkind KOKKOS_USE_TPLS ?= "" -# Options: c++11,c++1z +# Options: c++11,c++14,c++1y,c++17,c++1z,c++2a KOKKOS_CXX_STANDARD ?= "c++11" # Options: aggressive_vectorization,disable_profiling,disable_deprecated_code,enable_large_mem_tests KOKKOS_OPTIONS ?= "" # Option for setting ETI path KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti +KOKKOS_CMAKE ?= "no" # Default settings specific options. 
# Options: force_uvm,use_ldg,rdc,enable_lambda @@ -35,7 +36,11 @@ kokkos_has_string=$(if $(findstring $2,$1),1,0) # Check for general settings. KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes) KOKKOS_INTERNAL_ENABLE_CXX11 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++11) +KOKKOS_INTERNAL_ENABLE_CXX14 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++14) +KOKKOS_INTERNAL_ENABLE_CXX1Y := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1y) +KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17) KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z) +KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a) # Check for external libraries. KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc) @@ -104,6 +109,18 @@ KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VE KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) +# Check Host Compiler if using NVCC through nvcc_wrapper +ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER := $(strip $(shell echo $(CXX) | grep nvcc_wrapper | wc -l)) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER), 1) + + KOKKOS_CXX_HOST_VERSION := $(strip $(shell $(CXX) $(CXXFLAGS) --host-version 2>&1)) + KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),PGI) + KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),Intel Corporation) + KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),clang) + endif +endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) KOKKOS_INTERNAL_COMPILER_CLANG = 1 endif @@ -196,18 +213,34 @@ endif # Set C++11 flags. 
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) KOKKOS_INTERNAL_CXX11_FLAG := --c++11 + KOKKOS_INTERNAL_CXX14_FLAG := --c++14 + #KOKKOS_INTERNAL_CXX17_FLAG := --c++17 else ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11 + #KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14 + KOKKOS_INTERNAL_CXX1Y_FLAG := -std=c++1y + #KOKKOS_INTERNAL_CXX17_FLAG := -std=c++17 + #KOKKOS_INTERNAL_CXX1Z_FLAG := -std=c++1Z + #KOKKOS_INTERNAL_CXX2A_FLAG := -std=c++2a else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11 + KOKKOS_INTERNAL_CXX14_FLAG := -hstd=c++14 + #KOKKOS_INTERNAL_CXX1Y_FLAG := -hstd=c++1y + #KOKKOS_INTERNAL_CXX17_FLAG := -hstd=c++17 + #KOKKOS_INTERNAL_CXX1Z_FLAG := -hstd=c++1z + #KOKKOS_INTERNAL_CXX2A_FLAG := -hstd=c++2a else ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) KOKKOS_INTERNAL_CXX11_FLAG := else KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11 + KOKKOS_INTERNAL_CXX14_FLAG := --std=c++14 + KOKKOS_INTERNAL_CXX1Y_FLAG := --std=c++1y + KOKKOS_INTERNAL_CXX17_FLAG := --std=c++17 KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z + KOKKOS_INTERNAL_CXX2A_FLAG := --std=c++2a endif endif endif @@ -330,7 +363,9 @@ endif #CPPFLAGS is now unused KOKKOS_CPPFLAGS = -KOKKOS_CXXFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH) +ifneq ($(KOKKOS_CMAKE), yes) + KOKKOS_CXXFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH) +endif KOKKOS_TPL_INCLUDE_DIRS = KOKKOS_TPL_LIBRARY_DIRS = KOKKOS_TPL_LIBRARY_NAMES = @@ -341,9 +376,11 @@ endif KOKKOS_LIBS = -ldl KOKKOS_TPL_LIBRARY_NAMES += dl -KOKKOS_LDFLAGS = -L$(shell pwd) -# CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command -KOKKOS_CXXLDFLAGS = -L$(shell pwd) +ifneq ($(KOKKOS_CMAKE), yes) + KOKKOS_LDFLAGS = -L$(shell pwd) + # CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command + KOKKOS_CXXLDFLAGS = -L$(shell 
pwd) +endif KOKKOS_LINK_FLAGS = KOKKOS_SRC = KOKKOS_HEADERS = @@ -371,10 +408,12 @@ tmp := $(call kokkos_append_header,"/* Execution Spaces */") ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA") + tmp := $(call kokkos_append_header,"\#define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)") endif ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM') + tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1') endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) @@ -432,11 +471,25 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11") endif - +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") +endif +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") +endif +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") +endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11") - tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX1Z") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") +endif +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) @@ -459,7 +512,9 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) ifneq ($(HWLOC_PATH),) - KOKKOS_CXXFLAGS += -I$(HWLOC_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + 
KOKKOS_CXXFLAGS += -I$(HWLOC_PATH)/include + endif KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include @@ -478,7 +533,9 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) ifneq ($(MEMKIND_PATH),) - KOKKOS_CXXFLAGS += -I$(MEMKIND_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + KOKKOS_CXXFLAGS += -I$(MEMKIND_PATH)/include + endif KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include @@ -971,7 +1028,9 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) ifneq ($(CUDA_PATH),) - KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include + endif KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64 KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include @@ -1026,7 +1085,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) ifneq ($(QTHREADS_PATH),) - KOKKOS_CXXFLAGS += -I$(QTHREADS_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + KOKKOS_CXXFLAGS += -I$(QTHREADS_PATH)/include + endif KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include diff --git a/packages/kokkos/README b/packages/kokkos/README index 31d134bf0..4b6d4170e 100644 --- a/packages/kokkos/README +++ b/packages/kokkos/README @@ -52,44 +52,47 @@ For specifics see the LICENSE file contained in the repository or distribution. 
* GCC 4.8.4 * GCC 4.9.3 * GCC 5.1.0 - * GCC 5.3.0 + * GCC 5.5.0 * GCC 6.1.0 + * GCC 7.2.0 + * GCC 7.3.0 + * GCC 8.1.0 * Intel 15.0.2 * Intel 16.0.1 - * Intel 17.1.043 + * Intel 17.0.1 * Intel 17.4.196 - * Intel 18.0.128 + * Intel 18.2.128 * Clang 3.6.1 * Clang 3.7.1 * Clang 3.8.1 * Clang 3.9.0 * Clang 4.0.0 - * Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44) - * Clang 6.0.0 for CUDA (CUDA Toolkit 9.1) - * PGI 17.10 - * NVCC 7.0 for CUDA (with gcc 4.8.4) + * Clang 6.0.0 for CUDA (CUDA Toolkit 9.0) + * Clang 7.0.0 for CUDA (CUDA Toolkit 9.1) + * PGI 18.7 * NVCC 7.5 for CUDA (with gcc 4.8.4) * NVCC 8.0.44 for CUDA (with gcc 5.3.0) * NVCC 9.1 for CUDA (with gcc 6.1.0) ### Primary tested compilers on Power 8 are: - * GCC 5.4.0 (OpenMP,Serial) - * IBM XL 13.1.6 (OpenMP, Serial) - * NVCC 8.0.44 for CUDA (with gcc 5.4.0) - * NVCC 9.0.103 for CUDA (with gcc 6.3.0 and XL 13.1.6) + * GCC 6.4.0 (OpenMP,Serial) + * GCC 7.2.0 (OpenMP,Serial) + * IBM XL 16.1.0 (OpenMP, Serial) + * NVCC 9.2.88 for CUDA (with gcc 7.2.0 and XL 16.1.0) ### Primary tested compilers on Intel KNL are: - * GCC 6.2.0 * Intel 16.4.258 (with gcc 4.7.2) * Intel 17.2.174 (with gcc 4.9.3) - * Intel 18.0.128 (with gcc 4.9.3) + * Intel 18.2.199 (with gcc 4.9.3) -### Primary tested compilers on ARM - * GCC 6.1.0 +### Primary tested compilers on ARM (Cavium ThunderX2) + * GCC 7.2.0 + * ARM/Clang 18.4.0 ### Other compilers working: * X86: - Cygwin 2.1.0 64bit with gcc 4.9.3 + - GCC 8.1.0 (not warning free) ### Known non-working combinations: * Power8: diff --git a/packages/kokkos/algorithms/src/Kokkos_Random.hpp b/packages/kokkos/algorithms/src/Kokkos_Random.hpp index 5f1d88bff..e14471a48 100644 --- a/packages/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/packages/kokkos/algorithms/src/Kokkos_Random.hpp @@ -697,6 +697,7 @@ namespace Kokkos { typedef Random_XorShift64<DeviceType> generator_type; typedef DeviceType device_type; + KOKKOS_INLINE_FUNCTION Random_XorShift64_Pool() { num_states_ = 0; } @@ -709,12 +710,14 @@ 
namespace Kokkos { #endif } + KOKKOS_INLINE_FUNCTION Random_XorShift64_Pool(const Random_XorShift64_Pool& src): locks_(src.locks_), state_(src.state_), num_states_(src.num_states_) {} + KOKKOS_INLINE_FUNCTION Random_XorShift64_Pool operator = (const Random_XorShift64_Pool& src) { locks_ = src.locks_; state_ = src.state_; @@ -958,6 +961,7 @@ namespace Kokkos { typedef DeviceType device_type; + KOKKOS_INLINE_FUNCTION Random_XorShift1024_Pool() { num_states_ = 0; } @@ -972,6 +976,7 @@ namespace Kokkos { #endif } + KOKKOS_INLINE_FUNCTION Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src): locks_(src.locks_), state_(src.state_), @@ -979,6 +984,7 @@ namespace Kokkos { num_states_(src.num_states_) {} + KOKKOS_INLINE_FUNCTION Random_XorShift1024_Pool operator = (const Random_XorShift1024_Pool& src) { locks_ = src.locks_; state_ = src.state_; diff --git a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp index c952b1e54..8bdd87672 100644 --- a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -246,8 +246,8 @@ public: { bin_count_atomic = Kokkos::View<int*, Space >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); bin_count_const = bin_count_atomic; - bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins()); - sort_order = offset_type("PermutationVector",range_end-range_begin); + bin_offsets = offset_type(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::bin_offsets"),bin_op.max_bins()); + sort_order = offset_type(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sort_order"),range_end-range_begin); } BinSort( const_key_view_type keys_ @@ -290,7 +290,7 @@ public: #ifdef KOKKOS_ENABLE_DEPRECATED_CODE scratch_view_type - sorted_values("Scratch", + sorted_values(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sorted_values"), len, values.extent(1), values.extent(2), 
@@ -301,7 +301,7 @@ public: values.extent(7)); #else scratch_view_type - sorted_values("Scratch", + sorted_values(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sorted_values"), values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, values.rank_dynamic > 1 ? values.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG , values.rank_dynamic > 2 ? values.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, @@ -483,7 +483,7 @@ struct BinOp3D { if (keys(i1,0)>keys(i2,0)) return true; else if (keys(i1,0)==keys(i2,0)) { if (keys(i1,1)>keys(i2,1)) return true; - else if (keys(i1,1)==keys(i2,2)) { + else if (keys(i1,1)==keys(i2,1)) { if (keys(i1,2)>keys(i2,2)) return true; } } diff --git a/packages/kokkos/benchmarks/gups/Makefile b/packages/kokkos/benchmarks/gups/Makefile new file mode 100644 index 000000000..717611166 --- /dev/null +++ b/packages/kokkos/benchmarks/gups/Makefile @@ -0,0 +1,41 @@ +#Set your Kokkos path to something appropriate +KOKKOS_PATH = ${HOME}/git/kokkos-github-repo +KOKKOS_DEVICES = "Cuda" +KOKKOS_ARCH = "Pascal60" +KOKKOS_CUDA_OPTIONS = enable_lambda +#KOKKOS_DEVICES = "OpenMP" +#KOKKOS_ARCH = "Power8" + +SRC = gups-kokkos.cc + +default: build + echo "Start Build" + +CXXFLAGS = -O3 +CXX = ${HOME}/git/kokkos-github-repo/bin/nvcc_wrapper +#CXX = g++ + +LINK = ${CXX} + +LINKFLAGS = +EXE = gups-kokkos + +DEPFLAGS = -M + +OBJ = $(SRC:.cc=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o $(EXE) + +# Compilation rules + +%.o:%.cc $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/packages/kokkos/benchmarks/gups/gups-kokkos.cc b/packages/kokkos/benchmarks/gups/gups-kokkos.cc new file mode 100644 index 000000000..4602adda7 --- /dev/null +++ b/packages/kokkos/benchmarks/gups/gups-kokkos.cc @@ -0,0 +1,199 @@ 
+/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// ************************************************************************ +//@HEADER +*/ + +#include "Kokkos_Core.hpp" +#include <cstdio> +#include <cstdlib> +#include <cmath> + +#include <sys/time.h> + +#define HLINE "-------------------------------------------------------------\n" + +#if defined(KOKKOS_ENABLE_CUDA) +typedef Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror GUPSHostArray; +typedef Kokkos::View<int64_t*, Kokkos::CudaSpace> GUPSDeviceArray; +#else +typedef Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror GUPSHostArray; +typedef Kokkos::View<int64_t*, Kokkos::HostSpace> GUPSDeviceArray; +#endif + +typedef int GUPSIndex; + +double now() { + struct timeval now; + gettimeofday(&now, NULL); + + return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6); +} + +void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices, const int64_t dataCount) { + for( GUPSIndex i = 0; i < indices.extent(0); ++i ) { + indices[i] = lrand48() % dataCount; + } + + Kokkos::deep_copy(dev_indices, indices); +} + +void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data, const int64_t datum, + const bool performAtomics) { + + if( performAtomics ) { + Kokkos::parallel_for("bench-gups-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) { + Kokkos::atomic_fetch_xor( &data[indices[i]], datum ); + }); + } else { + Kokkos::parallel_for("bench-gups-non-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) { + data[indices[i]] ^= datum; + }); + } + + Kokkos::fence(); +} + +int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const int repeats, + const bool useAtomics) { + + printf("Reports fastest timing per kernel\n"); + printf("Creating Views...\n"); + + printf("Memory Sizes:\n"); + printf("- Elements: %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(dataCount), + 1.0e-6 * ((double) dataCount * (double) sizeof(int64_t))); + printf("- Indices: %15" PRIu64 " (%12.4f MB)\n", 
static_cast<uint64_t>(indicesCount), + 1.0e-6 * ((double) indicesCount * (double) sizeof(int64_t))); + printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No") ); + printf("Benchmark kernels will be performed for %d iterations.\n", repeats); + + printf(HLINE); + + GUPSDeviceArray dev_indices("indices", indicesCount); + GUPSDeviceArray dev_data("data", dataCount); + int64_t datum = -1; + + GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices); + GUPSHostArray data = Kokkos::create_mirror_view(dev_data); + + double gupsTime = 0.0; + + printf("Initializing Views...\n"); + +#if defined(KOKKOS_HAVE_OPENMP) + Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount), +#else + Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount), +#endif + KOKKOS_LAMBDA(const int i) { + + data[i] = 10101010101; + }); + +#if defined(KOKKOS_HAVE_OPENMP) + Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount), +#else + Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount), +#endif + KOKKOS_LAMBDA(const int i) { + + indices[i] = 0; + }); + + Kokkos::deep_copy(dev_data, data); + Kokkos::deep_copy(dev_indices, indices); + double start; + + printf("Starting benchmarking...\n"); + + for( GUPSIndex k = 0; k < repeats; ++k ) { + randomize_indices(indices, dev_indices, data.extent(0)); + + start = now(); + run_gups(dev_indices, dev_data, datum, useAtomics); + gupsTime += now() - start; + } + + Kokkos::deep_copy(indices, dev_indices); + Kokkos::deep_copy(data, dev_data); + + printf(HLINE); + printf("GUP/s Random: %18.6f\n", + (1.0e-9 * ((double) repeats) * (double) dev_indices.extent(0)) / gupsTime); + printf(HLINE); + + return 0; +} + +int main(int argc, char* argv[]) { + + printf(HLINE); + printf("Kokkos GUPS Benchmark\n"); + printf(HLINE); + + srand48(1010101); + + Kokkos::initialize(argc, argv); + + int64_t indices = 8192; + int64_t data = 33554432; + int64_t 
repeats = 10; + bool useAtomics = false; + + for( int i = 1; i < argc; ++i ) { + if( strcmp( argv[i], "--indices" ) == 0 ) { + indices = std::atoll(argv[i+1]); + ++i; + } else if( strcmp( argv[i], "--data" ) == 0 ) { + data = std::atoll(argv[i+1]); + ++i; + } else if( strcmp( argv[i], "--repeats" ) == 0 ) { + repeats = std::atoll(argv[i+1]); + ++i; + } else if( strcmp( argv[i], "--atomics" ) == 0 ) { + useAtomics = true; + } + } + + const int rc = run_benchmark(indices, data, repeats, useAtomics); + + Kokkos::finalize(); + + return rc; +} diff --git a/packages/kokkos/benchmarks/stream/Makefile b/packages/kokkos/benchmarks/stream/Makefile new file mode 100644 index 000000000..04566b322 --- /dev/null +++ b/packages/kokkos/benchmarks/stream/Makefile @@ -0,0 +1,41 @@ +#Set your Kokkos path to something appropriate +KOKKOS_PATH = ${HOME}/git/kokkos-github-repo +#KOKKOS_DEVICES = "Cuda" +#KOKKOS_ARCH = "Pascal60" +#KOKKOS_CUDA_OPTIONS = enable_lambda +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "Power8" + +SRC = stream-kokkos.cc + +default: build + echo "Start Build" + +CXXFLAGS = -O3 +#CXX = ${HOME}/git/kokkos-github-repo/bin/nvcc_wrapper +CXX = g++ + +LINK = ${CXX} + +LINKFLAGS = +EXE = stream-kokkos + +DEPFLAGS = -M + +OBJ = $(SRC:.cc=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o $(EXE) + +# Compilation rules + +%.o:%.cc $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/packages/kokkos/benchmarks/stream/stream-kokkos.cc b/packages/kokkos/benchmarks/stream/stream-kokkos.cc new file mode 100644 index 000000000..370995432 --- /dev/null +++ b/packages/kokkos/benchmarks/stream/stream-kokkos.cc @@ -0,0 +1,265 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// ************************************************************************ +//@HEADER +*/ + +#include "Kokkos_Core.hpp" +#include <cstdio> +#include <cstdlib> +#include <cmath> + +#include <sys/time.h> + +#define STREAM_ARRAY_SIZE 100000000 +#define STREAM_NTIMES 20 + +#define HLINE "-------------------------------------------------------------\n" + +#if defined(KOKKOS_ENABLE_CUDA) +typedef Kokkos::View<double*, Kokkos::CudaSpace>::HostMirror StreamHostArray; +typedef Kokkos::View<double*, Kokkos::CudaSpace> StreamDeviceArray; +#else +typedef Kokkos::View<double*, Kokkos::HostSpace>::HostMirror StreamHostArray; +typedef Kokkos::View<double*, Kokkos::HostSpace> StreamDeviceArray; +#endif + +typedef int StreamIndex; + +double now() { + struct timeval now; + gettimeofday(&now, NULL); + + return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6); +} + +void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c) { + + Kokkos::parallel_for("copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) { + c[i] = a[i]; + }); + + Kokkos::fence(); +} + +void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c, + const double scalar) { + + Kokkos::parallel_for("copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) { + b[i] = scalar * c[i]; + }); + + Kokkos::fence(); +} + +void perform_add(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c) { + Kokkos::parallel_for("add", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) { + c[i] = a[i] + b[i]; + }); + + Kokkos::fence(); +} + +void perform_triad(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c, + const double scalar) { + + Kokkos::parallel_for("triad", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) { + a[i] = b[i] + scalar * c[i]; + }); + + Kokkos::fence(); +} + +int perform_validation(StreamHostArray& a, StreamHostArray& b, StreamHostArray& c, + const StreamIndex arraySize, const double scalar) { + + double ai = 1.0; + double bi = 2.0; + 
double ci = 0.0; + + for( StreamIndex i = 0; i < arraySize; ++i ) { + ci = ai; + bi = scalar * ci; + ci = ai + bi; + ai = bi + scalar * ci; + }; + + double aError = 0.0; + double bError = 0.0; + double cError = 0.0; + + for( StreamIndex i = 0; i < arraySize; ++i ) { + aError = std::abs( a[i] - ai ); + bError = std::abs( b[i] - bi ); + cError = std::abs( c[i] - ci ); + } + + double aAvgError = aError / (double) arraySize; + double bAvgError = bError / (double) arraySize; + double cAvgError = cError / (double) arraySize; + + const double epsilon = 1.0e-13; + int errorCount = 0; + + if( std::abs( aAvgError / ai ) > epsilon ) { + fprintf(stderr, "Error: validation check on View a failed.\n"); + errorCount++; + } + + if( std::abs( bAvgError / bi ) > epsilon ) { + fprintf(stderr, "Error: validation check on View b failed.\n"); + errorCount++; + } + + if( std::abs( cAvgError / ci ) > epsilon ) { + fprintf(stderr, "Error: validation check on View c failed.\n"); + errorCount++; + } + + if( errorCount == 0 ) { + printf("All solutions checked and verified.\n"); + } + + return errorCount; +} + +int run_benchmark() { + + printf("Reports fastest timing per kernel\n"); + printf("Creating Views...\n"); + + printf("Memory Sizes:\n"); + printf("- Array Size: %" PRIu64 "\n", static_cast<uint64_t>(STREAM_ARRAY_SIZE)); + printf("- Per Array: %12.2f MB\n", 1.0e-6 * (double) STREAM_ARRAY_SIZE * (double) sizeof(double)); + printf("- Total: %12.2f MB\n", 3.0e-6 * (double) STREAM_ARRAY_SIZE * (double) sizeof(double)); + + printf("Benchmark kernels will be performed for %d iterations.\n", STREAM_NTIMES); + + printf(HLINE); + + StreamDeviceArray dev_a("a", STREAM_ARRAY_SIZE); + StreamDeviceArray dev_b("b", STREAM_ARRAY_SIZE); + StreamDeviceArray dev_c("c", STREAM_ARRAY_SIZE); + + StreamHostArray a = Kokkos::create_mirror_view(dev_a); + StreamHostArray b = Kokkos::create_mirror_view(dev_b); + StreamHostArray c = Kokkos::create_mirror_view(dev_c); + + const double scalar = 3.0; + + double 
copyTime = std::numeric_limits<double>::max(); + double scaleTime = std::numeric_limits<double>::max(); + double addTime = std::numeric_limits<double>::max(); + double triadTime = std::numeric_limits<double>::max(); + + printf("Initializing Views...\n"); + +#if defined(KOKKOS_HAVE_OPENMP) + Kokkos::parallel_for("init", Kokkos::RangePolicy<Kokkos::OpenMP>(0, STREAM_ARRAY_SIZE), +#else + Kokkos::parallel_for("init", Kokkos::RangePolicy<Kokkos::Serial>(0, STREAM_ARRAY_SIZE), +#endif + KOKKOS_LAMBDA(const int i) { + + a[i] = 1.0; + b[i] = 2.0; + c[i] = 0.0; + }); + + // Copy contents of a (from the host) to the dev_a (device) + Kokkos::deep_copy(dev_a, a); + Kokkos::deep_copy(dev_b, b); + Kokkos::deep_copy(dev_c, c); + + double start; + + printf("Starting benchmarking...\n"); + + for( StreamIndex k = 0; k < STREAM_NTIMES; ++k ) { + start = now(); + perform_copy(dev_a, dev_b, dev_c); + copyTime = std::min( copyTime, (now() - start) ); + + start = now(); + perform_scale(dev_a, dev_b, dev_c, scalar); + scaleTime = std::min( scaleTime, (now() - start) ); + + start = now(); + perform_add(dev_a, dev_b, dev_c); + addTime = std::min( addTime, (now() - start) ); + + start = now(); + perform_triad(dev_a, dev_b, dev_c, scalar); + triadTime = std::min( triadTime, (now() - start) ); + } + + Kokkos::deep_copy(a, dev_a); + Kokkos::deep_copy(b, dev_b); + Kokkos::deep_copy(c, dev_c); + + printf("Performing validation...\n"); + int rc = perform_validation(a, b, c, STREAM_ARRAY_SIZE, scalar); + + printf(HLINE); + + printf("Copy %11.2f MB/s\n", + ( 1.0e-06 * 2.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / copyTime ); + printf("Scale %11.2f MB/s\n", + ( 1.0e-06 * 2.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / scaleTime ); + printf("Add %11.2f MB/s\n", + ( 1.0e-06 * 3.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / addTime ); + printf("Triad %11.2f MB/s\n", + ( 1.0e-06 * 3.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / triadTime ); + + 
printf(HLINE); + + return rc; +} + +int main(int argc, char* argv[]) { + + printf(HLINE); + printf("Kokkos STREAM Benchmark\n"); + printf(HLINE); + + Kokkos::initialize(argc, argv); + const int rc = run_benchmark(); + Kokkos::finalize(); + + return rc; +} diff --git a/packages/kokkos/bin/hpcbind b/packages/kokkos/bin/hpcbind index 92f9f81ac..b185a9282 100755 --- a/packages/kokkos/bin/hpcbind +++ b/packages/kokkos/bin/hpcbind @@ -125,18 +125,20 @@ function show_help { echo " --openmp-ratio=N/D Ratio of the cpuset to use for OpenMP" echo " Default: 1" echo " --openmp-places=<Op> Op=threads|cores|sockets. Default: threads" - echo " --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES" - echo " --force-openmp-num-threads=N" + echo " --openmp-num-threads=N" echo " Override logic for selecting OMP_NUM_THREADS" - echo " --force-openmp-proc-bind=<OP>" + echo " --openmp-proc-bind=<OP>" echo " Override logic for selecting OMP_PROC_BIND" - echo " --no-openmp-nested Set OMP_NESTED to false" + echo " --openmp-nested Set OMP_NESTED to true" + echo " --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES" echo " --output-prefix=<P> Save the output to files of the form" echo " P.hpcbind.N, P.stdout.N and P.stderr.N where P is " echo " the prefix and N is the rank (no spaces)" echo " --output-mode=<Op> How console output should be handled." echo " Options are all, rank0, and none. 
Default: rank0" echo " --lstopo Show bindings in lstopo" + echo " --save-topology=<Xml> Save the topology to the given xml file" + echo " --load-topology=<Xml> Load a previously saved topology from an xml file" echo " -v|--verbose Print bindings and relevant environment variables" echo " -h|--help Show this message" echo "" @@ -189,7 +191,7 @@ HPCBIND_OPENMP_PLACES=${OMP_PLACES:-threads} declare -i HPCBIND_OPENMP_PROC_BIND=1 HPCBIND_OPENMP_FORCE_NUM_THREADS="" HPCBIND_OPENMP_FORCE_PROC_BIND="" -declare -i HPCBIND_OPENMP_NESTED=1 +declare -i HPCBIND_OPENMP_NESTED=0 declare -i HPCBIND_VERBOSE=0 declare -i HPCBIND_LSTOPO=0 @@ -197,6 +199,9 @@ declare -i HPCBIND_LSTOPO=0 HPCBIND_OUTPUT_PREFIX="" HPCBIND_OUTPUT_MODE="rank0" +HPCBIND_OUTPUT_TOPOLOGY="" +HPCBIND_INPUT_TOPOLOGY="" + declare -i HPCBIND_HAS_COMMAND=0 for i in "$@"; do @@ -276,10 +281,22 @@ for i in "$@"; do HPCBIND_OPENMP_NESTED=0 shift ;; + --openmp-nested) + HPCBIND_OPENMP_NESTED=1 + shift + ;; --output-prefix=*) HPCBIND_OUTPUT_PREFIX="${i#*=}" shift ;; + --save-topology=*) + HPCBIND_OUTPUT_TOPOLOGY="${i#*=}" + shift + ;; + --load-topology=*) + HPCBIND_INPUT_TOPOLOGY="${i#*=}" + shift + ;; --output-mode=*) HPCBIND_OUTPUT_MODE="${i#*=}" #convert to lower case @@ -327,24 +344,37 @@ elif [[ ${HPCBIND_QUEUE_RANK} -eq 0 ]]; then HPCBIND_TEE=1 fi +# Save the topology to the given xml file +if [[ "${HPCBIND_OUTPUT_TOPOLOGY}" != "" ]]; then + if [[ ${HPCBIND_QUEUE_RANK} -eq 0 ]]; then + lstopo-no-graphics "${HPCBIND_OUTPUT_TOPOLOGY}" + else + lstopo-no-graphics >/dev/null 2>&1 + fi +fi + +# Load the topology to the given xml file +if [[ "${HPCBIND_INPUT_TOPOLOGY}" != "" ]]; then + if [ -f ${HPCBIND_INPUT_TOPOLOGY} ]; then + export HWLOC_XMLFILE="${HPCBIND_INPUT_TOPOLOGY}" + export HWLOC_THISSYSTEM=1 + fi +fi if [[ "${HPCBIND_OUTPUT_PREFIX}" == "" ]]; then HPCBIND_LOG=/dev/null HPCBIND_ERR=/dev/null HPCBIND_OUT=/dev/null else - if [[ ${HPCBIND_QUEUE_SIZE} -gt 0 ]]; then - 
HPCBIND_STR_QUEUE_SIZE="${HPCBIND_QUEUE_SIZE}" - HPCBIND_STR_QUEUE_RANK=$(printf %0*d ${#HPCBIND_STR_QUEUE_SIZE} ${HPCBIND_QUEUE_RANK}) - - HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_STR_QUEUE_RANK}" - HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_STR_QUEUE_RANK}" - HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_STR_QUEUE_RANK}" - else - HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_QUEUE_RANK}" - HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_QUEUE_RANK}" - HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_QUEUE_RANK}" + if [[ ${HPCBIND_QUEUE_SIZE} -le 0 ]]; then + HPCBIND_QUEUE_SIZE=1 fi + HPCBIND_STR_QUEUE_SIZE="${HPCBIND_QUEUE_SIZE}" + HPCBIND_STR_QUEUE_RANK=$(printf %0*d ${#HPCBIND_STR_QUEUE_SIZE} ${HPCBIND_QUEUE_RANK}) + + HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_STR_QUEUE_RANK}" + HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_STR_QUEUE_RANK}" + HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_STR_QUEUE_RANK}" > ${HPCBIND_LOG} fi @@ -546,6 +576,8 @@ if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then hostname -s >> ${HPCBIND_LOG} echo "[HPCBIND]" >> ${HPCBIND_LOG} echo "${TMP_ENV}" | grep -E "^HPCBIND_" >> ${HPCBIND_LOG} + echo "[HWLOC]" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^HWLOC_" >> ${HPCBIND_LOG} echo "[CUDA]" >> ${HPCBIND_LOG} echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG} echo "[OPENMP]" >> ${HPCBIND_LOG} @@ -568,6 +600,8 @@ else hostname -s > >(tee -a ${HPCBIND_LOG}) echo "[HPCBIND]" > >(tee -a ${HPCBIND_LOG}) echo "${TMP_ENV}" | grep -E "^HPCBIND_" > >(tee -a ${HPCBIND_LOG}) + echo "[HWLOC]" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^HWLOC_" > >(tee -a ${HPCBIND_LOG}) echo "[CUDA]" > >(tee -a ${HPCBIND_LOG}) echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG}) echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG}) diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper index d339da4fc..f926ae024 
100755 --- a/packages/kokkos/bin/nvcc_wrapper +++ b/packages/kokkos/bin/nvcc_wrapper @@ -74,6 +74,9 @@ dry_run=0 host_only=0 host_only_args="" +# Just run version on host compiler +get_host_version=0 + # Enable workaround for CUDA 6.5 for pragma ident replace_pragma_ident=0 @@ -93,6 +96,9 @@ depfile_separate=0 depfile_output_arg="" depfile_target_arg="" +# Option to remove duplicate libraries and object files +remove_duplicate_link_files=0 + #echo "Arguments: $# $@" while [ $# -gt 0 ] @@ -106,10 +112,18 @@ do --host-only) host_only=1 ;; + #get the host version only + --host-version) + get_host_version=1 + ;; #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros --replace-pragma-ident) replace_pragma_ident=1 ;; + #remove duplicate link files + --remove-duplicate-link-files) + remove_duplicate_link_files=1 + ;; #handle source files to be compiled as cuda files *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) cpp_files="$cpp_files $1" @@ -124,7 +138,12 @@ do fi ;; #Handle shared args (valid for both nvcc and the host compiler) - -D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) + -D*) + unescape_commas=`echo "$1" | sed -e 's/\\\,/,/g'` + arg=`printf "%q" $unescape_commas` + shared_args="$shared_args $arg" + ;; + -I*|-L*|-l*|-g|--help|--version|-E|-M|-shared|-w) shared_args="$shared_args $1" ;; #Handle compilation argument @@ -152,7 +171,7 @@ do shift ;; #Handle known nvcc args - -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) + --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) cuda_args="$cuda_args $1" ;; #Handle more known nvcc args @@ -164,8 +183,11 @@ do cuda_args="$cuda_args $1 $2" shift ;; + -rdc=*|-maxrregcount*|--maxrregcount*) + cuda_args="$cuda_args $1" + ;; #Handle c++11 - 
--std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z) + --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1y|-std=c++1y|--std=c++17|-std=c++17|--std=c++1z|-std=c++1z) if [ $stdcxx_applied -eq 1 ]; then echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting" else @@ -205,6 +227,15 @@ do fi shift ;; + #Handle -+ (same as -x c++, specifically used for xl compilers, but mutually exclusive with -x. So replace it with -x c++) + -+) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="-x,c++" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,-x,c++" + fi + ;; #Handle -ccbin (if its not set we can set it to a default value) -ccbin) cuda_args="$cuda_args $1 $2" @@ -212,18 +243,39 @@ do host_compiler=$2 shift ;; - #Handle -arch argument (if its not set use a default - -arch*) + + #Handle -arch argument (if its not set use a default) this is the version with = sign + -arch*|-gencode*) cuda_args="$cuda_args $1" arch_set=1 ;; + #Handle -code argument (if its not set use a default) this is the version with = sign + -code*) + cuda_args="$cuda_args $1" + ;; + #Handle -arch argument (if its not set use a default) this is the version without = sign + -arch|-gencode) + cuda_args="$cuda_args $1 $2" + arch_set=1 + shift + ;; + #Handle -code argument (if its not set use a default) this is the version without = sign + -code) + cuda_args="$cuda_args $1 $2" + shift + ;; #Handle -Xcudafe argument -Xcudafe) cuda_args="$cuda_args -Xcudafe $2" shift ;; + #Handle -Xlinker argument + -Xlinker) + xlinker_args="$xlinker_args -Xlinker $2" + shift + ;; #Handle args that should be sent to the linker - -Wl*) + -Wl,*) xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" host_linker_args="$host_linker_args ${1:4:${#1}}" ;; @@ -256,6 +308,44 @@ do shift done +# Only print host compiler version +if [ $get_host_version -eq 1 ]; then + $host_compiler 
--version + exit +fi + +#Remove duplicate object files +if [ $remove_duplicate_link_files -eq 1 ]; then +for obj in $object_files +do + object_files_reverse="$obj $object_files_reverse" +done + +object_files_reverse_clean="" +for obj in $object_files_reverse +do + exists=false + for obj2 in $object_files_reverse_clean + do + if [ "$obj" == "$obj2" ] + then + exists=true + echo "Exists: $obj" + fi + done + if [ "$exists" == "false" ] + then + object_files_reverse_clean="$object_files_reverse_clean $obj" + fi +done + +object_files="" +for obj in $object_files_reverse_clean +do + object_files="$obj $object_files" +done +fi + #Add default host compiler if necessary if [ $ccbin_set -ne 1 ]; then cuda_args="$cuda_args -ccbin $host_compiler" @@ -328,10 +418,19 @@ fi #Run compilation command if [ $host_only -eq 1 ]; then + if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then + echo "$host_command" + fi $host_command elif [ -n "$nvcc_depfile_command" ]; then + if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then + echo "$nvcc_command && $nvcc_depfile_command" + fi $nvcc_command && $nvcc_depfile_command else + if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then + echo "$nvcc_command" + fi $nvcc_command fi error_code=$? 
diff --git a/packages/kokkos/cmake/kokkos_build.cmake b/packages/kokkos/cmake/kokkos_build.cmake index 94dd733ca..8178483d0 100644 --- a/packages/kokkos/cmake/kokkos_build.cmake +++ b/packages/kokkos/cmake/kokkos_build.cmake @@ -235,3 +235,7 @@ install(FILES # Install the export set for use with the install-tree INSTALL(EXPORT KokkosTargets DESTINATION "${INSTALL_CMAKE_DIR}") + +# build and install pkgconfig file +CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY) +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig) diff --git a/packages/kokkos/cmake/kokkos_functions.cmake b/packages/kokkos/cmake/kokkos_functions.cmake index c0c62ccb6..bc490115a 100644 --- a/packages/kokkos/cmake/kokkos_functions.cmake +++ b/packages/kokkos/cmake/kokkos_functions.cmake @@ -47,7 +47,7 @@ function(set_kokkos_cxx_compiler) OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX MATCH "[0-9]+\.[0-9]+\.[0-9]+$" + string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$" INTERNAL_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) endif() diff --git a/packages/kokkos/cmake/kokkos_options.cmake b/packages/kokkos/cmake/kokkos_options.cmake index 80a091bb9..580d1d322 100644 --- a/packages/kokkos/cmake/kokkos_options.cmake +++ b/packages/kokkos/cmake/kokkos_options.cmake @@ -41,7 +41,6 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) string(TOUPPER ${opt} OPT ) IF(DEFINED Kokkos_ENABLE_${opt}) - MESSAGE("Kokkos_ENABLE_${opt} is defined!") IF(DEFINED KOKKOS_ENABLE_${OPT}) IF(NOT ("${KOKKOS_ENABLE_${OPT}}" STREQUAL "${Kokkos_ENABLE_${opt}}")) IF(DEFINED KOKKOS_ENABLE_${OPT}_INTERNAL) @@ -59,7 +58,6 @@ foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) ENDIF() ELSE() SET(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}}) - MESSAGE("set KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT!") ENDIF() ENDIF() endforeach() @@ -81,6 +79,7 @@ list(APPEND KOKKOS_ARCH_LIST ARMv80 # (HOST) 
ARMv8.0 Compatible CPU ARMv81 # (HOST) ARMv8.1 Compatible CPU ARMv8-ThunderX # (HOST) ARMv8 Cavium ThunderX CPU + ARMv8-TX2 # (HOST) ARMv8 Cavium ThunderX2 CPU WSM # (HOST) Intel Westmere CPU SNB # (HOST) Intel Sandy/Ivy Bridge CPUs HSW # (HOST) Intel Haswell CPUs @@ -123,11 +122,18 @@ list(APPEND KOKKOS_DEVICES_LIST # List of possible TPLs for Kokkos # From Makefile.kokkos: Options: hwloc,librt,experimental_memkind set(KOKKOS_USE_TPLS_LIST) +if(APPLE) +list(APPEND KOKKOS_USE_TPLS_LIST + HWLOC # hwloc + MEMKIND # experimental_memkind + ) +else() list(APPEND KOKKOS_USE_TPLS_LIST HWLOC # hwloc LIBRT # librt MEMKIND # experimental_memkind ) +endif() # Map of cmake variables to Makefile variables set(KOKKOS_INTERNAL_HWLOC hwloc) set(KOKKOS_INTERNAL_LIBRT librt) @@ -172,6 +178,7 @@ set(KOKKOS_INTERNAL_LAMBDA enable_lambda) set(tmpr "\n ") string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}") +set(KOKKOS_INTERNAL_ARCH_DOCSTR "${tmpr}${KOKKOS_INTERNAL_ARCH_DOCSTR}") # This would be useful, but we use Foo_ENABLE mechanisms #string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_DEVICES_DOCSTR "${KOKKOS_DEVICES_LIST}") #string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_USE_TPLS_DOCSTR "${KOKKOS_USE_TPLS_LIST}") @@ -269,7 +276,7 @@ set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_P set_kokkos_default_default(DEPRECATED_CODE ON) set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAULT} CACHE BOOL "Enable deprecated code.") -set_kokkos_default_default(EXPLICIT_INSTANTIATION ON) +set_kokkos_default_default(EXPLICIT_INSTANTIATION OFF) set(KOKKOS_ENABLE_EXPLICIT_INSTANTIATION ${KOKKOS_INTERNAL_ENABLE_EXPLICIT_INSTANTIATION_DEFAULT} CACHE BOOL "Enable explicit template instantiation.") #------------------------------------------------------------------------------- diff --git a/packages/kokkos/cmake/kokkos_settings.cmake b/packages/kokkos/cmake/kokkos_settings.cmake index 21c9d75a9..387ced6d5 100644 --- 
a/packages/kokkos/cmake/kokkos_settings.cmake +++ b/packages/kokkos/cmake/kokkos_settings.cmake @@ -15,16 +15,16 @@ # Ensure that KOKKOS_ARCH is in the ARCH_LIST if (KOKKOS_ARCH MATCHES ",") - message("-- Detected a comma in: KOKKOS_ARCH=${KOKKOS_ARCH}") + message("-- Detected a comma in: KOKKOS_ARCH=`${KOKKOS_ARCH}`") message("-- Although we prefer KOKKOS_ARCH to be semicolon-delimited, we do allow") message("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)") string(REPLACE "," ";" KOKKOS_ARCH "${KOKKOS_ARCH}") - message("-- Commas were changed to semicolons, now KOKKOS_ARCH=${KOKKOS_ARCH}") + message("-- Commas were changed to semicolons, now KOKKOS_ARCH=`${KOKKOS_ARCH}`") endif() foreach(arch ${KOKKOS_ARCH}) list(FIND KOKKOS_ARCH_LIST ${arch} indx) if (indx EQUAL -1) - message(FATAL_ERROR "${arch} is not an accepted value for KOKKOS_ARCH." + message(FATAL_ERROR "`${arch}` is not an accepted value in KOKKOS_ARCH=`${KOKKOS_ARCH}`." " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") endif () endforeach() @@ -130,7 +130,8 @@ string(REPLACE ";" ":" KOKKOS_INTERNAL_ADDTOPATH "${addpathl}") # Set the KOKKOS_SETTINGS String -- this is the primary communication with the # makefile configuration. 
See Makefile.kokkos -set(KOKKOS_SETTINGS KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}) +set(KOKKOS_SETTINGS KOKKOS_CMAKE=yes) +set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}) set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_PATH=${KOKKOS_PATH}) set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFIX}) @@ -158,7 +159,7 @@ if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "") set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS}) endif() if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=\"${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}\"") + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}") endif() if (CMAKE_CXX_STANDARD) diff --git a/packages/kokkos/config/test_all_sandia b/packages/kokkos/config/test_all_sandia index 15e6049af..d94c38cbc 100755 --- a/packages/kokkos/config/test_all_sandia +++ b/packages/kokkos/config/test_all_sandia @@ -241,17 +241,16 @@ elif [ "$MACHINE" = "white" ]; then BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>" - CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0" - CUDA_MODULE_LIST2="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.3.0,ibm/xl/13.1.6" + CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.4.0,ibm/xl/16.1.0" # Don't do pthread on white. 
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "ibm/13.1.6 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "cuda/9.0.103 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) if [ -z "$ARCH_FLAG" ]; then @@ -362,7 +361,7 @@ elif [ "$MACHINE" = "apollo" ]; then "gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" - "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS" + "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread,OpenMP" clang++ $CUDA_WARNING_FLAGS" "cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else diff --git a/packages/kokkos/containers/src/Kokkos_DualView.hpp b/packages/kokkos/containers/src/Kokkos_DualView.hpp index 548e96d25..adba0c415 100644 --- a/packages/kokkos/containers/src/Kokkos_DualView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DualView.hpp @@ -96,6 +96,7 @@ template< class DataType , class Arg3Type = void> class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > { +template< class , class , class , class > friend class DualView ; public: //! \name Typedefs for device types and various Kokkos::View specializations. //@{ @@ -182,8 +183,20 @@ public: //! 
\name Counters to keep track of changes ("modified" flags) //@{ - View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device; - View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host; +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE +protected: + // modified_flags[0] -> host + // modified_flags[1] -> device + typedef View<unsigned int[2],LayoutLeft,Kokkos::HostSpace> t_modified_flags; + t_modified_flags modified_flags; + +public: +#else + typedef View<unsigned int[2],LayoutLeft,typename t_host::execution_space> t_modified_flags; + typedef View<unsigned int,LayoutLeft,typename t_host::execution_space> t_modified_flag; + t_modified_flags modified_flags; + t_modified_flag modified_host,modified_device; +#endif //@} //! \name Constructors @@ -194,10 +207,14 @@ public: /// Both device and host View objects are constructed using their /// default constructors. The "modified" flags are both initialized /// to "unmodified." - DualView () : - modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")), - modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host")) - {} +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE + DualView () = default; +#else + DualView ():modified_flags (t_modified_flags("DualView::modified_flags")) { + modified_host = t_modified_flag(modified_flags,0); + modified_device = t_modified_flag(modified_flags,1); + } +#endif /// \brief Constructor that allocates View objects on both host and device. 
/// @@ -219,17 +236,24 @@ public: const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) : d_view (label, n0, n1, n2, n3, n4, n5, n6, n7) , h_view (create_mirror_view (d_view)) // without UVM, host View mirrors - , modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")) - , modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host")) - {} + , modified_flags (t_modified_flags("DualView::modified_flags")) + { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + modified_host = t_modified_flag(modified_flags,0); + modified_device = t_modified_flag(modified_flags,1); +#endif + } //! Copy constructor (shallow copy) template<class SS, class LS, class DS, class MS> DualView (const DualView<SS,LS,DS,MS>& src) : d_view (src.d_view), h_view (src.h_view), - modified_device (src.modified_device), - modified_host (src.modified_host) + modified_flags (src.modified_flags) +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + , modified_host(src.modified_host) + , modified_device(src.modified_device) +#endif {} //! Subview constructor @@ -241,8 +265,11 @@ public: ) : d_view( Kokkos::subview( src.d_view , arg0 , args ... ) ) , h_view( Kokkos::subview( src.h_view , arg0 , args ... ) ) - , modified_device (src.modified_device) - , modified_host (src.modified_host) + , modified_flags (src.modified_flags) +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + , modified_host(src.modified_host) + , modified_device(src.modified_device) +#endif {} /// \brief Create DualView from existing device and host View objects. 
@@ -258,8 +285,7 @@ public: DualView (const t_dev& d_view_, const t_host& h_view_) : d_view (d_view_), h_view (h_view_), - modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")), - modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host")) + modified_flags (t_modified_flags("DualView::modified_flags")) { if ( int(d_view.rank) != int(h_view.rank) || d_view.extent(0) != h_view.extent(0) || @@ -281,6 +307,10 @@ public: d_view.span() != h_view.span() ) { Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views"); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + modified_host = t_modified_flag(modified_flags,0); + modified_device = t_modified_flag(modified_flags,1); +#endif } //@} @@ -316,6 +346,30 @@ public: t_dev, t_host>::type& view () const { + #ifndef KOKKOS_ENABLE_DEPRECATED_CODE + constexpr bool device_is_memspace = std::is_same<Device,typename Device::memory_space>::value; + constexpr bool device_is_execspace = std::is_same<Device,typename Device::execution_space>::value; + constexpr bool device_exec_is_t_dev_exec = std::is_same<typename Device::execution_space,typename t_dev::execution_space>::value; + constexpr bool device_mem_is_t_dev_mem = std::is_same<typename Device::memory_space,typename t_dev::memory_space>::value; + constexpr bool device_exec_is_t_host_exec = std::is_same<typename Device::execution_space,typename t_host::execution_space>::value; + constexpr bool device_mem_is_t_host_mem = std::is_same<typename Device::memory_space,typename t_host::memory_space>::value; + constexpr bool device_is_t_host_device = std::is_same<typename Device::execution_space,typename t_host::device_type>::value; + constexpr bool device_is_t_dev_device = std::is_same<typename Device::memory_space,typename t_host::device_type>::value; + + static_assert( + device_is_t_dev_device || device_is_t_host_device || + (device_is_memspace && 
(device_mem_is_t_dev_mem || device_mem_is_t_host_mem) ) || + (device_is_execspace && (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) ) || + ( + (!device_is_execspace && !device_is_memspace) && ( + (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) + ) + ) + , + "Template parameter to .view() must exactly match one of the DualView's device types or one of the execution or memory spaces"); + #endif + return Impl::if_c< std::is_same< typename t_dev::memory_space, @@ -324,6 +378,72 @@ public: t_host >::select (d_view , h_view); } + KOKKOS_INLINE_FUNCTION + t_host view_host() const { + return h_view; + } + + KOKKOS_INLINE_FUNCTION + t_dev view_device() const { + return d_view; + } + + template<class Device> + static int get_device_side() { + constexpr bool device_is_memspace = std::is_same<Device,typename Device::memory_space>::value; + constexpr bool device_is_execspace = std::is_same<Device,typename Device::execution_space>::value; + constexpr bool device_exec_is_t_dev_exec = std::is_same<typename Device::execution_space,typename t_dev::execution_space>::value; + constexpr bool device_mem_is_t_dev_mem = std::is_same<typename Device::memory_space,typename t_dev::memory_space>::value; + constexpr bool device_exec_is_t_host_exec = std::is_same<typename Device::execution_space,typename t_host::execution_space>::value; + constexpr bool device_mem_is_t_host_mem = std::is_same<typename Device::memory_space,typename t_host::memory_space>::value; + constexpr bool device_is_t_host_device = std::is_same<typename Device::execution_space,typename t_host::device_type>::value; + constexpr bool device_is_t_dev_device = std::is_same<typename Device::memory_space,typename t_host::device_type>::value; + + #ifndef KOKKOS_ENABLE_DEPRECATED_CODE + static_assert( + device_is_t_dev_device || device_is_t_host_device || + (device_is_memspace && (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) ) || + 
(device_is_execspace && (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) ) || + ( + (!device_is_execspace && !device_is_memspace) && ( + (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) + ) + ) + , + "Template parameter to .sync() must exactly match one of the DualView's device types or one of the execution or memory spaces"); + #endif + + #ifndef KOKKOS_ENABLE_DEPRECATED_CODE + int dev = -1; + #else + int dev = 0; + #endif + if(device_is_t_dev_device) dev = 1; + else if(device_is_t_host_device) dev = 0; + else { + if(device_is_memspace) { + if(device_mem_is_t_dev_mem) dev = 1; + if(device_mem_is_t_host_mem) dev = 0; + if(device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; + } + if(device_is_execspace) { + if(device_exec_is_t_dev_exec) dev = 1; + if(device_exec_is_t_host_exec) dev = 0; + if(device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; + } + if(!device_is_execspace && !device_is_memspace) { + if(device_mem_is_t_dev_mem) dev = 1; + if(device_mem_is_t_host_mem) dev = 0; + if(device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; + if(device_exec_is_t_dev_exec) dev = 1; + if(device_exec_is_t_host_exec) dev = 0; + if(device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; + } + } + return dev; + } + /// \brief Update data on device or host only if data in the other /// space has been marked as modified. 
/// @@ -347,23 +467,20 @@ public: ( std::is_same< Device , int>::value) , int >::type& = 0) { - const unsigned int dev = - Impl::if_c< - std::is_same< - typename t_dev::memory_space, - typename Device::memory_space>::value , - unsigned int, - unsigned int>::select (1, 0); - - if (dev) { // if Device is the same as DualView's device type - if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + if(modified_flags.data()==NULL) return; + + int dev = get_device_side<Device>(); + + if (dev == 1) { // if Device is the same as DualView's device type + if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { deep_copy (d_view, h_view); - modified_host() = modified_device() = 0; + modified_flags(0) = modified_flags(1) = 0; } - } else { // hopefully Device is the same as DualView's host type - if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + } + if (dev == 0) { // hopefully Device is the same as DualView's host type + if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { deep_copy (h_view, d_view); - modified_host() = modified_device() = 0; + modified_flags(0) = modified_flags(1) = 0; } } if(std::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) { @@ -378,46 +495,71 @@ public: ( std::is_same< Device , int>::value) , int >::type& = 0 ) { - const unsigned int dev = - Impl::if_c< - std::is_same< - typename t_dev::memory_space, - typename Device::memory_space>::value, - unsigned int, - unsigned int>::select (1, 0); - if (dev) { // if Device is the same as DualView's device type - if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + if(modified_flags.data()==NULL) return; + + int dev = get_device_side<Device>(); + + if (dev == 1) { // if Device is the same as DualView's device type + if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); } - } 
else { // hopefully Device is the same as DualView's host type - if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + } + if (dev == 0){ // hopefully Device is the same as DualView's host type + if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); } } } + void sync_host() { + if( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) + Impl::throw_runtime_exception("Calling sync_host on a DualView with a const datatype."); + if(modified_flags.data()==NULL) return; + if(modified_flags(1) > modified_flags(0)) { + deep_copy (h_view, d_view); + modified_flags(1) = modified_flags(0) = 0; + } + } + + void sync_device() { + if( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) + Impl::throw_runtime_exception("Calling sync_device on a DualView with a const datatype."); + if(modified_flags.data()==NULL) return; + if(modified_flags(0) > modified_flags(1)) { + deep_copy (d_view, h_view); + modified_flags(1) = modified_flags(0) = 0; + } + } + template<class Device> bool need_sync() const { - const unsigned int dev = - Impl::if_c< - std::is_same< - typename t_dev::memory_space, - typename Device::memory_space>::value , - unsigned int, - unsigned int>::select (1, 0); - - if (dev) { // if Device is the same as DualView's device type - if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + if(modified_flags.data()==NULL) return false; + int dev = get_device_side<Device>(); + + if (dev == 1) { // if Device is the same as DualView's device type + if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { return true; } - } else { // hopefully Device is the same as DualView's host type - if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + } + if (dev == 0){ // hopefully Device is the same as DualView's host type + 
if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { return true; } } return false; } + + inline bool need_sync_host() const { + if(modified_flags.data()==NULL) return false; + return modified_flags(0)<modified_flags(1); + } + + inline bool need_sync_device() const { + if(modified_flags.data()==NULL) return false; + return modified_flags(1)<modified_flags(0); + } + /// \brief Mark data as modified on the given device \c Device. /// /// If \c Device is the same as this DualView's device type, then @@ -425,26 +567,22 @@ public: /// data as modified. template<class Device> void modify () { - const unsigned int dev = - Impl::if_c< - std::is_same< - typename t_dev::memory_space, - typename Device::memory_space>::value, - unsigned int, - unsigned int>::select (1, 0); - - if (dev) { // if Device is the same as DualView's device type + if(modified_flags.data()==NULL) return; + int dev = get_device_side<Device>(); + + if (dev == 1) { // if Device is the same as DualView's device type // Increment the device's modified count. - modified_device () = (modified_device () > modified_host () ? - modified_device () : modified_host ()) + 1; - } else { // hopefully Device is the same as DualView's host type + modified_flags(1) = (modified_flags(1) > modified_flags(0) ? + modified_flags(1) : modified_flags(0)) + 1; + } + if (dev == 0) { // hopefully Device is the same as DualView's host type // Increment the host's modified count. - modified_host () = (modified_device () > modified_host () ? - modified_device () : modified_host ()) + 1; + modified_flags(0) = (modified_flags(1) > modified_flags(0) ? 
+ modified_flags(1) : modified_flags(0)) + 1; } #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK - if (modified_host() && modified_device()) { + if (modified_flags(0) && modified_flags(1)) { std::string msg = "Kokkos::DualView::modify ERROR: "; msg += "Concurrent modification of host and device views "; msg += "in DualView \""; @@ -455,6 +593,45 @@ public: #endif } + inline void modify_host() { + if(modified_flags.data()!=NULL) { + modified_flags(0) = (modified_flags(1) > modified_flags(0) ? + modified_flags(1) : modified_flags(0)) + 1; + #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + if (modified_flags(0) && modified_flags(1)) { + std::string msg = "Kokkos::DualView::modify_host ERROR: "; + msg += "Concurrent modification of host and device views "; + msg += "in DualView \""; + msg += d_view.label(); + msg += "\"\n"; + Kokkos::abort(msg.c_str()); + } + #endif + } + } + + inline void modify_device() { + if(modified_flags.data()!=NULL) { + modified_flags(1) = (modified_flags(1) > modified_flags(0) ? + modified_flags(1) : modified_flags(0)) + 1; + #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + if (modified_flags(0) && modified_flags(1)) { + std::string msg = "Kokkos::DualView::modify_device ERROR: "; + msg += "Concurrent modification of host and device views "; + msg += "in DualView \""; + msg += d_view.label(); + msg += "\"\n"; + Kokkos::abort(msg.c_str()); + } + #endif + } + } + + inline void clear_sync_state() { + if(modified_flags.data()!=NULL) + modified_flags(1) = modified_flags(0) = 0; + } + //@} //! \name Methods for reallocating or resizing the View objects. //@{ @@ -476,7 +653,10 @@ public: h_view = create_mirror_view( d_view ); /* Reset dirty flags */ - modified_device() = modified_host() = 0; + if(modified_flags.data()==NULL) { + modified_flags = t_modified_flags("DualView::modified_flags"); + } else + modified_flags(1) = modified_flags(0) = 0; } /// \brief Resize both views, copying old contents into new if necessary. 
@@ -491,13 +671,16 @@ public: const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ) { - if(modified_device() >= modified_host()) { + if(modified_flags.data()==NULL) { + modified_flags = t_modified_flags("DualView::modified_flags"); + } + if(modified_flags(1) >= modified_flags(0)) { /* Resize on Device */ ::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7); h_view = create_mirror_view( d_view ); /* Mark Device copy as modified */ - modified_device() = modified_device()+1; + modified_flags(1) = modified_flags(1)+1; } else { /* Realloc on Device */ @@ -525,7 +708,7 @@ public: d_view = create_mirror_view( typename t_dev::execution_space(), h_view ); /* Mark Host copy as modified */ - modified_host() = modified_host()+1; + modified_flags(0) = modified_flags(0)+1; } } @@ -649,7 +832,10 @@ void deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference const DualView<ST,SL,SD,SM>& src ) { - if (src.modified_device () >= src.modified_host ()) { + if(src.modified_flags.data()==NULL || dst.modified_flags.data()==NULL) { + return deep_copy(dst.d_view, src.d_view); + } + if (src.modified_flags(1) >= src.modified_flags(0)) { deep_copy (dst.d_view, src.d_view); dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> (); } else { @@ -666,7 +852,10 @@ deep_copy (const ExecutionSpace& exec , DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference const DualView<ST,SL,SD,SM>& src ) { - if (src.modified_device () >= src.modified_host ()) { + if(src.modified_flags.data()==NULL || dst.modified_flags.data()==NULL) { + return deep_copy(exec, dst.d_view, src.d_view); + } + if (src.modified_flags(1) >= src.modified_flags(0)) { deep_copy (exec, dst.d_view, src.d_view); dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> (); } else { diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp 
b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp index b30009a99..8be2c49a3 100644 --- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -64,7 +64,7 @@ namespace Impl { template <typename Specialize> struct DynRankDimTraits { - enum : size_t{unspecified =KOKKOS_INVALID_INDEX}; + enum : size_t{unspecified = KOKKOS_INVALID_INDEX}; // Compute the rank of the view from the nonzero dimension arguments. KOKKOS_INLINE_FUNCTION @@ -384,8 +384,8 @@ public: // Removed dimension checks... typedef typename DstType::offset_type dst_offset_type ; - dst.m_map.m_offset = dst_offset_type(std::integral_constant<unsigned,0>() , src.layout() ); //Check this for integer input1 for padding, etc - dst.m_map.m_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_handle , src.m_track ); + dst.m_map.m_impl_offset = dst_offset_type(std::integral_constant<unsigned,0>() , src.layout() ); //Check this for integer input1 for padding, etc + dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_impl_handle , src.m_track ); dst.m_track.assign( src.m_track , DstTraits::is_managed ); dst.m_rank = src.Rank ; } @@ -565,10 +565,14 @@ public: //---------------------------------------- // Allow specializations to query their specialized map - +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE KOKKOS_INLINE_FUNCTION const Kokkos::Impl::ViewMapping< traits , void > & implementation_map() const { return m_map ; } +#endif + KOKKOS_INLINE_FUNCTION + const Kokkos::Impl::ViewMapping< traits , void > & + impl_map() const { return m_map ; } //---------------------------------------- @@ -624,7 +628,7 @@ public: reference_type operator()() const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) - return implementation_map().reference(); + return impl_map().reference(); //return m_map.reference(0,0,0,0,0,0,0); } @@ -647,7 +651,7 @@ public: typename std::enable_if< 
!std::is_same<typename drvtraits::value_type, typename drvtraits::scalar_array_type>::value && std::is_integral<iType>::value, reference_type>::type operator[](const iType & i0) const { -// auto map = implementation_map(); +// auto map = impl_map(); const size_t dim_scalar = m_map.dimension_scalar(); const size_t bytes = this->span() / dim_scalar; @@ -785,7 +789,7 @@ public: reference_type access() const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) - return implementation_map().reference(); + return impl_map().reference(); //return m_map.reference(0,0,0,0,0,0,0); } @@ -1004,7 +1008,7 @@ public: //---------------------------------------- // Allocation according to allocation properties and array layout - // unused arg_layout dimensions must be set toKOKKOS_INVALID_INDEX so that rank deduction can properly take place + // unused arg_layout dimensions must be set to KOKKOS_INVALID_INDEX so that rank deduction can properly take place template< class ... P > explicit inline DynRankView( const Kokkos::Impl::ViewCtorProp< P ... 
> & arg_prop @@ -1179,7 +1183,7 @@ public: : DynRankView( Kokkos::Impl::ViewCtorProp< std::string >( arg_label ) , typename traits::array_layout ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) - ) + ) {} // For backward compatibility @@ -1189,8 +1193,7 @@ public: , const typename traits::array_layout & arg_layout ) : DynRankView( Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing ) - - , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) + , arg_layout ) {} @@ -1205,7 +1208,9 @@ public: , const size_t arg_N6 =KOKKOS_INVALID_INDEX , const size_t arg_N7 =KOKKOS_INVALID_INDEX ) - : DynRankView(Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing ), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 ) + : DynRankView(Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing ) + , typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7) + ) {} //---------------------------------------- @@ -1445,30 +1450,30 @@ public: ret_type dst ; const SubviewExtents< 7 , rank > extents = - ExtentGenerator< Args ... >::generator( src.m_map.m_offset.m_dim , args... ) ; + ExtentGenerator< Args ... >::generator( src.m_map.m_impl_offset.m_dim , args... 
) ; - dst_offset_type tempdst( src.m_map.m_offset , extents ) ; + dst_offset_type tempdst( src.m_map.m_impl_offset , extents ) ; dst.m_track = src.m_track ; - dst.m_map.m_offset.m_dim.N0 = tempdst.m_dim.N0 ; - dst.m_map.m_offset.m_dim.N1 = tempdst.m_dim.N1 ; - dst.m_map.m_offset.m_dim.N2 = tempdst.m_dim.N2 ; - dst.m_map.m_offset.m_dim.N3 = tempdst.m_dim.N3 ; - dst.m_map.m_offset.m_dim.N4 = tempdst.m_dim.N4 ; - dst.m_map.m_offset.m_dim.N5 = tempdst.m_dim.N5 ; - dst.m_map.m_offset.m_dim.N6 = tempdst.m_dim.N6 ; - - dst.m_map.m_offset.m_stride.S0 = tempdst.m_stride.S0 ; - dst.m_map.m_offset.m_stride.S1 = tempdst.m_stride.S1 ; - dst.m_map.m_offset.m_stride.S2 = tempdst.m_stride.S2 ; - dst.m_map.m_offset.m_stride.S3 = tempdst.m_stride.S3 ; - dst.m_map.m_offset.m_stride.S4 = tempdst.m_stride.S4 ; - dst.m_map.m_offset.m_stride.S5 = tempdst.m_stride.S5 ; - dst.m_map.m_offset.m_stride.S6 = tempdst.m_stride.S6 ; - - dst.m_map.m_handle = dst_handle_type( src.m_map.m_handle + - src.m_map.m_offset( extents.domain_offset(0) + dst.m_map.m_impl_offset.m_dim.N0 = tempdst.m_dim.N0 ; + dst.m_map.m_impl_offset.m_dim.N1 = tempdst.m_dim.N1 ; + dst.m_map.m_impl_offset.m_dim.N2 = tempdst.m_dim.N2 ; + dst.m_map.m_impl_offset.m_dim.N3 = tempdst.m_dim.N3 ; + dst.m_map.m_impl_offset.m_dim.N4 = tempdst.m_dim.N4 ; + dst.m_map.m_impl_offset.m_dim.N5 = tempdst.m_dim.N5 ; + dst.m_map.m_impl_offset.m_dim.N6 = tempdst.m_dim.N6 ; + + dst.m_map.m_impl_offset.m_stride.S0 = tempdst.m_stride.S0 ; + dst.m_map.m_impl_offset.m_stride.S1 = tempdst.m_stride.S1 ; + dst.m_map.m_impl_offset.m_stride.S2 = tempdst.m_stride.S2 ; + dst.m_map.m_impl_offset.m_stride.S3 = tempdst.m_stride.S3 ; + dst.m_map.m_impl_offset.m_stride.S4 = tempdst.m_stride.S4 ; + dst.m_map.m_impl_offset.m_stride.S5 = tempdst.m_stride.S5 ; + dst.m_map.m_impl_offset.m_stride.S6 = tempdst.m_stride.S6 ; + + dst.m_map.m_impl_handle = dst_handle_type( src.m_map.m_impl_handle + + src.m_map.m_impl_offset( extents.domain_offset(0) , 
extents.domain_offset(1) , extents.domain_offset(2) , extents.domain_offset(3) @@ -1896,6 +1901,7 @@ inline typename DynRankView<T,P...>::HostMirror create_mirror( const DynRankView<T,P...> & src , typename std::enable_if< + std::is_same< typename ViewTraits<T,P...>::specialize , void >::value && ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout , Kokkos::LayoutStride >::value >::type * = 0 @@ -1914,6 +1920,7 @@ inline typename DynRankView<T,P...>::HostMirror create_mirror( const DynRankView<T,P...> & src , typename std::enable_if< + std::is_same< typename ViewTraits<T,P...>::specialize , void >::value && std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout , Kokkos::LayoutStride >::value >::type * = 0 @@ -1929,7 +1936,11 @@ create_mirror( const DynRankView<T,P...> & src // Create a mirror in a new space (specialization for different space) template<class Space, class T, class ... P> -typename Impl::MirrorDRVType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::DynRankView<T,P...> & src) { +typename Impl::MirrorDRVType<Space,T,P ...>::view_type +create_mirror(const Space& , const Kokkos::DynRankView<T,P...> & src + , typename std::enable_if< + std::is_same< typename ViewTraits<T,P...>::specialize , void >::value + >::type * = 0) { return typename Impl::MirrorDRVType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) ); } @@ -1985,6 +1996,29 @@ create_mirror_view(const Space& , const Kokkos::DynRankView<T,P...> & src return typename Impl::MirrorDRViewType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) ); } +// Create a mirror view and deep_copy in a new space (specialization for same space) +template<class Space, class T, class ... 
P> +typename Impl::MirrorDRViewType<Space,T,P ...>::view_type +create_mirror_view_and_copy(const Space& , const Kokkos::DynRankView<T,P...> & src + , std::string const& name = "" + , typename std::enable_if<Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + (void)name; + return src; +} + +// Create a mirror view and deep_copy in a new space (specialization for different space) +template<class Space, class T, class ... P> +typename Impl::MirrorDRViewType<Space,T,P ...>::view_type +create_mirror_view_and_copy(const Space& , const Kokkos::DynRankView<T,P...> & src + , std::string const& name = "" + , typename std::enable_if<!Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + using Mirror = typename Impl::MirrorDRViewType<Space,T,P ...>::view_type; + std::string label = name.empty() ? src.label() : name; + auto mirror = Mirror( Kokkos::ViewAllocateWithoutInitializing(label), Impl::reconstructLayout(src.layout(), src.rank()) ); + deep_copy(mirror, src); + return mirror; +} + } //end Kokkos diff --git a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp new file mode 100644 index 000000000..b614764ee --- /dev/null +++ b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp @@ -0,0 +1,1895 @@ +/* + * Kokkos_OffsetView.hpp + * + * Created on: Apr 23, 2018 + * Author: swbova + */ + +#ifndef KOKKOS_OFFSETVIEW_HPP_ +#define KOKKOS_OFFSETVIEW_HPP_ + + +#include <Kokkos_Core.hpp> + +#include <Kokkos_View.hpp> + +namespace Kokkos { + + namespace Experimental { + //---------------------------------------------------------------------------- + //---------------------------------------------------------------------------- + + template< class DataType , class ... Properties > + class OffsetView ; + + template< class > struct is_offset_view : public std::false_type {}; + + template< class D, class ... 
P > + struct is_offset_view< OffsetView<D,P...> > : public std::true_type {}; + + template< class D, class ... P > + struct is_offset_view< const OffsetView<D,P...> > : public std::true_type {}; + +#define KOKKOS_INVALID_OFFSET int64_t(0) +#define KOKKOS_INVALID_INDEX_RANGE {KOKKOS_INVALID_OFFSET, KOKKOS_INVALID_OFFSET} + + template <typename iType, typename std::enable_if< std::is_integral<iType>::value && + std::is_signed<iType>::value, iType >::type = 0> + using IndexRange = Kokkos::Array<iType, 2>; + + + using index_list_type = std::initializer_list<int64_t>; + + + // template <typename iType, + // typename std::enable_if< std::is_integral<iType>::value && + // std::is_signed<iType>::value, iType >::type = 0> using min_index_type = std::initializer_list<iType>; + + namespace Impl { + + template<class ViewType> + struct GetOffsetViewTypeFromViewType { + + typedef OffsetView<typename ViewType::data_type,typename ViewType::array_layout, + typename ViewType::device_type,typename ViewType::memory_traits> type; + + }; + + template< unsigned , class MapType, class BeginsType > + KOKKOS_INLINE_FUNCTION + bool offsetview_verify_operator_bounds( const MapType &, const BeginsType & ) + { return true ; } + + template< unsigned R , class MapType , class BeginsType, class iType , class ... Args > + KOKKOS_INLINE_FUNCTION + bool offsetview_verify_operator_bounds + ( const MapType & map + , const BeginsType & begins + , const iType & i + , Args ... args + ) + { + + const bool legalIndex = ( int64_t(i) >= begins[R] ) && + ( int64_t(i) <= int64_t(begins[R] + map.extent(R) - 1) ); + return legalIndex + && offsetview_verify_operator_bounds<R+1>( map , begins, args ... ); + } + template< unsigned , class MapType, class BeginsType > + inline + void offsetview_error_operator_bounds( char * , int , const MapType & , const BeginsType &) + {} + + template< unsigned R , class MapType , class BeginsType , class iType , class ... 
Args > + inline + void offsetview_error_operator_bounds + ( char * buf + , int len + , const MapType & map + , const BeginsType begins + , const iType & i + , Args ... args + ) + { + const int64_t b = begins[R]; + const int64_t e = b + map.extent(R) - 1; + const int n = + snprintf(buf,len," %ld <= %ld <= %ld %c" + , static_cast<unsigned long>(b) + , static_cast<unsigned long>(i) + , static_cast<unsigned long>(e) + , ( sizeof...(Args) ? ',' : ')' ) + ); + offsetview_error_operator_bounds<R+1>(buf+n,len-n,map,begins,args...); + } + + template< class MemorySpace , class MapType , class BeginsType, class ... Args > + KOKKOS_INLINE_FUNCTION + void offsetview_verify_operator_bounds + ( Kokkos::Impl::SharedAllocationTracker const & tracker + , const MapType & map , const BeginsType & begins, Args ... args ) + { + if ( ! offsetview_verify_operator_bounds<0>( map , begins, args ... ) ) { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + enum { LEN = 1024 }; + char buffer[ LEN ]; + const std::string label = tracker.template get_label<MemorySpace>(); + int n = snprintf(buffer,LEN,"OffsetView bounds error of view labeled %s (",label.c_str()); + offsetview_error_operator_bounds<0>( buffer + n , LEN - n , map ,begins, args ... ); + Kokkos::Impl::throw_runtime_exception(std::string(buffer)); +#else + /* Check #1: is there a SharedAllocationRecord? + (we won't use it, but if its not there then there isn't + a corresponding SharedAllocationHeader containing a label). + This check should cover the case of Views that don't + have the Unmanaged trait but were initialized by pointer. 
*/ + if (tracker.has_record()) { + Kokkos::Impl::operator_bounds_error_on_device<MapType>( + map, Kokkos::Impl::has_printable_label_typedef<MapType>()); + } else { + Kokkos::abort("OffsetView bounds error"); + } +#endif + } + } + +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + KOKKOS_INLINE_FUNCTION + void runtime_check_rank_host(const size_t rank_dynamic, const size_t rank, + const index_list_type minIndices, const std::string & label) + { + bool isBad = false; + std::string message = "Kokkos::Experimental::OffsetView ERROR: for OffsetView labeled '" + label + "':"; + if (rank_dynamic != rank) { + message += "The full rank must be the same as the dynamic rank. full rank = "; + message += std::to_string(rank) + " dynamic rank = " + std::to_string(rank_dynamic) + "\n"; + isBad = true; + } + + size_t numOffsets = 0; + for(size_t i = 0; i < minIndices.size(); ++i ){ + if( minIndices.begin()[i] != -KOKKOS_INVALID_OFFSET) numOffsets++; + } + if (numOffsets != rank_dynamic) { + message += "The number of offsets provided ( " + std::to_string(numOffsets) + + " ) must equal the dynamic rank ( " + std::to_string(rank_dynamic) + " )."; + isBad = true; + } + + if(isBad) Kokkos::abort(message.c_str()); + } +#endif + + KOKKOS_INLINE_FUNCTION + void runtime_check_rank_device(const size_t rank_dynamic, const size_t rank, + const index_list_type minIndices) + { + if (rank_dynamic != rank) { + Kokkos::abort("The full rank of an OffsetView must be the same as the dynamic rank."); + } + size_t numOffsets = 0; + for(size_t i = 0; i < minIndices.size(); ++i ){ + if( minIndices.begin()[i] != -KOKKOS_INVALID_OFFSET) numOffsets++; + } + if (numOffsets != rank) { + Kokkos::abort("The number of offsets provided to an OffsetView constructor must equal the dynamic rank."); + } + + } + } + + template< class DataType , class ... Properties > + class OffsetView : public ViewTraits< DataType , Properties ... > { + public: + + typedef ViewTraits< DataType , Properties ... 
> traits ; + + + + private: + + template< class , class ... > friend class OffsetView ; + template< class , class ... > friend class View ; //FIXME delete this line + template< class , class ... > friend class Kokkos::Impl::ViewMapping ; + + + typedef Kokkos::Impl::ViewMapping< traits , void > map_type ; + typedef Kokkos::Impl::SharedAllocationTracker track_type ; + public: + enum { Rank = map_type::Rank }; + typedef Kokkos::Array<int64_t, Rank> begins_type ; + + + template <typename iType, typename std::enable_if< std::is_integral<iType>::value, iType>::type = 0> + KOKKOS_INLINE_FUNCTION + int64_t begin(const iType dimension) const { + return dimension < Rank ? m_begins[dimension] : 0; + } + + KOKKOS_INLINE_FUNCTION + begins_type begins() const { return m_begins;} + + template <typename iType, typename std::enable_if< std::is_integral<iType>::value, iType>::type = 0> + KOKKOS_INLINE_FUNCTION + int64_t end(const iType dimension) const {return begin(dimension) + m_map.extent(dimension);} + + + private: + track_type m_track ; + map_type m_map ; + begins_type m_begins; + + public: + //---------------------------------------- + /** \brief Compatible view of array of scalar types */ + typedef OffsetView< typename traits::scalar_array_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + array_type ; + + /** \brief Compatible view of const data type */ + typedef OffsetView< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + const_type ; + + /** \brief Compatible view of non-const data type */ + typedef OffsetView< typename traits::non_const_data_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + non_const_type ; + + /** \brief Compatible HostMirror view */ + typedef OffsetView< typename traits::non_const_data_type , + typename traits::array_layout , + typename 
traits::host_mirror_space > + HostMirror ; + + //---------------------------------------- + // Domain rank and extents + + /** \brief rank() to be implemented + */ + //KOKKOS_INLINE_FUNCTION + //static + //constexpr unsigned rank() { return map_type::Rank; } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , size_t >::type + extent( const iType & r ) const + { return m_map.extent(r); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , int >::type + extent_int( const iType & r ) const + { return static_cast<int>(m_map.extent(r)); } + + KOKKOS_INLINE_FUNCTION constexpr + typename traits::array_layout layout() const + { return m_map.layout(); } + + + KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return m_map.dimension_0() * + m_map.dimension_1() * + m_map.dimension_2() * + m_map.dimension_3() * + m_map.dimension_4() * + m_map.dimension_5() * + m_map.dimension_6() * + m_map.dimension_7(); } + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_map.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral<iType>::value , size_t >::type + stride(iType r) const { + return (r == 0 ? m_map.stride_0() : + (r == 1 ? 
m_map.stride_1() : + (r == 2 ? m_map.stride_2() : + (r == 3 ? m_map.stride_3() : + (r == 4 ? m_map.stride_4() : + (r == 5 ? m_map.stride_5() : + (r == 6 ? m_map.stride_6() : + m_map.stride_7()))))))); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); } + + //---------------------------------------- + // Range span is the span which contains all members. + + typedef typename map_type::reference_type reference_type ; + typedef typename map_type::pointer_type pointer_type ; + + enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value }; + + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); } + KOKKOS_INLINE_FUNCTION bool span_is_contiguous() const { return m_map.span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); } + + //---------------------------------------- + // Allow specializations to query their specialized map + + KOKKOS_INLINE_FUNCTION + const Kokkos::Impl::ViewMapping< traits , void > & + implementation_map() const { return m_map ; } + + //---------------------------------------- + + private: + + enum { + is_layout_left = std::is_same< typename traits::array_layout + , Kokkos::LayoutLeft >::value , + + is_layout_right = std::is_same< typename traits::array_layout + , Kokkos::LayoutRight >::value , + + is_layout_stride = std::is_same< typename traits::array_layout + , Kokkos::LayoutStride >::value , + + is_default_map = + std::is_same< typename traits::specialize , void >::value && + ( is_layout_left || is_layout_right || is_layout_stride ) + }; + + template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space + { KOKKOS_FORCEINLINE_FUNCTION static void check() {} }; + + template< class Space > struct verify_space<Space,false> + { KOKKOS_FORCEINLINE_FUNCTION static void check() + { 
Kokkos::abort("Kokkos::View ERROR: attempt to access inaccessible memory space"); + }; + }; + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + +#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( ARG ) \ + OffsetView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ + Kokkos::Experimental::Impl::offsetview_verify_operator_bounds< typename traits::memory_space > ARG ; + +#else + +#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( ARG ) \ + OffsetView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); + +#endif + public: + + //------------------------------ + // Rank 0 operator() + + KOKKOS_FORCEINLINE_FUNCTION + reference_type + operator()() const + { + return m_map.reference(); + } + //------------------------------ + // Rank 1 operator() + + + template< typename I0> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0) const + { + + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) ) + const size_t j0 = i0 - m_begins[0]; + return m_map.reference(j0); + } + + template< typename I0> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && is_default_map + && ! 
is_layout_stride + ), reference_type >::type + operator()( const I0 & i0 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) ) + const size_t j0 = i0 - m_begins[0]; + return m_map.m_impl_handle[ j0 ]; + } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator()( const I0 & i0) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) ) + const size_t j0 = i0 - m_begins[0]; + return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * j0 ]; + } + //------------------------------ + // Rank 1 operator[] + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && ! is_default_map + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) ) + const size_t j0 = i0 - m_begins[0]; + return m_map.reference(j0); + } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && is_default_map + && ! 
is_layout_stride + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) ) + const size_t j0 = i0 - m_begins[0]; + return m_map.m_impl_handle[ j0 ]; + } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) ) + const size_t j0 = i0 - m_begins[0]; + return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * j0 ]; + } + + + //------------------------------ + // Rank 2 + + template< typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1>::value + && ( 2 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + return m_map.reference(j0,j1); + } + + template< typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_left && ( traits::rank_dynamic == 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + return m_map.m_impl_handle[ j0 + m_map.m_impl_offset.m_dim.N0 * j1 ]; + } + + template< typename I0 , typename I1> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_left && ( traits::rank_dynamic != 0 ) + ), reference_type >::type 
+ operator()( const I0 & i0 , const I1 & i1) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + return m_map.m_impl_handle[ j0 + m_map.m_impl_offset.m_stride * j1 ]; + } + + template< typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_right && ( traits::rank_dynamic == 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + return m_map.m_impl_handle[ j1 + m_map.m_impl_offset.m_dim.N1 * j0 ]; + } + + template< typename I0 , typename I1 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_right && ( traits::rank_dynamic != 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + return m_map.m_impl_handle[ j1 + m_map.m_impl_offset.m_stride * j0 ]; + } + + template< typename I0 , typename I1> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + return m_map.m_impl_handle[ j0 * m_map.m_impl_offset.m_stride.S0 + + j1 * m_map.m_impl_offset.m_stride.S1 ]; + } + + //------------------------------ + // Rank 3 + + 
template< typename I0 , typename I1 , typename I2 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2>::value + && ( 3 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + return m_map.m_impl_handle[ m_map.m_impl_offset(j0, j1, j2) ]; + } + + template< typename I0 , typename I1 , typename I2> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2>::value + && ( 3 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + return m_map.reference(j0, j1, j2); + } + + //------------------------------ + // Rank 4 + + template< typename I0 , typename I1 , typename I2 , typename I3> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3>::value + && ( 4 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2, i3) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + return m_map.m_impl_handle[ m_map.m_impl_offset(j0,j1,j2,j3) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3>::value + && ( 4 == Rank ) + && ! 
is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2, i3) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + return m_map.reference(j0,j1,j2,j3); + } + + //------------------------------ + // Rank 5 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4>::value + && ( 5 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2, i3, i4) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + const size_t j4 = i4 - m_begins[4]; + return m_map.m_impl_handle[ m_map.m_impl_offset(j0, j1,j2, j3, j4) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4>::value + && ( 5 == Rank ) + && ! 
is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2, i3, i4) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + const size_t j4 = i4 - m_begins[4]; + return m_map.reference(j0,j1,j2,j3,j4); + } + + //------------------------------ + // Rank 6 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5>::value + && ( 6 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + const size_t j4 = i4 - m_begins[4]; + const size_t j5 = i5 - m_begins[5]; + return m_map.m_impl_handle[ m_map.m_impl_offset(j0,j1,j2,j3,j4,j5) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5>::value + && ( 6 == Rank ) + && ! 
is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + const size_t j4 = i4 - m_begins[4]; + const size_t j5 = i5 - m_begins[5]; + return m_map.reference(j0,j1,j2,j3,j4,j5); + } + + //------------------------------ + // Rank 7 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6>::value + && ( 7 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5, i6) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + const size_t j4 = i4 - m_begins[4]; + const size_t j5 = i5 - m_begins[5]; + const size_t j6 = i6 - m_begins[6]; + return m_map.m_impl_handle[ m_map.m_impl_offset(j0,j1,j2,j3,j4,j5,j6) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6>::value + && ( 7 == Rank ) + && ! 
is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5, i6) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + const size_t j4 = i4 - m_begins[4]; + const size_t j5 = i5 - m_begins[5]; + const size_t j6 = i6 - m_begins[6]; + return m_map.reference(j0,j1,j2,j3,j4,j5,j6); + } + + //------------------------------ + // Rank 8 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7>::value + && ( 8 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5, i6, i7) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + const size_t j4 = i4 - m_begins[4]; + const size_t j5 = i5 - m_begins[5]; + const size_t j6 = i6 - m_begins[6]; + const size_t j7 = i7 - m_begins[7]; + return m_map.m_impl_handle[ m_map.m_impl_offset(j0,j1,j2,j3,j4,j5,j6,j7) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7>::value + && ( 8 == Rank ) + && ! 
is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const + { + KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5, i6, i7) ) + const size_t j0 = i0 - m_begins[0]; + const size_t j1 = i1 - m_begins[1]; + const size_t j2 = i2 - m_begins[2]; + const size_t j3 = i3 - m_begins[3]; + const size_t j4 = i4 - m_begins[4]; + const size_t j5 = i5 - m_begins[5]; + const size_t j6 = i6 - m_begins[6]; + const size_t j7 = i7 - m_begins[7]; + return m_map.reference(j0,j1,j2,j3,j4,j5,j6,j7); + } + + +#undef KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY + + //---------------------------------------- + // Standard destructor, constructors, and assignment operators + + KOKKOS_INLINE_FUNCTION + ~OffsetView() {} + + KOKKOS_INLINE_FUNCTION + OffsetView() : m_track(), m_map() { + + for(size_t i = 0; i < Rank; ++i) m_begins[i] = KOKKOS_INVALID_INDEX; + } + + KOKKOS_INLINE_FUNCTION + OffsetView( const OffsetView & rhs ) : m_track( rhs.m_track, traits::is_managed ), m_map( rhs.m_map ), + m_begins(rhs.m_begins) {} + + KOKKOS_INLINE_FUNCTION + OffsetView( OffsetView && rhs ) : m_track( std::move(rhs.m_track) ), + m_map( std::move(rhs.m_map)), m_begins(std::move(rhs.m_begins)) {} + + KOKKOS_INLINE_FUNCTION + OffsetView & operator = ( const OffsetView & rhs ) { + m_track = rhs.m_track ; + m_map = rhs.m_map ; + m_begins = rhs.m_begins; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + OffsetView & operator = ( OffsetView && rhs ) { + m_track = std::move(rhs.m_track) ; + m_map = std::move(rhs.m_map) ; + m_begins = std::move(rhs.m_begins) ; + return *this ; + } + + //interoperability with View + private: + typedef View< typename traits::scalar_array_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > view_type; + public: + + KOKKOS_INLINE_FUNCTION + view_type view() const { + + 
view_type v(m_track, m_map); + return v ; + } + + template<class RT, class ... RP> + KOKKOS_INLINE_FUNCTION + OffsetView( const View<RT, RP...> & aview) : + m_track(aview.impl_track()), m_map(){ + + typedef typename OffsetView<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible OffsetView copy construction" ); + Mapping::assign( m_map , aview.impl_map() , m_track ); + + for (int i = 0; i < aview.Rank; ++i) { + m_begins[i] = 0; + } + } + + template<class RT, class ... RP> + KOKKOS_INLINE_FUNCTION + OffsetView( const View<RT, RP...> & aview + ,const index_list_type & minIndices) : + m_track(aview.impl_track()), m_map(){ + + typedef typename OffsetView<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible OffsetView copy construction" ); + Mapping::assign( m_map , aview.impl_map() , m_track ); + +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + Kokkos::Experimental::Impl::runtime_check_rank_host(traits::rank_dynamic, Rank, minIndices, label()); +#else + Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic, Rank, minIndices); + +#endif + + for (size_t i = 0; i < minIndices.size(); ++i) { + m_begins[i] = minIndices.begin()[i]; + } + } + template<class RT, class ... 
RP> + KOKKOS_INLINE_FUNCTION + OffsetView( const View<RT, RP...> & aview + ,const begins_type & beg) : + m_track(aview.impl_track()), m_map(), m_begins(beg){ + + typedef typename OffsetView<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible OffsetView copy construction" ); + Mapping::assign( m_map , aview.impl_map() , m_track ); + + + //#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + // Kokkos::Experimental::Impl::runtime_check_rank_host(traits::rank_dynamic, Rank, minIndices, label()); + //#else + // Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic, Rank, minIndices); + // + //#endif + + } + + // may assign unmanaged from managed. + + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + OffsetView( const OffsetView<RT,RP...> & rhs ) + : m_track( rhs.m_track , traits::is_managed ) + , m_map() + , m_begins(rhs.m_begins) + { + typedef typename OffsetView<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible OffsetView copy construction" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); //swb what about assign? 
+ } + + + //---------------------------------------- + // Allocation tracking properties + KOKKOS_INLINE_FUNCTION + int use_count() const + { return m_track.use_count(); } + + inline + const std::string label() const + { return m_track.template get_label< typename traits::memory_space >(); } + + + template< typename Label> + explicit inline + OffsetView( const Label & arg_label + ,typename std::enable_if<Kokkos::Impl::is_view_label<Label>::value , const index_list_type >::type + range0 + ,const index_list_type range1 = KOKKOS_INVALID_INDEX_RANGE + ,const index_list_type range2 = KOKKOS_INVALID_INDEX_RANGE + ,const index_list_type range3 = KOKKOS_INVALID_INDEX_RANGE + ,const index_list_type range4 = KOKKOS_INVALID_INDEX_RANGE + ,const index_list_type range5 = KOKKOS_INVALID_INDEX_RANGE + ,const index_list_type range6 = KOKKOS_INVALID_INDEX_RANGE + ,const index_list_type range7 = KOKKOS_INVALID_INDEX_RANGE + + ) : OffsetView( Kokkos::Impl::ViewCtorProp< std::string >( arg_label ), + typename traits::array_layout + ( range0.begin()[1] - range0.begin()[0] + 1, range1.begin()[1] - range1.begin()[0] + 1 , + range2.begin()[1] - range2.begin()[0] + 1, range3.begin()[1] - range3.begin()[0] + 1, + range4.begin()[1] - range4.begin()[0] + 1, range5.begin()[1] - range5.begin()[0] + 1 , + range6.begin()[1] - range6.begin()[0] + 1, range7.begin()[1] - range7.begin()[0] + 1 ), + {range0.begin()[0], range1.begin()[0], range2.begin()[0], range3.begin()[0], range4.begin()[0], + range5.begin()[0], range6.begin()[0], range7.begin()[0] }) + { + + } + + + + template<class ... P > + explicit KOKKOS_INLINE_FUNCTION + OffsetView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop + ,typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... 
>::has_pointer , typename traits::array_layout >::type const & arg_layout + ,const index_list_type minIndices + ) + : m_track() // No memory tracking + , m_map( arg_prop , arg_layout ) + { + + + for (size_t i = 0; i < minIndices.size(); ++i) { + m_begins[i] = minIndices.begin()[i]; + } + static_assert( + std::is_same< pointer_type + , typename Kokkos::Impl::ViewCtorProp< P... >::pointer_type + >::value , + "When constructing OffsetView to wrap user memory, you must supply matching pointer type" ); + } + + template<class ... P > + explicit inline + OffsetView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer , typename traits::array_layout>::type const & arg_layout + ,const index_list_type minIndices + ) + : m_track() + , m_map() + + { + + for(size_t i = 0; i < Rank; ++i) + m_begins[i] = minIndices.begin()[i]; + + // Append layout and spaces if not input + typedef Kokkos::Impl::ViewCtorProp< P ... > alloc_prop_input ; + + // use 'std::integral_constant<unsigned,I>' for non-types + // to avoid duplicate class error. + typedef Kokkos::Impl::ViewCtorProp + < P ..., typename std::conditional < alloc_prop_input::has_label + , std::integral_constant<unsigned,0>, typename std::string >::type + , typename std::conditional + < alloc_prop_input::has_memory_space + , std::integral_constant<unsigned,1> + , typename traits::device_type::memory_space + >::type + , typename std::conditional + < alloc_prop_input::has_execution_space + , std::integral_constant<unsigned,2> + , typename traits::device_type::execution_space + >::type + > alloc_prop ; + + static_assert( traits::is_managed + , "OffsetView allocation constructor requires managed memory" ); + + if ( alloc_prop::initialize && +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + ! alloc_prop::execution_space::is_initialized() +#else + ! 
alloc_prop::execution_space::impl_is_initialized() +#endif + ) { + // If initializing view data then + // the execution space must be initialized. + Kokkos::Impl::throw_runtime_exception("Constructing OffsetView and initializing data with uninitialized execution space"); + } + + // Copy the input allocation properties with possibly defaulted properties + alloc_prop prop( arg_prop ); + + //------------------------------------------------------------ +#if defined( KOKKOS_ENABLE_CUDA ) + // If allocating in CudaUVMSpace must fence before and after + // the allocation to protect against possible concurrent access + // on the CPU and the GPU. + // Fence using the trait's executon space (which will be Kokkos::Cuda) + // to avoid incomplete type errors from usng Kokkos::Cuda directly. + if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif + //------------------------------------------------------------ + + Kokkos::Impl::SharedAllocationRecord<> * + record = m_map.allocate_shared( prop , arg_layout ); + + //------------------------------------------------------------ +#if defined( KOKKOS_ENABLE_CUDA ) + if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif + //------------------------------------------------------------ + + // Setup and initialization complete, start tracking + m_track.assign_allocated_record_to_uninitialized( record ); + +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + Kokkos::Experimental::Impl::runtime_check_rank_host(traits::rank_dynamic, Rank, minIndices, label()); +#else + Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic, Rank, minIndices); + +#endif + + } + + + }; + + + + /** \brief Temporary free function rank() + * until rank() is implemented + * in the View + */ + template < typename 
D , class ... P > + KOKKOS_INLINE_FUNCTION + constexpr unsigned rank( const OffsetView<D , P...> & V ) { return V.Rank; } //Temporary until added to view + + //---------------------------------------------------------------------------- + //---------------------------------------------------------------------------- + namespace Impl { + + template< class T > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< std::is_integral<T>::value, T>::type + shift_input(const T arg, const int64_t offset) + { + return arg - offset; + } + + KOKKOS_INLINE_FUNCTION + Kokkos::Impl::ALL_t + shift_input(const Kokkos::Impl::ALL_t arg, const int64_t offset) + { + return arg; + } + + template< class T > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< std::is_integral<T>::value, Kokkos::pair<T,T> >::type + shift_input(const Kokkos::pair<T, T> arg, const int64_t offset) + { + + return Kokkos::make_pair<T,T>(arg.first - offset, arg.second - offset); + + } + template< class T > + inline + typename std::enable_if< std::is_integral<T>::value, std::pair<T,T> >::type + shift_input(const std::pair<T, T> arg, const int64_t offset) + { + + return std::make_pair<T,T>(arg.first - offset, arg.second - offset); + + } + + template <size_t N, class Arg, class A> + KOKKOS_INLINE_FUNCTION + void + map_arg_to_new_begin(const size_t i, + Kokkos::Array<int64_t, N> &subviewBegins, typename std::enable_if< N != 0, const Arg>::type shiftedArg, + const Arg arg, const A viewBegins, size_t & counter) { + + if( !std::is_integral<Arg>::value) { + subviewBegins[counter] = shiftedArg == arg ? viewBegins[i] : 0; + counter++; + } + } + + template <size_t N, class Arg, class A> + KOKKOS_INLINE_FUNCTION + void + map_arg_to_new_begin(const size_t i, + Kokkos::Array<int64_t, N> &subviewBegins, typename std::enable_if< N == 0, const Arg>::type shiftedArg, + const Arg arg, const A viewBegins, size_t & counter) { + + } + + + template< class D, class ... 
P , class T > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T + >::type >::type + subview_offset(const OffsetView< D, P... > & src, T arg) { + + auto theView = src.view(); + auto begins = src.begins(); + + T shiftedArg = shift_input(arg, begins[0]); + + constexpr size_t rank = Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T>::type::Rank; + + auto theSubview = Kokkos::subview( theView , shiftedArg); + + Kokkos::Array<int64_t, rank> subviewBegins; + size_t counter = 0; + Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg, arg, begins, counter); + + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping< + void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > , T >::type >::type offsetView(theSubview, subviewBegins); + + return offsetView; + + } + + template< class D, class ... P , class T0, class T1 > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1 + >::type >::type + subview_offset(const Kokkos::Experimental::OffsetView< D, P... > & src, T0 arg0, T1 arg1) { + + auto theView = src.view(); + auto begins = src.begins(); + + T0 shiftedArg0 = shift_input(arg0, begins[0]); + T1 shiftedArg1 = shift_input(arg1, begins[1]); + + auto theSubview = Kokkos::subview(theView , shiftedArg0, shiftedArg1); + constexpr size_t rank = Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... 
> + , T0, T1>::type::Rank; + + Kokkos::Array<int64_t, rank> subviewBegins; + size_t counter = 0; + Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter); + + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping< + void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > , T0, T1 >::type >::type offsetView(theSubview, subviewBegins); + + return offsetView; + + } + + template< class D, class ... P , class T0, class T1, class T2 > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1, T2 + >::type >::type + subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2) { + + auto theView = src.view(); + auto begins = src.begins(); + + T0 shiftedArg0 = shift_input(arg0, begins[0]); + T1 shiftedArg1 = shift_input(arg1, begins[1]); + T2 shiftedArg2 = shift_input(arg2, begins[2]); + + auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2); + + constexpr size_t rank = Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... 
> + , T0, T1, T2>::type::Rank; + + Kokkos::Array<int64_t, rank> subviewBegins; + + size_t counter = 0; + Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter); + + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping< + void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > , T0, T1, T2 >::type >::type offsetView(theSubview, subviewBegins); + + return offsetView; + } + + template< class D, class ... P , class T0, class T1, class T2, class T3 > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1, T2, T3 + >::type >::type + subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3) { + + auto theView = src.view(); + auto begins = src.begins(); + + T0 shiftedArg0 = shift_input(arg0, begins[0]); + T1 shiftedArg1 = shift_input(arg1, begins[1]); + T2 shiftedArg2 = shift_input(arg2, begins[2]); + T3 shiftedArg3 = shift_input(arg3, begins[3]); + + auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3); + + constexpr size_t rank = Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... 
> + , T0, T1, T2, T3>::type::Rank; + Kokkos::Array<int64_t, rank> subviewBegins; + + size_t counter = 0; + Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter); + + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping< + void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > , T0, T1, T2, T3 >::type >::type offsetView(theSubview, subviewBegins); + + return offsetView; + } + + template< class D, class ... P , class T0, class T1, class T2, class T3, class T4 > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1, T2, T3, T4 + >::type >::type + subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3, T4 arg4) { + + auto theView = src.view(); + auto begins = src.begins(); + + T0 shiftedArg0 = shift_input(arg0, begins[0]); + T1 shiftedArg1 = shift_input(arg1, begins[1]); + T2 shiftedArg2 = shift_input(arg2, begins[2]); + T3 shiftedArg3 = shift_input(arg3, begins[3]); + T4 shiftedArg4 = shift_input(arg4, begins[4]); + + auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3, shiftedArg4); + + constexpr size_t rank = Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... 
> + , T0, T1, T2, T3, T4>::type::Rank; + Kokkos::Array<int64_t, rank> subviewBegins; + + size_t counter = 0; + Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(4, subviewBegins, shiftedArg4, arg4, begins, counter); + + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping< + void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > , T0, T1, T2, T3, T4 >::type >::type offsetView(theSubview, subviewBegins); + + return offsetView; + } + + template< class D, class ... P , class T0, class T1, class T2, class T3, class T4, + class T5 > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1, T2, T3, T4, T5 + >::type >::type + subview_offset(const OffsetView< D, P... 
> & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5) { + + auto theView = src.view(); + auto begins = src.begins(); + + T0 shiftedArg0 = shift_input(arg0, begins[0]); + T1 shiftedArg1 = shift_input(arg1, begins[1]); + T2 shiftedArg2 = shift_input(arg2, begins[2]); + T3 shiftedArg3 = shift_input(arg3, begins[3]); + T4 shiftedArg4 = shift_input(arg4, begins[4]); + T5 shiftedArg5 = shift_input(arg5, begins[5]); + + auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3, shiftedArg4, shiftedArg5); + + constexpr size_t rank = Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1, T2, T3, T4, T5>::type::Rank; + + Kokkos::Array<int64_t, rank> subviewBegins; + + size_t counter = 0; + Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(4, subviewBegins, shiftedArg4, arg4, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(5, subviewBegins, shiftedArg5, arg5, begins, counter); + + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping< + void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > , T0, T1, T2, T3, T4, T5 >::type >::type offsetView(theSubview, subviewBegins); + + return offsetView; + } + template< class D, class ... 
P , class T0, class T1, class T2, class T3, class T4, + class T5, class T6> + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1, T2, T3, T4, T5, T6 + >::type >::type + subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, + T6 arg6) { + + auto theView = src.view(); + auto begins = src.begins(); + + T0 shiftedArg0 = shift_input(arg0, begins[0]); + T1 shiftedArg1 = shift_input(arg1, begins[1]); + T2 shiftedArg2 = shift_input(arg2, begins[2]); + T3 shiftedArg3 = shift_input(arg3, begins[3]); + T4 shiftedArg4 = shift_input(arg4, begins[4]); + T5 shiftedArg5 = shift_input(arg5, begins[5]); + T6 shiftedArg6 = shift_input(arg6, begins[6]); + + auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3, shiftedArg4, shiftedArg5, + shiftedArg6); + + constexpr size_t rank = Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... 
> + , T0, T1, T2, T3, T4, T5, T6>::type::Rank; + + Kokkos::Array<int64_t, rank> subviewBegins; + + size_t counter = 0; + Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(4, subviewBegins, shiftedArg4, arg4, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(5, subviewBegins, shiftedArg5, arg5, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(6, subviewBegins, shiftedArg6, arg6, begins, counter); + + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping< + void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > , T0, T1, T2, T3, T4, T5, + T6 >::type >::type offsetView(theSubview, subviewBegins); + + return offsetView; + } + + template< class D, class ... P , class T0, class T1, class T2, class T3, class T4, + class T5, class T6, class T7> + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1, T2, T3, T4, T5, T6, T7 + >::type >::type + subview_offset(const OffsetView< D, P... 
> & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, + T6 arg6, T7 arg7) { + + auto theView = src.view(); + auto begins = src.begins(); + + T0 shiftedArg0 = shift_input(arg0, begins[0]); + T1 shiftedArg1 = shift_input(arg1, begins[1]); + T2 shiftedArg2 = shift_input(arg2, begins[2]); + T3 shiftedArg3 = shift_input(arg3, begins[3]); + T4 shiftedArg4 = shift_input(arg4, begins[4]); + T5 shiftedArg5 = shift_input(arg5, begins[5]); + T6 shiftedArg6 = shift_input(arg6, begins[6]); + T7 shiftedArg7 = shift_input(arg7, begins[7]); + + auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3, shiftedArg4, shiftedArg5, + shiftedArg6, shiftedArg7); + + constexpr size_t rank = Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , T0, T1, T2, T3, T4, T5, T6, T7>::type::Rank; + + Kokkos::Array<int64_t, rank> subviewBegins; + + size_t counter = 0; + Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(4, subviewBegins, shiftedArg4, arg4, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(5, subviewBegins, shiftedArg5, arg5, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(6, subviewBegins, shiftedArg6, arg6, begins, counter); + Kokkos::Experimental::Impl::map_arg_to_new_begin(7, subviewBegins, shiftedArg7, arg7, begins, counter); + + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping< + void /* deduce subview type from source view traits */ + , ViewTraits< D , P... 
> , T0, T1, T2, T3, T4, T5, + T6, T7 >::type >::type offsetView(theSubview, subviewBegins); + + return offsetView; + } + } + + template< class D, class ... P , class ... Args > + KOKKOS_INLINE_FUNCTION + typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping + < void /* deduce subview type from source view traits */ + , ViewTraits< D , P... > + , Args ... + >::type >::type + subview( const OffsetView< D, P... > & src , Args ... args ) + { + static_assert( OffsetView< D , P... >::Rank == sizeof...(Args) , + "subview requires one argument for each source OffsetView rank" ); + + + return Kokkos::Experimental::Impl::subview_offset(src, args...); + + + } + + } +} +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + namespace Experimental { + template< class LT , class ... LP , class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + bool operator == ( const OffsetView<LT,LP...> & lhs , + const OffsetView<RT,RP...> & rhs ) + { + // Same data, layout, dimensions + typedef ViewTraits<LT,LP...> lhs_traits ; + typedef ViewTraits<RT,RP...> rhs_traits ; + + return + std::is_same< typename lhs_traits::const_value_type , + typename rhs_traits::const_value_type >::value && + std::is_same< typename lhs_traits::array_layout , + typename rhs_traits::array_layout >::value && + std::is_same< typename lhs_traits::memory_space , + typename rhs_traits::memory_space >::value && + unsigned(lhs_traits::rank) == unsigned(rhs_traits::rank) && + lhs.data() == rhs.data() && + lhs.span() == rhs.span() && + lhs.extent(0) == rhs.extent(0) && + lhs.extent(1) == rhs.extent(1) && + lhs.extent(2) == rhs.extent(2) && + lhs.extent(3) == rhs.extent(3) && + lhs.extent(4) == rhs.extent(4) && + lhs.extent(5) == rhs.extent(5) && + lhs.extent(6) == rhs.extent(6) && + lhs.extent(7) == rhs.extent(7) && + lhs.begin(0) == rhs.begin(0) && + 
lhs.begin(1) == rhs.begin(1) && + lhs.begin(2) == rhs.begin(2) && + lhs.begin(3) == rhs.begin(3) && + lhs.begin(4) == rhs.begin(4) && + lhs.begin(5) == rhs.begin(5) && + lhs.begin(6) == rhs.begin(6) && + lhs.begin(7) == rhs.begin(7) + ; + } + + template< class LT , class ... LP , class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + bool operator != ( const OffsetView<LT,LP...> & lhs , + const OffsetView<RT,RP...> & rhs ) + { + return ! ( operator==(lhs,rhs) ); + } + + template< class LT , class ... LP , class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + bool operator == ( const View<LT,LP...> & lhs , + const OffsetView<RT,RP...> & rhs ) + { + // Same data, layout, dimensions + typedef ViewTraits<LT,LP...> lhs_traits ; + typedef ViewTraits<RT,RP...> rhs_traits ; + + return + std::is_same< typename lhs_traits::const_value_type , + typename rhs_traits::const_value_type >::value && + std::is_same< typename lhs_traits::array_layout , + typename rhs_traits::array_layout >::value && + std::is_same< typename lhs_traits::memory_space , + typename rhs_traits::memory_space >::value && + unsigned(lhs_traits::rank) == unsigned(rhs_traits::rank) && + lhs.data() == rhs.data() && + lhs.span() == rhs.span() && + lhs.extent(0) == rhs.extent(0) && + lhs.extent(1) == rhs.extent(1) && + lhs.extent(2) == rhs.extent(2) && + lhs.extent(3) == rhs.extent(3) && + lhs.extent(4) == rhs.extent(4) && + lhs.extent(5) == rhs.extent(5) && + lhs.extent(6) == rhs.extent(6) && + lhs.extent(7) == rhs.extent(7) + ; + } + + template< class LT , class ... LP , class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + bool operator == ( const OffsetView<LT,LP...> & lhs , + const View<RT,RP...> & rhs ) + { return rhs == lhs;} + + } +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + +namespace Kokkos { + namespace Experimental { + template< class DT , class ... 
DP > + inline + void deep_copy + ( const OffsetView<DT,DP...> & dst + , typename ViewTraits<DT,DP...>::const_value_type & value + , typename std::enable_if< + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value + >::type * = 0 ) + { + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::non_const_value_type , + typename ViewTraits<DT,DP...>::value_type >::value + , "deep_copy requires non-const type" ); + + auto dstView = dst.view(); + Kokkos::deep_copy( dstView , value ); + + } + + template< class DT , class ... DP , class ST , class ... SP > + inline + void deep_copy + ( const OffsetView<DT,DP...> & dst + , const OffsetView<ST,SP...> & value + , typename std::enable_if< + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value + >::type * = 0 ) + { + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<ST,SP...>::non_const_value_type >::value + , "deep_copy requires matching non-const destination type" ); + + auto dstView = dst.view(); + Kokkos::deep_copy( dstView , value.view() ); + + } + template< class DT , class ... DP , class ST , class ... SP > + inline + void deep_copy + ( const OffsetView<DT,DP...> & dst + , const View<ST,SP...> & value + , typename std::enable_if< + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value + >::type * = 0 ) + { + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<ST,SP...>::non_const_value_type >::value + , "deep_copy requires matching non-const destination type" ); + + auto dstView = dst.view(); + Kokkos::deep_copy( dstView , value); + + } + + template< class DT , class ... DP , class ST , class ... 
SP > + inline + void deep_copy + ( const View<DT,DP...> & dst + , const OffsetView<ST,SP...> & value + , typename std::enable_if< + std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value + >::type * = 0 ) + { + static_assert( + std::is_same< typename ViewTraits<DT,DP...>::value_type , + typename ViewTraits<ST,SP...>::non_const_value_type >::value + , "deep_copy requires matching non-const destination type" ); + + Kokkos::deep_copy( dst , value.view() ); + + } + namespace Impl { + + // Deduce Mirror Types + template<class Space, class T, class ... P> + struct MirrorOffsetViewType { + // The incoming view_type + typedef typename Kokkos::Experimental::OffsetView<T,P...> src_view_type; + // The memory space for the mirror view + typedef typename Space::memory_space memory_space; + // Check whether it is the same memory space + enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; + // The array_layout + typedef typename src_view_type::array_layout array_layout; + // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. + typedef typename src_view_type::non_const_data_type data_type; + // The destination view type if it is not the same memory space + typedef Kokkos::Experimental::OffsetView<data_type,array_layout,Space> dest_view_type; + // If it is the same memory_space return the existsing view_type + // This will also keep the unmanaged trait if necessary + typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type; + }; + + template<class Space, class T, class ... 
P> + struct MirrorOffsetType { + // The incoming view_type + typedef typename Kokkos::Experimental::OffsetView<T,P...> src_view_type; + // The memory space for the mirror view + typedef typename Space::memory_space memory_space; + // Check whether it is the same memory space + enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; + // The array_layout + typedef typename src_view_type::array_layout array_layout; + // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. + typedef typename src_view_type::non_const_data_type data_type; + // The destination view type if it is not the same memory space + typedef Kokkos::Experimental::OffsetView<data_type,array_layout,Space> view_type; + }; + + } + + template< class T , class ... P > + inline + typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror + create_mirror( const Kokkos::Experimental::OffsetView<T,P...> & src + , typename std::enable_if< + ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout + , Kokkos::LayoutStride >::value + >::type * = 0 + ) + { + typedef OffsetView<T,P...> src_type ; + typedef typename src_type::HostMirror dst_type ; + + return dst_type( Kokkos::Impl::ViewCtorProp< std::string >(std::string( src.label() ).append("_mirror") ), + typename Kokkos::ViewTraits<T,P...>::array_layout + ( src.extent(0), src.extent(1), src.extent(2), src.extent(3), src.extent(4), + src.extent(5), src.extent(6), src.extent(7) ), + { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), + src.begin(5), src.begin(6), src.begin(7) }); + } + + template< class T , class ... 
P > + inline + typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror + create_mirror( const Kokkos::Experimental::OffsetView<T,P...> & src + , typename std::enable_if< + std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout + , Kokkos::LayoutStride >::value + >::type * = 0 + ) + { + typedef OffsetView<T,P...> src_type ; + typedef typename src_type::HostMirror dst_type ; + + Kokkos::LayoutStride layout ; + + layout.dimension[0] = src.extent(0); + layout.dimension[1] = src.extent(1); + layout.dimension[2] = src.extent(2); + layout.dimension[3] = src.extent(3); + layout.dimension[4] = src.extent(4); + layout.dimension[5] = src.extent(5); + layout.dimension[6] = src.extent(6); + layout.dimension[7] = src.extent(7); + + layout.stride[0] = src.stride_0(); + layout.stride[1] = src.stride_1(); + layout.stride[2] = src.stride_2(); + layout.stride[3] = src.stride_3(); + layout.stride[4] = src.stride_4(); + layout.stride[5] = src.stride_5(); + layout.stride[6] = src.stride_6(); + layout.stride[7] = src.stride_7(); + + return dst_type( std::string( src.label() ).append("_mirror") , layout, + { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), + src.begin(5), src.begin(6), src.begin(7) } ); + } + + + // Create a mirror in a new space (specialization for different space) + template<class Space, class T, class ... P> + typename Kokkos::Experimental::Impl::MirrorOffsetType<Space,T,P ...>::view_type + create_mirror(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src) { + return typename Kokkos::Experimental::Impl::MirrorOffsetType<Space,T,P ...>::view_type(src.label(),src.layout(), + { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), + src.begin(5), src.begin(6), src.begin(7) } ); + } + + + template< class T , class ... P > + inline + typename Kokkos::Experimental::OffsetView< T, P... >::HostMirror + create_mirror_view( const typename Kokkos::Experimental::OffsetView< T,P... 
> & src + , typename std::enable_if<( + std::is_same< typename Kokkos::Experimental::OffsetView<T,P...>::memory_space + , typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror::memory_space + >::value + && + std::is_same< typename Kokkos::Experimental::OffsetView<T,P...>::data_type + , typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror::data_type + >::value + )>::type * = 0 + ) + { + return src ; + } + + template< class T , class ... P > + inline + typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror + create_mirror_view( const Kokkos::Experimental::OffsetView<T,P...> & src + , typename std::enable_if< ! ( + std::is_same< typename Kokkos::Experimental::OffsetView<T,P...>::memory_space + , typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror::memory_space + >::value + && + std::is_same< typename Kokkos::Experimental::OffsetView<T,P...>::data_type + , typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror::data_type + >::value + )>::type * = 0 + ) + { + return Kokkos::Experimental::create_mirror( src ); + } + + // Create a mirror view in a new space (specialization for same space) + template<class Space, class T, class ... P> + typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space,T,P ...>::view_type + create_mirror_view(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src + , typename std::enable_if<Impl::MirrorOffsetViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + return src; + } + + // Create a mirror view in a new space (specialization for different space) + template<class Space, class T, class ... 
P> + typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space,T,P ...>::view_type + create_mirror_view(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src + , typename std::enable_if<!Impl::MirrorOffsetViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + return typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space,T,P ...>::view_type(src.label(),src.layout(), + { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), + src.begin(5), src.begin(6), src.begin(7) } ); + } + // + // // Create a mirror view and deep_copy in a new space (specialization for same space) + // template<class Space, class T, class ... P> + // typename Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type + // create_mirror_view_and_copy(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src + // , std::string const& name = "" + // , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + // (void)name; + // return src; + // } + // + // // Create a mirror view and deep_copy in a new space (specialization for different space) + // template<class Space, class T, class ... P> + // typename Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type + // create_mirror_view_and_copy(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src + // , std::string const& name = "" + // , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + // using Mirror = typename Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type; + // std::string label = name.empty() ? 
src.label() : name; + // auto mirror = Mirror(ViewAllocateWithoutInitializing(label), src.layout(), + // { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), + // src.begin(5), src.begin(6), src.begin(7) }); + // deep_copy(mirror, src); + // return mirror; + // } + + } +} /* namespace Kokkos */ + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + +#endif /* KOKKOS_OFFSETVIEW_HPP_ */ diff --git a/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp index c522d85c5..f63ce4b88 100644 --- a/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp +++ b/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp @@ -47,7 +47,9 @@ #include <string> #include <vector> -#include <Kokkos_Core.hpp> +#include <Kokkos_View.hpp> +#include <Kokkos_Parallel.hpp> +#include <Kokkos_Parallel_Reduce.hpp> namespace Kokkos { diff --git a/packages/kokkos/containers/src/Kokkos_Vector.hpp b/packages/kokkos/containers/src/Kokkos_Vector.hpp index 8204ba776..76c515941 100644 --- a/packages/kokkos/containers/src/Kokkos_Vector.hpp +++ b/packages/kokkos/containers/src/Kokkos_Vector.hpp @@ -86,14 +86,13 @@ public: vector():DV() { _size = 0; _extra_storage = 1.1; - DV::modified_host() = 1; } vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Arg1Type>("Vector",size_t(n*(1.1))) { _size = n; _extra_storage = 1.1; - DV::modified_host() = 1; + DV::modified_flags(0) = 1; assign(n,val); } @@ -119,16 +118,16 @@ public: /* Assign value either on host or on device */ - if( DV::modified_host() >= DV::modified_device() ) { + if( DV::template need_sync<typename DV::t_dev::device_type>() ) { set_functor_host f(DV::h_view,val); parallel_for(n,f); DV::t_host::execution_space::fence(); - DV::modified_host()++; + DV::template modify<typename DV::t_host::device_type>(); } else { set_functor f(DV::d_view,val); 
parallel_for(n,f); DV::t_dev::execution_space::fence(); - DV::modified_device()++; + DV::template modify<typename DV::t_dev::device_type>(); } } @@ -137,7 +136,8 @@ public: } void push_back(Scalar val) { - DV::modified_host()++; + DV::template sync<typename DV::t_host::device_type>(); + DV::template modify<typename DV::t_host::device_type>(); if(_size == span()) { size_t new_size = _size*_extra_storage; if(new_size == _size) new_size++; @@ -247,10 +247,10 @@ public: } void on_host() { - DV::modified_host() = DV::modified_device() + 1; + DV::template modify<typename DV::t_host::device_type>(); } void on_device() { - DV::modified_device() = DV::modified_host() + 1; + DV::template modify<typename DV::t_dev::device_type>(); } void set_overallocation(float extra) { diff --git a/packages/kokkos/containers/unit_tests/CMakeLists.txt b/packages/kokkos/containers/unit_tests/CMakeLists.txt index 3dbe79183..0f94afec8 100644 --- a/packages/kokkos/containers/unit_tests/CMakeLists.txt +++ b/packages/kokkos/containers/unit_tests/CMakeLists.txt @@ -23,6 +23,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( threads/TestThreads_DynRankViewAPI_rank12345.cpp threads/TestThreads_DynRankViewAPI_rank67.cpp threads/TestThreads_ErrorReporter.cpp + threads/TestThreads_OffsetView.cpp threads/TestThreads_ScatterView.cpp threads/TestThreads_StaticCrsGraph.cpp threads/TestThreads_UnorderedMap.cpp @@ -47,6 +48,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( serial/TestSerial_DynRankViewAPI_rank12345.cpp serial/TestSerial_DynRankViewAPI_rank67.cpp serial/TestSerial_ErrorReporter.cpp + serial/TestSerial_OffsetView.cpp serial/TestSerial_ScatterView.cpp serial/TestSerial_StaticCrsGraph.cpp serial/TestSerial_UnorderedMap.cpp @@ -71,6 +73,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( openmp/TestOpenMP_DynRankViewAPI_rank12345.cpp openmp/TestOpenMP_DynRankViewAPI_rank67.cpp openmp/TestOpenMP_ErrorReporter.cpp + openmp/TestOpenMP_OffsetView.cpp openmp/TestOpenMP_ScatterView.cpp openmp/TestOpenMP_StaticCrsGraph.cpp 
openmp/TestOpenMP_UnorderedMap.cpp @@ -95,6 +98,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( cuda/TestCuda_DynRankViewAPI_rank12345.cpp cuda/TestCuda_DynRankViewAPI_rank67.cpp cuda/TestCuda_ErrorReporter.cpp + cuda/TestCuda_OffsetView.cpp cuda/TestCuda_ScatterView.cpp cuda/TestCuda_StaticCrsGraph.cpp cuda/TestCuda_UnorderedMap.cpp diff --git a/packages/kokkos/containers/unit_tests/Makefile b/packages/kokkos/containers/unit_tests/Makefile index 52d5d61aa..c0e5d2820 100644 --- a/packages/kokkos/containers/unit_tests/Makefile +++ b/packages/kokkos/containers/unit_tests/Makefile @@ -39,6 +39,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA += TestCuda_DynRankViewAPI_rank12345.o OBJ_CUDA += TestCuda_DynRankViewAPI_rank67.o OBJ_CUDA += TestCuda_ErrorReporter.o + OBJ_CUDA += TestCuda_OffsetView.o OBJ_CUDA += TestCuda_ScatterView.o OBJ_CUDA += TestCuda_StaticCrsGraph.o OBJ_CUDA += TestCuda_UnorderedMap.o @@ -57,6 +58,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) OBJ_ROCM += TestROCm_DynRankViewAPI_rank12345.o OBJ_ROCM += TestROCm_DynRankViewAPI_rank67.o OBJ_ROCM += TestROCm_ErrorReporter.o + OBJ_ROCM += TestROCm_OffsetView.o OBJ_ROCM += TestROCm_ScatterView.o OBJ_ROCM += TestROCm_StaticCrsGraph.o OBJ_ROCM += TestROCm_UnorderedMap.o @@ -75,6 +77,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) OBJ_THREADS += TestThreads_DynRankViewAPI_rank12345.o OBJ_THREADS += TestThreads_DynRankViewAPI_rank67.o OBJ_THREADS += TestThreads_ErrorReporter.o + OBJ_THREADS += TestThreads_OffsetView.o OBJ_THREADS += TestThreads_ScatterView.o OBJ_THREADS += TestThreads_StaticCrsGraph.o OBJ_THREADS += TestThreads_UnorderedMap.o @@ -93,6 +96,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank12345.o OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank67.o OBJ_OPENMP += TestOpenMP_ErrorReporter.o + OBJ_OPENMP += TestOpenMP_OffsetView.o OBJ_OPENMP += TestOpenMP_ScatterView.o OBJ_OPENMP += TestOpenMP_StaticCrsGraph.o OBJ_OPENMP += TestOpenMP_UnorderedMap.o @@ -111,6 +115,7 @@ ifeq 
($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL += TestSerial_DynRankViewAPI_rank12345.o OBJ_SERIAL += TestSerial_DynRankViewAPI_rank67.o OBJ_SERIAL += TestSerial_ErrorReporter.o + OBJ_SERIAL += TestSerial_OffsetView.o OBJ_SERIAL += TestSerial_ScatterView.o OBJ_SERIAL += TestSerial_StaticCrsGraph.o OBJ_SERIAL += TestSerial_UnorderedMap.o diff --git a/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp index 8c073710e..6684a5545 100644 --- a/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp +++ b/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp @@ -729,6 +729,7 @@ public: static void run_tests() { run_test_resize_realloc(); run_test_mirror(); + run_test_mirror_and_copy(); run_test_scalar(); run_test(); run_test_const(); @@ -885,6 +886,69 @@ public: } } + static void run_test_mirror_and_copy() + { + // LayoutLeft + { + Kokkos::DynRankView< double, Kokkos::LayoutLeft, Kokkos::HostSpace > a_org( "A", 10 ); + a_org(5) = 42.0; + Kokkos::DynRankView< double, Kokkos::LayoutLeft, Kokkos::HostSpace > a_h = a_org; + auto a_h2 = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), a_h ); + auto a_d = Kokkos::create_mirror_view_and_copy( DeviceType(), a_h ); + auto a_h3 = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), a_d ); + + int equal_ptr_h_h2 = a_h.data() == a_h2.data() ? 1 : 0; + int equal_ptr_h_d = a_h.data() == a_d.data() ? 1 : 0; + int equal_ptr_h2_d = a_h2.data() == a_d.data() ? 1 : 0; + int equal_ptr_h3_d = a_h3.data() == a_d.data() ? 1 : 0; + + int is_same_memspace = std::is_same< Kokkos::HostSpace, typename DeviceType::memory_space >::value ? 
1 : 0; + ASSERT_EQ( equal_ptr_h_h2, 1 ); + ASSERT_EQ( equal_ptr_h_d, is_same_memspace ); + ASSERT_EQ( equal_ptr_h2_d, is_same_memspace ); + ASSERT_EQ( equal_ptr_h3_d, is_same_memspace ); + + ASSERT_EQ( a_h.extent(0), a_h3.extent(0) ); + ASSERT_EQ( a_h.extent(0), a_h2.extent(0) ); + ASSERT_EQ( a_h.extent(0), a_d .extent(0) ); + ASSERT_EQ( a_h.extent(0), a_h3.extent(0) ); + ASSERT_EQ( a_h.rank(), a_org.rank() ); + ASSERT_EQ( a_h.rank(), a_h2.rank() ); + ASSERT_EQ( a_h.rank(), a_h3.rank() ); + ASSERT_EQ( a_h.rank(), a_d.rank() ); + ASSERT_EQ( a_org(5), a_h3(5) ); + } + // LayoutRight + { + Kokkos::DynRankView< double, Kokkos::LayoutRight, Kokkos::HostSpace > a_org( "A", 10 ); + a_org(5) = 42.0; + Kokkos::DynRankView< double, Kokkos::LayoutRight, Kokkos::HostSpace > a_h = a_org; + auto a_h2 = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), a_h ); + auto a_d = Kokkos::create_mirror_view_and_copy( DeviceType(), a_h ); + auto a_h3 = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), a_d ); + + int equal_ptr_h_h2 = a_h.data() == a_h2.data() ? 1 : 0; + int equal_ptr_h_d = a_h.data() == a_d.data() ? 1 : 0; + int equal_ptr_h2_d = a_h2.data() == a_d.data() ? 1 : 0; + int equal_ptr_h3_d = a_h3.data() == a_d.data() ? 1 : 0; + + int is_same_memspace = std::is_same< Kokkos::HostSpace, typename DeviceType::memory_space >::value ? 
1 : 0; + ASSERT_EQ( equal_ptr_h_h2, 1 ); + ASSERT_EQ( equal_ptr_h_d, is_same_memspace ); + ASSERT_EQ( equal_ptr_h2_d, is_same_memspace ); + ASSERT_EQ( equal_ptr_h3_d, is_same_memspace ); + + ASSERT_EQ( a_h.extent(0), a_h3.extent(0) ); + ASSERT_EQ( a_h.extent(0), a_h2.extent(0) ); + ASSERT_EQ( a_h.extent(0), a_d .extent(0) ); + ASSERT_EQ( a_h.rank(), a_org.rank() ); + ASSERT_EQ( a_h.rank(), a_h2.rank() ); + ASSERT_EQ( a_h.rank(), a_h3.rank() ); + ASSERT_EQ( a_h.rank(), a_d.rank() ); + ASSERT_EQ( a_org(5), a_h3(5) ); + } + } + static void run_test_scalar() { typedef typename dView0::HostMirror hView0 ; //HostMirror of DynRankView is a DynRankView diff --git a/packages/kokkos/containers/unit_tests/TestOffsetView.hpp b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp new file mode 100644 index 000000000..6965199d4 --- /dev/null +++ b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp @@ -0,0 +1,426 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +/* + * FIXME the OffsetView class is really not very well tested. 
+ */ +#ifndef CONTAINERS_UNIT_TESTS_TESTOFFSETVIEW_HPP_ +#define CONTAINERS_UNIT_TESTS_TESTOFFSETVIEW_HPP_ + + + +#include <gtest/gtest.h> +#include <iostream> +#include <cstdlib> +#include <cstdio> +#include <impl/Kokkos_Timer.hpp> +#include <Kokkos_OffsetView.hpp> +#include <KokkosExp_MDRangePolicy.hpp> + +using std::endl; +using std::cout; + +namespace Test{ + + template <typename Scalar, typename Device> + void test_offsetview_construction(unsigned int size) + { + + typedef Kokkos::Experimental::OffsetView<Scalar**, Device> offset_view_type; + typedef Kokkos::View<Scalar**, Device> view_type; + + Kokkos::Experimental::index_list_type range0 = {-1, 3}; + Kokkos::Experimental::index_list_type range1 = {-2, 2}; + + offset_view_type ov("firstOV", range0, range1); + + ASSERT_EQ("firstOV", ov.label()); + ASSERT_EQ(2, ov.Rank); + + ASSERT_EQ(ov.begin(0), -1); + ASSERT_EQ(ov.end(0), 4); + + ASSERT_EQ(ov.begin(1), -2); + ASSERT_EQ(ov.end(1), 3); + + ASSERT_EQ(ov.extent(0), 5); + ASSERT_EQ(ov.extent(1), 5); + + const int ovmin0 = ov.begin(0); + const int ovend0 = ov.end(0); + const int ovmin1 = ov.begin(1); + const int ovend1 = ov.end(1); + +#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) + { + Kokkos::Experimental::OffsetView<Scalar*, Device> offsetV1("OneDOffsetView", range0); + + Kokkos::RangePolicy<Device, int> rangePolicy1(offsetV1.begin(0), offsetV1.end(0)); + Kokkos::parallel_for(rangePolicy1, KOKKOS_LAMBDA (const int i){ + offsetV1(i) = 1; + } + ); + Kokkos::fence(); + + int OVResult = 0; + Kokkos::parallel_reduce(rangePolicy1, KOKKOS_LAMBDA(const int i, int & updateMe){ + updateMe += offsetV1(i); + }, OVResult); + + Kokkos::fence(); + ASSERT_EQ(OVResult, offsetV1.end(0) - offsetV1.begin(0)) << "found wrong number of elements in OffsetView that was summed."; + + } + { //test deep copy of scalar const value into mirro + const int constVal = 6; + typename offset_view_type::HostMirror hostOffsetView = + 
Kokkos::Experimental::create_mirror_view(ov); + + Kokkos::Experimental::deep_copy(hostOffsetView, constVal); + + for(int i = hostOffsetView.begin(0); i < hostOffsetView.end(0); ++i) { + for(int j = hostOffsetView.begin(1); j < hostOffsetView.end(1); ++j) { + ASSERT_EQ(hostOffsetView(i,j), constVal) << "Bad data found in OffsetView"; + } + } + } + + typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::point_type point_type; + + range_type rangePolicy2D(point_type{ {ovmin0, ovmin1 } }, + point_type{ { ovend0, ovend1 } }); + + const int constValue = 9; + Kokkos::parallel_for(rangePolicy2D, KOKKOS_LAMBDA (const int i, const int j) { + ov(i,j) = constValue; + } + ); + + //test offsetview to offsetviewmirror deep copy + typename offset_view_type::HostMirror hostOffsetView = + Kokkos::Experimental::create_mirror_view(ov); + + Kokkos::Experimental::deep_copy(hostOffsetView, ov); + + for(int i = hostOffsetView.begin(0); i < hostOffsetView.end(0); ++i) { + for(int j = hostOffsetView.begin(1); j < hostOffsetView.end(1); ++j) { + ASSERT_EQ(hostOffsetView(i,j), constValue) << "Bad data found in OffsetView"; + } + } + + int OVResult = 0; + Kokkos::parallel_reduce(rangePolicy2D, KOKKOS_LAMBDA(const int i, const int j, int & updateMe){ + updateMe += ov(i, j); + }, OVResult); + + int answer = 0; + for(int i = ov.begin(0); i < ov.end(0); ++i) { + for(int j = ov.begin(1); j < ov.end(1); ++j) { + answer += constValue; + } + } + + ASSERT_EQ(OVResult, answer) << "Bad data found in OffsetView"; +#endif + + { + offset_view_type ovCopy(ov); + ASSERT_EQ(ovCopy==ov, true) << + "Copy constructor or equivalence operator broken"; + } + + { + offset_view_type ovAssigned = ov; + ASSERT_EQ(ovAssigned==ov, true) << + "Assignment operator or equivalence operator broken"; + } + + { //construct OffsetView from a View plus begins array + const int extent0 = 100; + const int extent1 = 200; + const int extent2 = 300; + 
Kokkos::View<Scalar***, Device> view3D("view3D", extent0, extent1, extent2); + + Kokkos::deep_copy(view3D, 1); + + Kokkos::Array<int64_t,3> begins = {{-10, -20, -30}}; + Kokkos::Experimental::OffsetView<Scalar***, Device> offsetView3D(view3D, begins); + + typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<3>, Kokkos::IndexType<int64_t> > range3_type; + typedef typename range3_type::point_type point3_type; + + range3_type rangePolicy3DZero(point3_type{ {0, 0, 0 } }, + point3_type{ { extent0, extent1, extent2 } }); + +#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) + int view3DSum = 0; + Kokkos::parallel_reduce(rangePolicy3DZero, KOKKOS_LAMBDA(const int i, const int j, int k, int & updateMe){ + updateMe += view3D(i, j, k); + }, view3DSum); + + range3_type rangePolicy3D(point3_type{ {begins[0], begins[1], begins[2] } }, + point3_type{ { begins[0] + extent0, begins[1] + extent1, begins[2] + extent2 } }); + int offsetView3DSum = 0; + + Kokkos::parallel_reduce(rangePolicy3D, KOKKOS_LAMBDA(const int i, const int j, int k, int & updateMe){ + updateMe += offsetView3D(i, j, k); + }, offsetView3DSum); + + ASSERT_EQ(view3DSum, offsetView3DSum) << "construction of OffsetView from View and begins array broken."; +#endif + } + view_type viewFromOV = ov.view(); + + ASSERT_EQ(viewFromOV == ov, true) << + "OffsetView::view() or equivalence operator View == OffsetView broken"; + + { + offset_view_type ovFromV(viewFromOV, {-1, -2}); + + ASSERT_EQ(ovFromV == viewFromOV , true) << + "Construction of OffsetView from View or equivalence operator OffsetView == View broken"; + } + { + offset_view_type ovFromV = viewFromOV; + ASSERT_EQ(ovFromV == viewFromOV , true) << + "Construction of OffsetView from View by assignment (implicit conversion) or equivalence operator OffsetView == View broken"; + } + + {// test offsetview to view deep copy + view_type aView("aView", ov.extent(0), ov.extent(1)); + Kokkos::Experimental::deep_copy(aView, ov); + +#if 
defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) + int sum = 0; + Kokkos::parallel_reduce(rangePolicy2D, KOKKOS_LAMBDA(const int i, const int j, int & updateMe){ + updateMe += ov(i, j) - aView(i- ov.begin(0), j-ov.begin(1)); + }, sum); + + ASSERT_EQ(sum, 0) << "deep_copy(view, offsetView) broken."; +#endif + } + + {// test view to offsetview deep copy + view_type aView("aView", ov.extent(0), ov.extent(1)); + + Kokkos::deep_copy(aView, 99); + Kokkos::Experimental::deep_copy(ov, aView); + + +#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) + int sum = 0; + Kokkos::parallel_reduce(rangePolicy2D, KOKKOS_LAMBDA(const int i, const int j, int & updateMe){ + updateMe += ov(i, j) - aView(i- ov.begin(0), j-ov.begin(1)); + }, sum); + + ASSERT_EQ(sum, 0) << "deep_copy(offsetView, view) broken."; +#endif + } + } + template <typename Scalar, typename Device> + void test_offsetview_subview(unsigned int size) + { + {//test subview 1 + Kokkos::Experimental::OffsetView<Scalar*, Device> sliceMe("offsetToSlice", {-10, 20}); + { + auto offsetSubviewa = Kokkos::Experimental::subview(sliceMe, 0); + ASSERT_EQ(offsetSubviewa.Rank, 0) << "subview of offset is broken."; + } + + } + {//test subview 2 + Kokkos::Experimental::OffsetView<Scalar**, Device> sliceMe("offsetToSlice", {-10,20}, {-20,30}); + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(),-2); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL()); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + } + + + {//test subview rank 3 + + Kokkos::Experimental::OffsetView<Scalar***, Device> sliceMe("offsetToSlice", {-10,20}, {-20,30}, {-30,40}); + + //slice 1 + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe,Kokkos::ALL(),Kokkos::ALL(), 0); + ASSERT_EQ(offsetSubview.Rank, 2) << "subview of offset is broken."; + } + { + auto 
offsetSubview = Kokkos::Experimental::subview(sliceMe,Kokkos::ALL(), 0,Kokkos::ALL()); + ASSERT_EQ(offsetSubview.Rank, 2) << "subview of offset is broken."; + } + + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe,0, Kokkos::ALL(),Kokkos::ALL()); + ASSERT_EQ(offsetSubview.Rank, 2) << "subview of offset is broken."; + + } + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe,0, Kokkos::ALL(), Kokkos::make_pair(-30, -21)); + ASSERT_EQ(offsetSubview.Rank, 2) << "subview of offset is broken."; + + ASSERT_EQ(offsetSubview.begin(0) , -20); + ASSERT_EQ(offsetSubview.end(0) , 31); + ASSERT_EQ(offsetSubview.begin(1) , 0); + ASSERT_EQ(offsetSubview.end(1) , 9); + +#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) + typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::point_type point_type; + + const int b0 = offsetSubview.begin(0); + const int b1 = offsetSubview.begin(1); + + const int e0 = offsetSubview.end(0); + const int e1 = offsetSubview.end(1); + + range_type rangeP2D(point_type{ {b0, b1 } }, point_type{ { e0, e1} }); + + Kokkos::parallel_for(rangeP2D, KOKKOS_LAMBDA(const int i, const int j) { + offsetSubview(i,j) = 6; + } + ); + + int sum = 0; + Kokkos::parallel_reduce(rangeP2D, KOKKOS_LAMBDA(const int i, const int j, int & updateMe){ + updateMe += offsetSubview(i, j); + }, sum); + + ASSERT_EQ(sum, 6*(e0-b0)*(e1-b1)); +#endif + } + + // slice 2 + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, 0); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, 0, Kokkos::ALL()); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL(), 0); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + } + + {//test subview rank 4 
+ + Kokkos::Experimental::OffsetView<Scalar****, Device> sliceMe("offsetToSlice", {-10,20}, {-20,30}, {-30,40}, {-40, 50}); + + //slice 1 + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(),Kokkos::ALL(), Kokkos::ALL(), 0); + ASSERT_EQ(offsetSubview.Rank, 3) << "subview of offset is broken."; + } + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), Kokkos::ALL(), 0, Kokkos::ALL()); + ASSERT_EQ(offsetSubview.Rank, 3) << "subview of offset is broken."; + } + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe ,Kokkos::ALL(), 0, Kokkos::ALL(),Kokkos::ALL()); + ASSERT_EQ(offsetSubview.Rank, 3) << "subview of offset is broken."; + } + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe , 0, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL() ); + ASSERT_EQ(offsetSubview.Rank, 3) << "subview of offset is broken."; + } + + // slice 2 + auto offsetSubview2a = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), Kokkos::ALL(), 0, 0); + ASSERT_EQ(offsetSubview2a.Rank, 2) << "subview of offset is broken."; + { + auto offsetSubview2b = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, Kokkos::ALL(), 0); + ASSERT_EQ(offsetSubview2b.Rank, 2) << "subview of offset is broken."; + } + { + auto offsetSubview2b = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, 0, Kokkos::ALL()); + ASSERT_EQ(offsetSubview2b.Rank, 2) << "subview of offset is broken."; + } + { + auto offsetSubview2b = Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL(), 0, Kokkos::ALL()); + ASSERT_EQ(offsetSubview2b.Rank, 2) << "subview of offset is broken."; + } + { + auto offsetSubview2b = Kokkos::Experimental::subview(sliceMe, 0, 0, Kokkos::ALL(), Kokkos::ALL()); + ASSERT_EQ(offsetSubview2b.Rank, 2) << "subview of offset is broken."; + } + // slice 3 + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, 0, 0); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + { + auto 
offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL(), 0, 0); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, 0, Kokkos::ALL(), 0); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + { + auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, 0, 0, Kokkos::ALL()); + ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken."; + } + + } + + } + + TEST_F( TEST_CATEGORY, offsetview_construction) { + test_offsetview_construction<int,TEST_EXECSPACE>(10); + } + TEST_F( TEST_CATEGORY, offsetview_subview) { + test_offsetview_subview<int,TEST_EXECSPACE>(10); + } + +} // namespace Test + +#endif /* CONTAINERS_UNIT_TESTS_TESTOFFSETVIEW_HPP_ */ diff --git a/packages/kokkos/containers/unit_tests/TestScatterView.hpp b/packages/kokkos/containers/unit_tests/TestScatterView.hpp index 106d2cf98..d402a91b9 100644 --- a/packages/kokkos/containers/unit_tests/TestScatterView.hpp +++ b/packages/kokkos/containers/unit_tests/TestScatterView.hpp @@ -80,7 +80,9 @@ void test_scatter_view_config(int n) Kokkos::Experimental::contribute(original_view, scatter_view); } #if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) + Kokkos::fence(); auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), original_view); + Kokkos::fence(); for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); @@ -111,9 +113,6 @@ struct TestDuplicatedScatterView { test_scatter_view_config<ExecSpace, Kokkos::LayoutRight, Kokkos::Experimental::ScatterDuplicated, Kokkos::Experimental::ScatterNonAtomic>(n); - test_scatter_view_config<ExecSpace, Kokkos::LayoutRight, - Kokkos::Experimental::ScatterDuplicated, - Kokkos::Experimental::ScatterAtomic>(n); } }; @@ -127,6 +126,16 @@ struct TestDuplicatedScatterView<Kokkos::Cuda> { }; #endif +#ifdef KOKKOS_ENABLE_ROCM +// disable 
duplicated instantiation with ROCm until +// UniqueToken can support it +template <> +struct TestDuplicatedScatterView<Kokkos::Experimental::ROCm> { + TestDuplicatedScatterView(int) { + } +}; +#endif + template <typename ExecSpace> void test_scatter_view(int n) { @@ -142,16 +151,28 @@ void test_scatter_view(int n) Kokkos::Experimental::ScatterNonDuplicated, Kokkos::Experimental::ScatterNonAtomic>(n); } +#ifdef KOKKOS_ENABLE_SERIAL + if (!std::is_same<ExecSpace, Kokkos::Serial>::value) { +#endif test_scatter_view_config<ExecSpace, Kokkos::LayoutRight, Kokkos::Experimental::ScatterNonDuplicated, Kokkos::Experimental::ScatterAtomic>(n); +#ifdef KOKKOS_ENABLE_SERIAL + } +#endif TestDuplicatedScatterView<ExecSpace> duptest(n); } TEST_F( TEST_CATEGORY, scatterview) { +#ifndef KOKKOS_ENABLE_ROCM test_scatter_view<TEST_EXECSPACE>(10); +#ifdef KOKKOS_ENABLE_DEBUG + test_scatter_view<TEST_EXECSPACE>(100000); +#else test_scatter_view<TEST_EXECSPACE>(10000000); +#endif +#endif } } // namespace Test diff --git a/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp b/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp index 06d7ed824..7ba307079 100644 --- a/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp +++ b/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp @@ -46,6 +46,7 @@ #include <vector> #include <Kokkos_StaticCrsGraph.hpp> +#include <Kokkos_Core.hpp> /*--------------------------------------------------------------------------*/ namespace Test { diff --git a/packages/kokkos/containers/unit_tests/cuda/TestCuda_OffsetView.cpp b/packages/kokkos/containers/unit_tests/cuda/TestCuda_OffsetView.cpp new file mode 100644 index 000000000..546f6d603 --- /dev/null +++ b/packages/kokkos/containers/unit_tests/cuda/TestCuda_OffsetView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<cuda/TestCuda_Category.hpp> +#include<TestOffsetView.hpp> + diff --git a/packages/kokkos/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp b/packages/kokkos/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp new file mode 100644 index 000000000..169dae321 --- /dev/null +++ b/packages/kokkos/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmp/TestOpenMP_Category.hpp> +#include<TestOffsetView.hpp> + diff --git a/packages/kokkos/containers/unit_tests/rocm/TestROCm_Category.hpp b/packages/kokkos/containers/unit_tests/rocm/TestROCm_Category.hpp index 555ddd6bd..d520bbc5a 100644 --- a/packages/kokkos/containers/unit_tests/rocm/TestROCm_Category.hpp +++ b/packages/kokkos/containers/unit_tests/rocm/TestROCm_Category.hpp @@ -60,6 +60,6 @@ protected: } // namespace Test #define TEST_CATEGORY rocm -#define TEST_EXECSPACE Kokkos::ROCm +#define TEST_EXECSPACE Kokkos::Experimental::ROCm #endif diff --git a/packages/kokkos/containers/unit_tests/serial/TestSerial_OffsetView.cpp b/packages/kokkos/containers/unit_tests/serial/TestSerial_OffsetView.cpp new file mode 100644 index 000000000..fadd748ef --- /dev/null +++ b/packages/kokkos/containers/unit_tests/serial/TestSerial_OffsetView.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestOffsetView.hpp> + diff --git a/packages/kokkos/containers/unit_tests/threads/TestThreads_OffsetView.cpp b/packages/kokkos/containers/unit_tests/threads/TestThreads_OffsetView.cpp new file mode 100644 index 000000000..d1eaa265e --- /dev/null +++ b/packages/kokkos/containers/unit_tests/threads/TestThreads_OffsetView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<threads/TestThreads_Category.hpp> +#include<TestOffsetView.hpp> + diff --git a/packages/kokkos/core/src/CMakeLists.txt b/packages/kokkos/core/src/CMakeLists.txt index eb0261670..ab7f3f55c 100644 --- a/packages/kokkos/core/src/CMakeLists.txt +++ b/packages/kokkos/core/src/CMakeLists.txt @@ -108,3 +108,7 @@ else() endif() #----------------------------------------------------------------------------- + +# build and install pkgconfig file +CONFIGURE_FILE(kokkos.pc.in kokkos.pc @ONLY) +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig) diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp index ada3f64fe..c31b7f5b5 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp @@ -208,7 +208,7 @@ struct CudaParallelLaunch< DriverType , const int shmem , const cudaStream_t stream = 0 ) { - if ( grid.x && ( block.x * block.y * block.z ) ) { + if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < sizeof( DriverType ) ) { @@ -264,7 +264,7 @@ struct CudaParallelLaunch< DriverType , const int shmem , const cudaStream_t stream = 0 ) { - if ( grid.x 
&& ( block.x * block.y * block.z ) ) { + if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < sizeof( DriverType ) ) { @@ -321,7 +321,7 @@ struct CudaParallelLaunch< DriverType , const int shmem , const cudaStream_t stream = 0 ) { - if ( grid.x && ( block.x * block.y * block.z ) ) { + if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < sizeof( DriverType ) ) { @@ -370,7 +370,7 @@ struct CudaParallelLaunch< DriverType , const int shmem , const cudaStream_t stream = 0 ) { - if ( grid.x && ( block.x * block.y * block.z ) ) { + if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < sizeof( DriverType ) ) { diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 302cf13d4..4fa460996 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -453,6 +453,8 @@ SharedAllocationRecord( const Kokkos::CudaSpace & arg_space , arg_label.c_str() , SharedAllocationHeader::maximum_label_length ); + // Set last element zero, in case c_str is too long + header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; // Copy to device memory Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) ); @@ -491,6 +493,9 @@ SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space , arg_label.c_str() , SharedAllocationHeader::maximum_label_length ); + + // Set last element zero, in case c_str is too long + RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; } SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: @@ -525,6 +530,8 @@ SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & 
arg_space , arg_label.c_str() , SharedAllocationHeader::maximum_label_length ); + // Set last element zero, in case c_str is too long + RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; } //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp index 8249da6a8..16952a3ae 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp @@ -689,9 +689,13 @@ Cuda::size_type cuda_internal_multiprocessor_count() CudaSpace::size_type cuda_internal_maximum_concurrent_block_count() { + #if defined(KOKKOS_ARCH_KEPLER) + // Compute capability 3.0 through 3.7 + enum : int { max_resident_blocks_per_multiprocessor = 16 }; + #else // Compute capability 5.0 through 6.2 enum : int { max_resident_blocks_per_multiprocessor = 32 }; - + #endif return CudaInternal::singleton().m_multiProcCount * max_resident_blocks_per_multiprocessor ; }; diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp index 31f405dd8..145d93ed7 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp @@ -52,22 +52,22 @@ namespace Kokkos { namespace Impl { -template<class DriverType, bool Large> +template<class DriverType, class LaunchBounds, bool Large> struct CudaGetMaxBlockSize; -template<class DriverType, bool Large = (CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))> +template<class DriverType, class LaunchBounds> int cuda_get_max_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { - return CudaGetMaxBlockSize<DriverType,Large>::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread); + return 
CudaGetMaxBlockSize<DriverType,LaunchBounds,(CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))>::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread); } template<class DriverType> -struct CudaGetMaxBlockSize<DriverType,true> { +struct CudaGetMaxBlockSize<DriverType,Kokkos::LaunchBounds<>,true> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int numBlocks; - int blockSize=32; + int blockSize=1024; int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); cudaOccupancyMaxActiveBlocksPerMultiprocessor( @@ -76,8 +76,9 @@ struct CudaGetMaxBlockSize<DriverType,true> { blockSize, sharedmem); - while (blockSize<1024 && numBlocks>0) { - blockSize*=2; + if(numBlocks>0) return blockSize; + while (blockSize>32 && numBlocks==0) { + blockSize/=2; sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); @@ -87,19 +88,30 @@ struct CudaGetMaxBlockSize<DriverType,true> { blockSize, sharedmem); } - if(numBlocks>0) return blockSize; - else return blockSize/2; + int blockSizeUpperBound = blockSize*2; + while (blockSize<blockSizeUpperBound && numBlocks>0) { + blockSize+=32; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_constant_memory<DriverType>, + blockSize, + sharedmem); + } + return blockSize - 32; } }; template<class DriverType> -struct CudaGetMaxBlockSize<DriverType,false> { +struct CudaGetMaxBlockSize<DriverType,Kokkos::LaunchBounds<>,false> { static int get_block_size(const 
typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int numBlocks; - int blockSize=32; - int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + unsigned int blockSize=1024; + unsigned int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); cudaOccupancyMaxActiveBlocksPerMultiprocessor( &numBlocks, @@ -107,8 +119,9 @@ struct CudaGetMaxBlockSize<DriverType,false> { blockSize, sharedmem); - while (blockSize<1024 && numBlocks>0) { - blockSize*=2; + if(numBlocks>0) return blockSize; + while (blockSize>32 && numBlocks==0) { + blockSize/=2; sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); @@ -118,24 +131,121 @@ struct CudaGetMaxBlockSize<DriverType,false> { blockSize, sharedmem); } - if(numBlocks>0) return blockSize; - else return blockSize/2; + unsigned int blockSizeUpperBound = blockSize*2; + while (blockSize<blockSizeUpperBound && numBlocks>0) { + blockSize+=32; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_local_memory<DriverType>, + blockSize, + sharedmem); + } + return blockSize - 32; } }; +template<class DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM> +struct CudaGetMaxBlockSize<DriverType,Kokkos::LaunchBounds<MaxThreadsPerBlock,MinBlocksPerSM>,true> { + static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + int numBlocks = 0, oldNumBlocks = 0; + 
unsigned int blockSize=MaxThreadsPerBlock; + unsigned int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_constant_memory<DriverType,MaxThreadsPerBlock,MinBlocksPerSM>, + blockSize, + sharedmem); + + if(static_cast<unsigned int>(numBlocks)>=MinBlocksPerSM) return blockSize; + while (blockSize>32 && static_cast<unsigned int>(numBlocks)<MinBlocksPerSM) { + blockSize/=2; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); -template<class DriverType, bool Large> + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_constant_memory<DriverType>, + blockSize, + sharedmem); + } + unsigned int blockSizeUpperBound = (blockSize*2<MaxThreadsPerBlock?blockSize*2:MaxThreadsPerBlock); + while (blockSize<blockSizeUpperBound && static_cast<unsigned int>(numBlocks)>MinBlocksPerSM) { + blockSize+=32; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + oldNumBlocks = numBlocks; + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_constant_memory<DriverType>, + blockSize, + sharedmem); + } + if(static_cast<unsigned int>(oldNumBlocks)>=MinBlocksPerSM) return blockSize - 32; + return -1; + } +}; + +template<class DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM> +struct CudaGetMaxBlockSize<DriverType,Kokkos::LaunchBounds<MaxThreadsPerBlock,MinBlocksPerSM>,false> { + static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + 
int numBlocks = 0, oldNumBlocks = 0; + unsigned int blockSize=MaxThreadsPerBlock; + int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_local_memory<DriverType,MaxThreadsPerBlock,MinBlocksPerSM>, + blockSize, + sharedmem); + if(static_cast<unsigned int>(numBlocks)>=MinBlocksPerSM) return blockSize; + + while (blockSize>32 && static_cast<unsigned int>(numBlocks)<MinBlocksPerSM) { + blockSize/=2; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_local_memory<DriverType>, + blockSize, + sharedmem); + } + unsigned int blockSizeUpperBound = (blockSize*2<MaxThreadsPerBlock?blockSize*2:MaxThreadsPerBlock); + while (blockSize<blockSizeUpperBound && static_cast<unsigned int>(numBlocks)>=MinBlocksPerSM) { + blockSize+=32; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + oldNumBlocks = numBlocks; + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_local_memory<DriverType>, + blockSize, + sharedmem); + } + if(static_cast<unsigned int>(oldNumBlocks)>=MinBlocksPerSM) return blockSize - 32; + return -1; + } +}; + + +template<class DriverType, class LaunchBounds, bool Large> struct CudaGetOptBlockSize; -template<class DriverType, bool Large = (CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))> +template<class DriverType, class LaunchBounds> int cuda_get_opt_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const 
size_t shmem_extra_thread) { - return CudaGetOptBlockSize<DriverType,Large>::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread); + return CudaGetOptBlockSize<DriverType,LaunchBounds,(CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))>::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread); } template<class DriverType> -struct CudaGetOptBlockSize<DriverType,true> { +struct CudaGetOptBlockSize<DriverType,Kokkos::LaunchBounds<>,true> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int blockSize=16; @@ -165,7 +275,7 @@ struct CudaGetOptBlockSize<DriverType,true> { }; template<class DriverType> -struct CudaGetOptBlockSize<DriverType,false> { +struct CudaGetOptBlockSize<DriverType,Kokkos::LaunchBounds<>,false> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int blockSize=16; @@ -194,6 +304,75 @@ struct CudaGetOptBlockSize<DriverType,false> { } }; +template<class DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM> +struct CudaGetOptBlockSize<DriverType,Kokkos::LaunchBounds< MaxThreadsPerBlock, MinBlocksPerSM >,true> { + static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + int blockSize=16; + int numBlocks; + int sharedmem; + int maxOccupancy=0; + int bestBlockSize=0; + int max_threads_per_block = std::min(MaxThreadsPerBlock,cuda_internal_maximum_warp_count()*CudaTraits::WarpSize); + + while(blockSize < max_threads_per_block ) { + blockSize*=2; + + //calculate the occupancy with that optBlockSize and check whether its larger than the largest one found so far + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + 
FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_constant_memory<DriverType,MaxThreadsPerBlock,MinBlocksPerSM>, + blockSize, + sharedmem); + if(numBlocks >= int(MinBlocksPerSM) && blockSize<=int(MaxThreadsPerBlock)) { + if(maxOccupancy < numBlocks*blockSize) { + maxOccupancy = numBlocks*blockSize; + bestBlockSize = blockSize; + } + } + } + if(maxOccupancy > 0) + return bestBlockSize; + return -1; + } +}; + +template<class DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM> +struct CudaGetOptBlockSize<DriverType,Kokkos::LaunchBounds< MaxThreadsPerBlock, MinBlocksPerSM >,false> { + static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, + const size_t shmem_extra_block, const size_t shmem_extra_thread) { + int blockSize=16; + int numBlocks; + int sharedmem; + int maxOccupancy=0; + int bestBlockSize=0; + int max_threads_per_block = std::min(MaxThreadsPerBlock,cuda_internal_maximum_warp_count()*CudaTraits::WarpSize); + + while(blockSize < max_threads_per_block ) { + blockSize*=2; + sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) + + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length ); + + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + cuda_parallel_launch_local_memory<DriverType,MaxThreadsPerBlock,MinBlocksPerSM>, + blockSize, + sharedmem); + if(numBlocks >= int(MinBlocksPerSM) && blockSize<=int(MaxThreadsPerBlock)) { + if(maxOccupancy < numBlocks*blockSize) { + maxOccupancy = numBlocks*blockSize; + bestBlockSize = blockSize; + } + } + } + if(maxOccupancy > 0) + return bestBlockSize; + return -1; + } +}; + }} // namespace Kokkos::Impl #endif // KOKKOS_ENABLE_CUDA diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp index 
80192bf33..8363a4566 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp @@ -148,6 +148,9 @@ namespace Kokkos { namespace Impl { namespace { static int lock_array_copied = 0; + inline int eliminate_warning_for_lock_array() { + return lock_array_copied; + } } } } diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index eac4abac1..2ae1cc0dd 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -60,6 +60,7 @@ #include <Cuda/Kokkos_Cuda_Internal.hpp> #include <Cuda/Kokkos_Cuda_Locks.hpp> #include <Kokkos_Vectorization.hpp> +#include <Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp> #if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_Interface.hpp> @@ -114,6 +115,7 @@ public: //---------------------------------------- +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > inline static int team_size_max( const FunctorType & functor ) @@ -131,7 +133,35 @@ public: return n ; } +#endif + + template<class FunctorType> + int team_size_max( const FunctorType& f, const ParallelForTag& ) const { + typedef Impl::ParallelFor< FunctorType , TeamPolicy<Properties...> > closure_type; + int block_size = Kokkos::Impl::cuda_get_max_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) ); + return block_size/vector_length(); + } + template<class FunctorType> + int team_size_max( const FunctorType& f, const ParallelReduceTag& ) const { + typedef Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,TeamPolicyInternal,FunctorType> functor_analysis_type; + typedef typename Impl::ParallelReduceReturnValue<void,typename functor_analysis_type::value_type,FunctorType>::reducer_type reducer_type; + typedef 
Impl::ParallelReduce< FunctorType , TeamPolicy<Properties...>, reducer_type > closure_type; + typedef Impl::FunctorValueTraits< FunctorType , typename traits::work_tag > functor_value_traits; + + int block_size = Kokkos::Impl::cuda_get_max_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) + + ((functor_value_traits::StaticValueSize!=0)?0:functor_value_traits::value_size( f ))); + + // Currently we require Power-of-2 team size for reductions. + int p2 = 1; + while(p2<=block_size) p2*=2; + p2/=2; + return p2/vector_length(); + } + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > static int team_size_recommended( const FunctorType & functor ) { return team_size_max( functor ); } @@ -143,11 +173,41 @@ public: if(max<1) max = 1; return max; } +#endif + + template<class FunctorType> + int team_size_recommended( const FunctorType& f, const ParallelForTag& ) const { + typedef Impl::ParallelFor< FunctorType , TeamPolicy<Properties...> > closure_type; + int block_size = Kokkos::Impl::cuda_get_opt_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double)); + return block_size/vector_length(); + } + + template<class FunctorType> + int team_size_recommended( const FunctorType& f, const ParallelReduceTag& ) const { + typedef Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,TeamPolicyInternal,FunctorType> functor_analysis_type; + typedef typename Impl::ParallelReduceReturnValue<void,typename functor_analysis_type::value_type,FunctorType>::reducer_type reducer_type; + typedef Impl::ParallelReduce< FunctorType , TeamPolicy<Properties...>, reducer_type > closure_type; + typedef Impl::FunctorValueTraits< FunctorType , typename traits::work_tag > functor_value_traits; + + int block_size = 
Kokkos::Impl::cuda_get_opt_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) + + ((functor_value_traits::StaticValueSize!=0)?0:functor_value_traits::value_size( f ))); + return block_size/vector_length(); + } + inline static int vector_length_max() { return Impl::CudaTraits::WarpSize; } + inline static + int scratch_size_max(int level) + { return (level==0? + 1024*40: // 48kB is the max for CUDA, but we need some for team_member.reduce etc. + 20*1024*1024); // arbitrarily setting this to 20MB, for a Volta V100 that would give us about 3.2GB for 2 teams per SM + } + //---------------------------------------- inline int vector_length() const { return m_vector_length ; } @@ -419,7 +479,7 @@ public: void execute() const { const typename Policy::index_type nwork = m_policy.end() - m_policy.begin(); - const int block_size = Kokkos::Impl::cuda_get_opt_block_size< ParallelFor >( m_functor , 1, 0 , 0 ); + const int block_size = Kokkos::Impl::cuda_get_opt_block_size< ParallelFor, LaunchBounds>( m_functor , 1, 0 , 0 ); const dim3 block( 1 , block_size , 1); const dim3 grid( std::min( typename Policy::index_type(( nwork + block.y - 1 ) / block.y) , typename Policy::index_type(cuda_internal_maximum_grid_count()) ) , 1 , 1); @@ -654,7 +714,7 @@ public: : m_functor( arg_functor ) , m_league_size( arg_policy.league_size() ) , m_team_size( 0 <= arg_policy.team_size() ? 
arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelFor >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) + Kokkos::Impl::cuda_get_opt_block_size< ParallelFor, LaunchBounds >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) , m_vector_size( arg_policy.vector_length() ) , m_shmem_begin( sizeof(double) * ( m_team_size + 2 ) ) , m_shmem_size( arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( m_functor , m_team_size ) ) @@ -670,7 +730,7 @@ public: } if ( int(m_team_size) > - int(Kokkos::Impl::cuda_get_max_block_size< ParallelFor > + int(Kokkos::Impl::cuda_get_max_block_size< ParallelFor, LaunchBounds > ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > requested too large team size.")); } @@ -725,12 +785,13 @@ public: const Policy m_policy ; const ReducerType m_reducer ; const pointer_type m_result_ptr ; + const bool m_result_ptr_device_accessible ; size_type * m_scratch_space ; size_type * m_scratch_flags ; size_type * m_unified_space ; - // Shall we use the shfl based reduction or not (only use it for static sized types of more than 128bit - enum { UseShflReduction = ((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) }; + // Shall we use the shfl based reduction or not (only use it for static sized types of more than 128bit) + enum { UseShflReduction = false };//((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) }; // Some crutch to do function overloading private: typedef double DummyShflReductionType; @@ -752,12 +813,12 @@ public: __device__ inline void operator() () const { - 
run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) ); +/* run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) ); } __device__ inline void run(const DummySHMEMReductionType& ) const - { + {*/ const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) ); @@ -786,7 +847,8 @@ public: // This is the final block with the final result at the final threads' location size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ; - size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ; + size_type * const global = m_result_ptr_device_accessible? reinterpret_cast<size_type*>(m_result_ptr) : + ( m_unified_space ? m_unified_space : m_scratch_space ); if ( threadIdx.y == 0 ) { Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); @@ -798,10 +860,9 @@ public: } } - __device__ inline +/* __device__ inline void run(const DummyShflReductionType&) const { - value_type value; ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &value); // Number of blocks is bounded so that the reduction can be limited to two passes. 
@@ -832,7 +893,7 @@ public: *result = value; } } - } + }*/ // Determine block size constrained by shared memory: static inline @@ -863,16 +924,18 @@ public: CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute - Cuda::fence(); + if(!m_result_ptr_device_accessible) { + Cuda::fence(); - if ( m_result_ptr ) { - if ( m_unified_space ) { - const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); - for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } - } - else { - const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); - DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size ); + if ( m_result_ptr ) { + if ( m_unified_space ) { + const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } + } + else { + const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); + DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size ); + } } } } @@ -883,17 +946,18 @@ public: } } - template< class HostViewType > + template< class ViewType > ParallelReduce( const FunctorType & arg_functor , const Policy & arg_policy - , const HostViewType & arg_result + , const ViewType & arg_result , typename std::enable_if< - Kokkos::is_view< HostViewType >::value + Kokkos::is_view< ViewType >::value ,void*>::type = NULL) : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( InvalidType() ) , m_result_ptr( arg_result.data() ) + , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ViewType::memory_space>::accessible ) , m_scratch_space( 0 ) , m_scratch_flags( 0 ) , m_unified_space( 0 ) @@ -906,6 +970,7 @@ public: , m_policy( arg_policy ) , m_reducer( reducer ) , m_result_ptr( reducer.view().data() ) 
+ , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ReducerType::result_view_type::memory_space>::accessible ) , m_scratch_space( 0 ) , m_scratch_flags( 0 ) , m_unified_space( 0 ) @@ -953,6 +1018,7 @@ public: const Policy m_policy ; // used for workrange and nwork const ReducerType m_reducer ; const pointer_type m_result_ptr ; + const bool m_result_ptr_device_accessible ; size_type * m_scratch_space ; size_type * m_scratch_flags ; size_type * m_unified_space ; @@ -960,7 +1026,7 @@ public: typedef typename Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank, Policy, FunctorType, typename Policy::work_tag, reference_type> DeviceIteratePattern; // Shall we use the shfl based reduction or not (only use it for static sized types of more than 128bit - enum { UseShflReduction = ((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) }; + enum { UseShflReduction = ((sizeof(value_type)>2*sizeof(double)) && (ValueTraits::StaticValueSize!=0)) }; // Some crutch to do function overloading private: typedef double DummyShflReductionType; @@ -978,12 +1044,12 @@ public: inline __device__ void operator() (void) const { - run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) ); +/* run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) ); } __device__ inline void run(const DummySHMEMReductionType& ) const - { + {*/ const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) ); @@ -1007,7 +1073,8 @@ public: // This is the final block with the final result at the final threads' location size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ; - size_type * const global = m_unified_space ? 
m_unified_space : m_scratch_space ; + size_type * const global = m_result_ptr_device_accessible? reinterpret_cast<size_type*>(m_result_ptr) : + ( m_unified_space ? m_unified_space : m_scratch_space ); if ( threadIdx.y == 0 ) { Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); @@ -1019,7 +1086,7 @@ public: } } - __device__ inline +/* __device__ inline void run(const DummyShflReductionType&) const { @@ -1051,7 +1118,7 @@ public: } } } - +*/ // Determine block size constrained by shared memory: static inline unsigned local_block_size( const FunctorType & f ) @@ -1089,16 +1156,18 @@ public: CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute - Cuda::fence(); + if(!m_result_ptr_device_accessible) { + Cuda::fence(); - if ( m_result_ptr ) { - if ( m_unified_space ) { - const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); - for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } - } - else { - const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); - DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size ); + if ( m_result_ptr ) { + if ( m_unified_space ) { + const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } + } + else { + const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); + DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size ); + } } } } @@ -1109,17 +1178,18 @@ public: } } - template< class HostViewType > + template< class ViewType > ParallelReduce( const FunctorType & arg_functor , const Policy & arg_policy - , const HostViewType & arg_result + , const ViewType & arg_result , typename std::enable_if< - 
Kokkos::is_view< HostViewType >::value + Kokkos::is_view< ViewType >::value ,void*>::type = NULL) : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( InvalidType() ) , m_result_ptr( arg_result.data() ) + , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ViewType::memory_space>::accessible ) , m_scratch_space( 0 ) , m_scratch_flags( 0 ) , m_unified_space( 0 ) @@ -1132,6 +1202,7 @@ public: , m_policy( arg_policy ) , m_reducer( reducer ) , m_result_ptr( reducer.view().data() ) + , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ReducerType::result_view_type::memory_space>::accessible ) , m_scratch_space( 0 ) , m_scratch_flags( 0 ) , m_unified_space( 0 ) @@ -1174,7 +1245,7 @@ public: typedef FunctorType functor_type ; typedef Cuda::size_type size_type ; - enum { UseShflReduction = (true && ValueTraits::StaticValueSize) }; + enum { UseShflReduction = (true && (ValueTraits::StaticValueSize!=0)) }; private: typedef double DummyShflReductionType; @@ -1191,6 +1262,7 @@ private: const FunctorType m_functor ; const ReducerType m_reducer ; const pointer_type m_result_ptr ; + const bool m_result_ptr_device_accessible ; size_type * m_scratch_space ; size_type * m_scratch_flags ; size_type * m_unified_space ; @@ -1279,7 +1351,8 @@ public: // This is the final block with the final result at the final threads' location size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ; - size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ; + size_type * const global = m_result_ptr_device_accessible? reinterpret_cast<size_type*>(m_result_ptr) : + ( m_unified_space ? 
m_unified_space : m_scratch_space ); if ( threadIdx.y == 0 ) { Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); @@ -1312,12 +1385,18 @@ public: , value ); } - pointer_type const result = (pointer_type) (m_unified_space ? m_unified_space : m_scratch_space) ; + pointer_type const result = m_result_ptr_device_accessible? m_result_ptr : + (pointer_type) ( m_unified_space ? m_unified_space : m_scratch_space ); value_type init; ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init); - if(Impl::cuda_inter_block_reduction<FunctorType,ValueJoin,WorkTag> - (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,blockDim.y)) { + if( + Impl::cuda_inter_block_reduction<FunctorType,ValueJoin,WorkTag> + (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,blockDim.y) + //This breaks a test + // Kokkos::Impl::CudaReductionsFunctor<FunctorType,WorkTag,false,true>::scalar_inter_block_reduction(ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x , + // kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags) + ) { const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; if(id==0) { Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); @@ -1331,7 +1410,7 @@ public: { const int nwork = m_league_size * m_team_size ; if ( nwork ) { - const int block_count = UseShflReduction? std::min( m_league_size , size_type(1024) ) + const int block_count = UseShflReduction? 
std::min( m_league_size , size_type(1024*32) ) :std::min( m_league_size , m_team_size ); m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count ); @@ -1344,16 +1423,18 @@ public: CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem_size_total ); // copy to device and execute - Cuda::fence(); - - if ( m_result_ptr ) { - if ( m_unified_space ) { - const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); - for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } - } - else { - const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); - DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size ); + if(!m_result_ptr_device_accessible) { + Cuda::fence(); + + if ( m_result_ptr ) { + if ( m_unified_space ) { + const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; } + } + else { + const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); + DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size ); + } } } } @@ -1364,16 +1445,17 @@ public: } } - template< class HostViewType > + template< class ViewType > ParallelReduce( const FunctorType & arg_functor , const Policy & arg_policy - , const HostViewType & arg_result + , const ViewType & arg_result , typename std::enable_if< - Kokkos::is_view< HostViewType >::value + Kokkos::is_view< ViewType >::value ,void*>::type = NULL) : m_functor( arg_functor ) , m_reducer( InvalidType() ) , m_result_ptr( arg_result.data() ) + , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ViewType::memory_space>::accessible ) , m_scratch_space( 0 ) , m_scratch_flags( 0 ) , m_unified_space( 0 ) @@ -1383,17 +1465,17 
@@ public: , m_scratch_ptr{NULL,NULL} , m_scratch_size{ arg_policy.scratch_size(0,( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) ), arg_policy.scratch_size(1,( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) )} , m_league_size( arg_policy.league_size() ) , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) , m_vector_size( arg_policy.vector_length() ) @@ -1430,9 +1512,7 @@ public: Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch memory")); } - if ( unsigned(m_team_size) > - unsigned(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce > - ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { + if ( int(m_team_size) > arg_policy.team_size_max(m_functor,ParallelReduceTag()) ) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size.")); } @@ -1444,6 +1524,7 @@ public: : m_functor( 
arg_functor ) , m_reducer( reducer ) , m_result_ptr( reducer.view().data() ) + , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ReducerType::result_view_type::memory_space>::accessible ) , m_scratch_space( 0 ) , m_scratch_flags( 0 ) , m_unified_space( 0 ) @@ -1453,7 +1534,7 @@ public: , m_scratch_ptr{NULL,NULL} , m_league_size( arg_policy.league_size() ) , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) , m_vector_size( arg_policy.vector_length() ) @@ -1486,10 +1567,7 @@ public: CudaTraits::SharedMemoryCapacity < shmem_size_total ) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); } - - if ( int(m_team_size) > - int(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce > - ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { + if ( int(m_team_size) > arg_policy.team_size_max(m_functor,ParallelReduceTag()) ) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size.")); } @@ -1753,7 +1831,7 @@ public: // Occupancy calculator assumes whole block. 
m_team_size = - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce > + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds > ( arg_functor , arg_policy.vector_length() , arg_policy.team_scratch_size(0) @@ -1970,7 +2048,9 @@ private: const WorkRange range( m_policy , blockIdx.x , gridDim.x ); for ( typename Policy::member_type iwork_base = range.begin(); iwork_base < range.end() ; iwork_base += blockDim.y ) { - + #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned MASK=KOKKOS_IMPL_CUDA_ACTIVEMASK; + #endif const typename Policy::member_type iwork = iwork_base + threadIdx.y ; __syncthreads(); // Don't overwrite previous iteration values until they are used @@ -1981,7 +2061,11 @@ private: for ( unsigned i = threadIdx.y ; i < word_count.value ; ++i ) { shared_data[i + word_count.value] = shared_data[i] = shared_accum[i] ; } - + #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + KOKKOS_IMPL_CUDA_SYNCWARP_MASK(MASK); + #else + KOKKOS_IMPL_CUDA_SYNCWARP_MASK; + #endif if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } // Protect against large scan values. // Call functor to accumulate inclusive scan value for this work item @@ -2189,6 +2273,9 @@ private: const WorkRange range( m_policy , blockIdx.x , gridDim.x ); for ( typename Policy::member_type iwork_base = range.begin(); iwork_base < range.end() ; iwork_base += blockDim.y ) { + #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned MASK=KOKKOS_IMPL_CUDA_ACTIVEMASK; + #endif const typename Policy::member_type iwork = iwork_base + threadIdx.y ; @@ -2201,6 +2288,11 @@ private: shared_data[i + word_count.value] = shared_data[i] = shared_accum[i] ; } + #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + KOKKOS_IMPL_CUDA_SYNCWARP_MASK(MASK); + #else + KOKKOS_IMPL_CUDA_SYNCWARP_MASK; + #endif if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } // Protect against large scan values. 
// Call functor to accumulate inclusive scan value for this work item diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index 676ba38c6..82d691f7d 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -194,8 +194,9 @@ void cuda_shfl_up( T & out , T const & in , int delta , */ template< class ValueType , class JoinOp> -__device__ -inline void cuda_intra_warp_reduction( ValueType& result, +__device__ inline +typename std::enable_if< !Kokkos::is_reducer<ValueType>::value >::type +cuda_intra_warp_reduction( ValueType& result, const JoinOp& join, const uint32_t max_active_thread = blockDim.y) { @@ -214,8 +215,9 @@ inline void cuda_intra_warp_reduction( ValueType& result, } template< class ValueType , class JoinOp> -__device__ -inline void cuda_inter_warp_reduction( ValueType& value, +__device__ inline +typename std::enable_if< !Kokkos::is_reducer<ValueType>::value >::type +cuda_inter_warp_reduction( ValueType& value, const JoinOp& join, const int max_active_thread = blockDim.y) { @@ -247,8 +249,9 @@ inline void cuda_inter_warp_reduction( ValueType& value, } template< class ValueType , class JoinOp> -__device__ -inline void cuda_intra_block_reduction( ValueType& value, +__device__ inline +typename std::enable_if< !Kokkos::is_reducer<ValueType>::value >::type +cuda_intra_block_reduction( ValueType& value, const JoinOp& join, const int max_active_thread = blockDim.y) { cuda_intra_warp_reduction(value,join,max_active_thread); @@ -314,31 +317,52 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT if( id + 1 < int(gridDim.x) ) join(value, tmp); } - int active = KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK; + int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + int active = 
KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif if (int(blockDim.x*blockDim.y) > 2) { value_type tmp = Kokkos::shfl_down(value, 2,32); if( id + 2 < int(gridDim.x) ) join(value, tmp); } - active += KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif if (int(blockDim.x*blockDim.y) > 4) { value_type tmp = Kokkos::shfl_down(value, 4,32); if( id + 4 < int(gridDim.x) ) join(value, tmp); } - active += KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif if (int(blockDim.x*blockDim.y) > 8) { value_type tmp = Kokkos::shfl_down(value, 8,32); if( id + 8 < int(gridDim.x) ) join(value, tmp); } - active += KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif if (int(blockDim.x*blockDim.y) > 16) { value_type tmp = Kokkos::shfl_down(value, 16,32); if( id + 16 < int(gridDim.x) ) join(value, tmp); } - active += KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif } } //The last block has in its thread=0 the global reduction value through "value" @@ -478,31 +502,52 @@ cuda_inter_block_reduction( const ReducerType& reducer, if( id + 1 < int(gridDim.x) ) reducer.join(value, tmp); } - int active = KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK; + int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif if (int(blockDim.x*blockDim.y) > 2) { value_type tmp = Kokkos::shfl_down(value, 2,32); if( id + 2 < int(gridDim.x) ) reducer.join(value, tmp); } - 
active += KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif if (int(blockDim.x*blockDim.y) > 4) { value_type tmp = Kokkos::shfl_down(value, 4,32); if( id + 4 < int(gridDim.x) ) reducer.join(value, tmp); } - active += KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif if (int(blockDim.x*blockDim.y) > 8) { value_type tmp = Kokkos::shfl_down(value, 8,32); if( id + 8 < int(gridDim.x) ) reducer.join(value, tmp); } - active += KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif if (int(blockDim.x*blockDim.y) > 16) { value_type tmp = Kokkos::shfl_down(value, 16,32); if( id + 16 < int(gridDim.x) ) reducer.join(value, tmp); } - active += KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif } } @@ -513,6 +558,213 @@ cuda_inter_block_reduction( const ReducerType& reducer, #endif } +template<class FunctorType, class ArgTag, bool DoScan, bool UseShfl> +struct CudaReductionsFunctor; + +template<class FunctorType, class ArgTag> +struct CudaReductionsFunctor<FunctorType, ArgTag, false, true> { + typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ; + typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ; + typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ; + typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ; + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::value_type Scalar; + + __device__ + static inline void scalar_intra_warp_reduction( + const FunctorType& functor, + Scalar value, // 
Contribution + const bool skip_vector, // Skip threads if Kokkos vector lanes are not part of the reduction + const int width, // How much of the warp participates + Scalar& result) + { + unsigned mask = width==32?0xffffffff:((1<<width)-1)<<((threadIdx.y*blockDim.x+threadIdx.x)%(32/width))*width; + for(int delta=skip_vector?blockDim.x:1; delta<width; delta*=2) { + Scalar tmp; + cuda_shfl_down(tmp,value,delta,width,mask); + ValueJoin::join( functor , &value, &tmp); + } + + cuda_shfl(result,value,0,width,mask); + } + + + __device__ + static inline void scalar_intra_block_reduction( + const FunctorType& functor, + Scalar value, + const bool skip, + Scalar* my_global_team_buffer_element, + const int shared_elements, + Scalar* shared_team_buffer_element) { + + const int warp_id = (threadIdx.y*blockDim.x)/32; + Scalar* const my_shared_team_buffer_element = + shared_team_buffer_element + warp_id%shared_elements; + + // Warp Level Reduction, ignoring Kokkos vector entries + scalar_intra_warp_reduction(functor,value,skip,32,value); + + if(warp_id<shared_elements) { + *my_shared_team_buffer_element=value; + } + // Wait for every warp to be done before using one warp to do final cross warp reduction + __syncthreads(); + + const int num_warps = blockDim.x*blockDim.y/32; + for(int w = shared_elements; w<num_warps; w+=shared_elements) { + if(warp_id>=w && warp_id<w+shared_elements) { + if((threadIdx.y*blockDim.x + threadIdx.x)%32==0) + ValueJoin::join( functor , my_shared_team_buffer_element, &value); + } + __syncthreads(); + } + + + if( warp_id == 0) { + ValueInit::init( functor , &value ); + for(unsigned int i=threadIdx.y*blockDim.x+threadIdx.x; i<blockDim.y*blockDim.x/32; i+=32) + ValueJoin::join( functor , &value,&shared_team_buffer_element[i]); + scalar_intra_warp_reduction(functor,value,false,32,*my_global_team_buffer_element); + } + } + + __device__ + static inline bool scalar_inter_block_reduction( + const FunctorType & functor , + const Cuda::size_type block_id , + 
const Cuda::size_type block_count , + Cuda::size_type * const shared_data , + Cuda::size_type * const global_data , + Cuda::size_type * const global_flags ) { + Scalar* const global_team_buffer_element = ((Scalar*) global_data); + Scalar* const my_global_team_buffer_element = global_team_buffer_element + blockIdx.x; + Scalar* shared_team_buffer_elements = ((Scalar*) shared_data); + Scalar value = shared_team_buffer_elements[threadIdx.y]; + int shared_elements=blockDim.x*blockDim.y/32; + int global_elements=block_count; + __syncthreads(); + + scalar_intra_block_reduction(functor,value,true,my_global_team_buffer_element,shared_elements,shared_team_buffer_elements); + __syncthreads(); + unsigned int num_teams_done = 0; + if(threadIdx.x + threadIdx.y == 0) { + __threadfence(); + num_teams_done = Kokkos::atomic_fetch_add(global_flags,1)+1; + } + bool is_last_block = false; + if(__syncthreads_or(num_teams_done == gridDim.x)) { + is_last_block=true; + *global_flags = 0; + ValueInit::init( functor, &value); + for(int i=threadIdx.y*blockDim.x+threadIdx.x; i<global_elements; i+=blockDim.x*blockDim.y) { + ValueJoin::join( functor , &value,&global_team_buffer_element[i]); + } + scalar_intra_block_reduction(functor,value,false,shared_team_buffer_elements+(blockDim.y-1),shared_elements,shared_team_buffer_elements); + } + return is_last_block; + } +}; + +template<class FunctorType, class ArgTag> +struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> { + typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ; + typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ; + typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ; + typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ; + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::value_type Scalar; + + __device__ + static inline void scalar_intra_warp_reduction( + const FunctorType& functor, + Scalar* value, // Contribution + const bool skip_vector, // Skip 
threads if Kokkos vector lanes are not part of the reduction + const int width) // How much of the warp participates + { +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned mask = width==32?0xffffffff:((1<<width)-1)<<((threadIdx.y*blockDim.x+threadIdx.x)%(32/width))*width; +#endif + const int lane_id = (threadIdx.y*blockDim.x+threadIdx.x)%32; + for(int delta=skip_vector?blockDim.x:1; delta<width; delta*=2) { + if(lane_id + delta<32) { + ValueJoin::join( functor , value, value+delta); + } +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + KOKKOS_IMPL_CUDA_SYNCWARP_MASK(mask); +#else + KOKKOS_IMPL_CUDA_SYNCWARP_MASK; +#endif + } + *value=*(value-lane_id); + } + + + __device__ + static inline void scalar_intra_block_reduction( + const FunctorType& functor, + Scalar value, + const bool skip, + Scalar* result, + const int shared_elements, + Scalar* shared_team_buffer_element) { + + const int warp_id = (threadIdx.y*blockDim.x)/32; + Scalar* const my_shared_team_buffer_element = + shared_team_buffer_element + threadIdx.y*blockDim.x+threadIdx.x; + *my_shared_team_buffer_element = value; + // Warp Level Reduction, ignoring Kokkos vector entries + scalar_intra_warp_reduction(functor,my_shared_team_buffer_element,skip,32); + // Wait for every warp to be done before using one warp to do final cross warp reduction + __syncthreads(); + + if( warp_id == 0) { + const unsigned int delta = (threadIdx.y*blockDim.x+threadIdx.x)*32; + if(delta<blockDim.x*blockDim.y) + *my_shared_team_buffer_element = shared_team_buffer_element[delta]; + KOKKOS_IMPL_CUDA_SYNCWARP; + scalar_intra_warp_reduction(functor,my_shared_team_buffer_element,false,blockDim.x*blockDim.y/32); + if(threadIdx.x + threadIdx.y == 0) *result = *shared_team_buffer_element; + } + } + + __device__ + static inline bool scalar_inter_block_reduction( + const FunctorType & functor , + const Cuda::size_type block_id , + const Cuda::size_type block_count , + Cuda::size_type * const shared_data , + Cuda::size_type * const 
global_data , + Cuda::size_type * const global_flags ) { + Scalar* const global_team_buffer_element = ((Scalar*) global_data); + Scalar* const my_global_team_buffer_element = global_team_buffer_element + blockIdx.x; + Scalar* shared_team_buffer_elements = ((Scalar*) shared_data); + Scalar value = shared_team_buffer_elements[threadIdx.y]; + int shared_elements=blockDim.x*blockDim.y/32; + int global_elements=block_count; + __syncthreads(); + + scalar_intra_block_reduction(functor,value,true,my_global_team_buffer_element,shared_elements,shared_team_buffer_elements); + __syncthreads(); + + unsigned int num_teams_done = 0; + if(threadIdx.x + threadIdx.y == 0) { + __threadfence(); + num_teams_done = Kokkos::atomic_fetch_add(global_flags,1)+1; + } + bool is_last_block = false; + if(__syncthreads_or(num_teams_done == gridDim.x)) { + is_last_block=true; + *global_flags = 0; + ValueInit::init( functor, &value); + for(int i=threadIdx.y*blockDim.x+threadIdx.x; i<global_elements; i+=blockDim.x*blockDim.y) { + ValueJoin::join( functor , &value,&global_team_buffer_element[i]); + } + scalar_intra_block_reduction(functor,value,false,shared_team_buffer_elements+(blockDim.y-1),shared_elements,shared_team_buffer_elements); + } + return is_last_block; + } +}; //---------------------------------------------------------------------------- // See section B.17 of Cuda C Programming Guide Version 3.2 // for discussion of @@ -639,14 +891,15 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor , * * Global reduce result is in the last threads' 'shared_data' location. 
*/ + template< bool DoScan , class FunctorType , class ArgTag > __device__ -bool cuda_single_inter_block_reduce_scan( const FunctorType & functor , - const Cuda::size_type block_id , - const Cuda::size_type block_count , - Cuda::size_type * const shared_data , - Cuda::size_type * const global_data , - Cuda::size_type * const global_flags ) +bool cuda_single_inter_block_reduce_scan2( const FunctorType & functor , + const Cuda::size_type block_id , + const Cuda::size_type block_count , + Cuda::size_type * const shared_data , + Cuda::size_type * const global_data , + Cuda::size_type * const global_flags ) { typedef Cuda::size_type size_type ; typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ; @@ -655,7 +908,6 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType & functor , typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ; typedef typename ValueTraits::pointer_type pointer_type ; - //typedef typename ValueTraits::reference_type reference_type ; // '__ffs' = position of the least significant bit set to 1. 
// 'blockDim.y' is guaranteed to be a power of two so this @@ -678,12 +930,7 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType & functor , size_type * const shared = shared_data + word_count.value * BlockSizeMask ; size_type * const global = global_data + word_count.value * block_id ; -//#if (__CUDA_ARCH__ < 500) for ( int i = int(threadIdx.y) ; i < int(word_count.value) ; i += int(blockDim.y) ) { global[i] = shared[i] ; } -//#else -// for ( size_type i = 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; } -//#endif - } // Contributing blocks note that their contribution has been completed via an atomic-increment flag @@ -725,6 +972,22 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType & functor , return is_last_block ; } +template< bool DoScan , class FunctorType , class ArgTag > +__device__ +bool cuda_single_inter_block_reduce_scan( const FunctorType & functor , + const Cuda::size_type block_id , + const Cuda::size_type block_count , + Cuda::size_type * const shared_data , + Cuda::size_type * const global_data , + Cuda::size_type * const global_flags ) +{ + typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ; + if(!DoScan && ValueTraits::StaticValueSize) + return Kokkos::Impl::CudaReductionsFunctor<FunctorType,ArgTag,false,(ValueTraits::StaticValueSize>16)>::scalar_inter_block_reduction(functor,block_id,block_count,shared_data,global_data,global_flags); + else + return cuda_single_inter_block_reduce_scan2<DoScan, FunctorType, ArgTag>(functor, block_id, block_count, shared_data, global_data, global_flags); +} + // Size in bytes required for inter block reduce or scan template< bool DoScan , class FunctorType , class ArgTag > inline diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp index 73ec409b2..9eb32f07c 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp @@ -160,7 +160,7 @@ 
public: template<class ValueType> KOKKOS_INLINE_FUNCTION - void team_broadcast( ValueType & val, const int& thread_id) const + void team_broadcast( ValueType & val, const int& thread_id ) const { #ifdef __CUDA_ARCH__ if ( 1 == blockDim.z ) { // team == block @@ -178,6 +178,29 @@ public: } #endif } + + template<class Closure, class ValueType> + KOKKOS_INLINE_FUNCTION + void team_broadcast( Closure const & f, ValueType & val, const int& thread_id ) const + { + #ifdef __CUDA_ARCH__ + f( val ); + + if ( 1 == blockDim.z ) { // team == block + __syncthreads(); + // Wait for shared data write until all threads arrive here + if ( threadIdx.x == 0u && threadIdx.y == (uint32_t)thread_id ) { + *((ValueType*) m_team_reduce) = val ; + } + __syncthreads(); // Wait for shared data read until root thread writes + val = *((ValueType*) m_team_reduce); + } + else { // team <= warp + ValueType tmp( val ); // input might not be a register variable + cuda_shfl( val, tmp, blockDim.x * thread_id, blockDim.x * blockDim.y ); + } + #endif + } //-------------------------------------------------------------------------- /**\brief Reduction across a team @@ -200,92 +223,7 @@ public: team_reduce( ReducerType const & reducer ) const noexcept { #ifdef __CUDA_ARCH__ - - typedef typename ReducerType::value_type value_type ; - - value_type tmp( reducer.reference() ); - - // reduce within the warp using shuffle - - const int wx = - ( threadIdx.x + blockDim.x * threadIdx.y ) & CudaTraits::WarpIndexMask ; - - for ( int i = CudaTraits::WarpSize ; (int)blockDim.x <= ( i >>= 1 ) ; ) { - - cuda_shfl_down( reducer.reference() , tmp , i , CudaTraits::WarpSize ); - - // Root of each vector lane reduces: - if ( 0 == threadIdx.x && wx < i ) { - reducer.join( tmp , reducer.reference() ); - } - } - - if ( 1 < blockDim.z ) { // team <= warp - // broadcast result from root vector lange of root thread - - cuda_shfl( reducer.reference() , tmp - , blockDim.x * threadIdx.y , CudaTraits::WarpSize ); - - } - else { // 
team == block - // Reduce across warps using shared memory - // Broadcast result within block - - // Number of warps, blockDim.y may not be power of two: - const int nw = ( blockDim.x * blockDim.y + CudaTraits::WarpIndexMask ) >> CudaTraits::WarpIndexShift ; - - // Warp index: - const int wy = ( blockDim.x * threadIdx.y ) >> CudaTraits::WarpIndexShift ; - - // Number of shared memory entries for the reduction: - int nsh = m_team_reduce_size / sizeof(value_type); - - // Using at most one entry per warp: - if ( nw < nsh ) nsh = nw ; - - __syncthreads(); // Wait before shared data write - - if ( 0 == wx && wy < nsh ) { - ((value_type*) m_team_reduce)[wy] = tmp ; - } - - // When more warps than shared entries: - for ( int i = nsh ; i < nw ; i += nsh ) { - - __syncthreads(); - - if ( 0 == wx && i <= wy ) { - const int k = wy - i ; - if ( k < nsh ) { - reducer.join( *((value_type*) m_team_reduce + k) , tmp ); - } - } - } - - __syncthreads(); - - // One warp performs the inter-warp reduction: - - if ( 0 == wy ) { - - // Start at power of two covering nsh - - for ( int i = 1 << ( 32 - __clz(nsh-1) ) ; ( i >>= 1 ) ; ) { - const int k = wx + i ; - if ( wx < i && k < nsh ) { - reducer.join( ((value_type*)m_team_reduce)[wx] - , ((value_type*)m_team_reduce)[k] ); - __threadfence_block(); - } - } - } - - __syncthreads(); // Wait for reduction - - // Broadcast result to all threads - reducer.reference() = *((value_type*)m_team_reduce); - } - + cuda_intra_block_reduction(reducer,blockDim.y); #endif /* #ifdef __CUDA_ARCH__ */ } @@ -801,7 +739,11 @@ void parallel_for ; i += blockDim.x ) { closure(i); } + #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x); + #else + KOKKOS_IMPL_CUDA_SYNCWARP_MASK; + #endif #endif } @@ -970,7 +912,11 @@ KOKKOS_INLINE_FUNCTION void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) { #ifdef 
__CUDA_ARCH__ if(threadIdx.x == 0) lambda(); + #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x); + #else + KOKKOS_IMPL_CUDA_SYNCWARP_MASK; + #endif #endif } @@ -979,7 +925,11 @@ KOKKOS_INLINE_FUNCTION void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) { #ifdef __CUDA_ARCH__ if(threadIdx.x == 0 && threadIdx.y == 0) lambda(); + #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x); + #else + KOKKOS_IMPL_CUDA_SYNCWARP_MASK; + #endif #endif } diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp index 9a9448f67..25951b81b 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp @@ -2,9 +2,11 @@ #if defined( __CUDA_ARCH__ ) #if ( CUDA_VERSION < 9000 ) +#define KOKKOS_IMPL_CUDA_ACTIVEMASK 0 #define KOKKOS_IMPL_CUDA_SYNCWARP __threadfence_block() -#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(x) __threadfence_block() +#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK __threadfence_block() #define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot(x) +#define KOKKOS_IMPL_CUDA_BALLOT_MASK(x) __ballot(x) #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl(x,y,z) #define KOKKOS_IMPL_CUDA_SHFL_MASK(m,x,y,z) __shfl(x,y,z) #define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up(x,y,z) @@ -12,9 +14,11 @@ #define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) __shfl_down(x,y,z) #define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m,x,y,z) __shfl_down(x,y,z) #else +#define KOKKOS_IMPL_CUDA_ACTIVEMASK __activemask() #define KOKKOS_IMPL_CUDA_SYNCWARP __syncwarp(0xffffffff) -#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) __syncwarp(m) +#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) 
__syncwarp(m); #define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot_sync(__activemask(),x) +#define KOKKOS_IMPL_CUDA_BALLOT_MASK(m,x) __ballot_sync(m,x) #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl_sync(0xffffffff,x,y,z) #define KOKKOS_IMPL_CUDA_SHFL_MASK(m,x,y,z) __shfl_sync(m,x,y,z) #define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up_sync(0xffffffff,x,y,z) @@ -23,11 +27,16 @@ #define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m,x,y,z) __shfl_down_sync(m,x,y,z) #endif #else +#define KOKKOS_IMPL_CUDA_ACTIVEMASK 0 #define KOKKOS_IMPL_CUDA_SYNCWARP +#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK #define KOKKOS_IMPL_CUDA_BALLOT(x) 0 +#define KOKKOS_IMPL_CUDA_BALLOT_MASK(x) 0 #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) 0 +#define KOKKOS_IMPL_CUDA_SHFL_MASK(m,x,y,z) 0 #define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) 0 #define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) 0 +#define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m,x,y,z) 0 #endif #if ( CUDA_VERSION >= 9000 ) && (!defined(KOKKOS_COMPILER_CLANG)) diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index 49b11f3ae..af2aff8b3 100644 --- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -279,6 +279,8 @@ public: KOKKOS_INLINE_FUNCTION static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker ) { + if(arg_data_ptr == NULL) return handle_type(); + #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) // Assignment of texture = non-texture requires creation of a texture object // which can only occur on the host. 
In addition, 'get_record' is only valid diff --git a/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp index 976bdf05d..fb0d6cde8 100644 --- a/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp +++ b/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp @@ -46,6 +46,8 @@ #include <initializer_list> +#include <Kokkos_Layout.hpp> + #include<impl/KokkosExp_Host_IterateTile.hpp> #include <Kokkos_ExecPolicy.hpp> #include <Kokkos_Parallel.hpp> @@ -63,13 +65,15 @@ namespace Kokkos { // ------------------------------------------------------------------ // - +// Moved to Kokkos_Layout.hpp for more general accessibility +/* enum class Iterate { Default, // Default for the device Left, // Left indices stride fastest Right, // Right indices stride fastest }; +*/ template <typename ExecSpace> struct default_outer_direction diff --git a/packages/kokkos/core/src/Kokkos_Array.hpp b/packages/kokkos/core/src/Kokkos_Array.hpp index c602b0353..8e5862fe9 100644 --- a/packages/kokkos/core/src/Kokkos_Array.hpp +++ b/packages/kokkos/core/src/Kokkos_Array.hpp @@ -45,11 +45,13 @@ #define KOKKOS_ARRAY_HPP #include <Kokkos_Macros.hpp> +#include <impl/Kokkos_Error.hpp> #include <type_traits> #include <algorithm> #include <limits> #include <cstddef> +#include <string> namespace Kokkos { @@ -132,6 +134,7 @@ public: KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return N ; } KOKKOS_INLINE_FUNCTION static constexpr bool empty(){ return false ; } + KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return N ; } template< typename iType > KOKKOS_INLINE_FUNCTION @@ -160,7 +163,7 @@ public: return & m_internal_implementation_private_member_data[0]; } - #ifdef KOKKOS_ROCM_CLANG_WORKAROUND + #ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND // Do not default unless move and move-assignment are also defined KOKKOS_INLINE_FUNCTION ~Array() = default ; @@ -197,6 +200,7 @@ public: KOKKOS_INLINE_FUNCTION static constexpr size_type 
size() { return 0 ; } KOKKOS_INLINE_FUNCTION static constexpr bool empty() { return true ; } + KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return 0 ; } template< typename iType > KOKKOS_INLINE_FUNCTION @@ -261,6 +265,7 @@ public: KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size ; } KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 != m_size ; } + KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return m_size ; } template< typename iType > KOKKOS_INLINE_FUNCTION @@ -336,6 +341,7 @@ public: KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size ; } KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 != m_size ; } + KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return m_size ; } template< typename iType > KOKKOS_INLINE_FUNCTION diff --git a/packages/kokkos/core/src/Kokkos_Concepts.hpp b/packages/kokkos/core/src/Kokkos_Concepts.hpp index 2e2507b27..117469b0a 100644 --- a/packages/kokkos/core/src/Kokkos_Concepts.hpp +++ b/packages/kokkos/core/src/Kokkos_Concepts.hpp @@ -105,7 +105,10 @@ namespace Kokkos { template< typename T > struct is_ ## CONCEPT { \ private: \ template< typename , typename = std::true_type > struct have : std::false_type {}; \ - template< typename U > struct have<U,typename std::is_same<U,typename U:: CONCEPT >::type> : std::true_type {}; \ + template< typename U > struct have<U,typename std::is_same< \ + typename std::remove_cv<U>::type, \ + typename std::remove_cv<typename U:: CONCEPT>::type \ + >::type> : std::true_type {}; \ public: \ enum { value = is_ ## CONCEPT::template have<T>::value }; \ }; diff --git a/packages/kokkos/core/src/Kokkos_CopyViews.hpp b/packages/kokkos/core/src/Kokkos_CopyViews.hpp index 31281bc31..86547420e 100644 --- a/packages/kokkos/core/src/Kokkos_CopyViews.hpp +++ b/packages/kokkos/core/src/Kokkos_CopyViews.hpp @@ -453,8 +453,9 @@ template<class ViewTypeA,class ViewTypeB, class Layout, class ExecSpace,typename 
struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,2,iType,KOKKOS_IMPL_COMPILING_LIBRARY> { ViewTypeA a; ViewTypeB b; - - typedef Kokkos::Rank<2,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type; + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern; + typedef Kokkos::Rank<2,outer_iteration_pattern,inner_iteration_pattern> iterate_type; typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { @@ -475,7 +476,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,3,iType,KOKKOS_IMPL_COMPILI ViewTypeA a; ViewTypeB b; - typedef Kokkos::Rank<3,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type; + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern; + typedef Kokkos::Rank<3,outer_iteration_pattern,inner_iteration_pattern> iterate_type; typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { @@ -496,7 +499,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,4,iType,KOKKOS_IMPL_COMPILI ViewTypeA a; ViewTypeB b; - typedef Kokkos::Rank<4,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type; + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern; + 
typedef Kokkos::Rank<4,outer_iteration_pattern,inner_iteration_pattern> iterate_type; typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { @@ -519,7 +524,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,5,iType,KOKKOS_IMPL_COMPILI ViewTypeA a; ViewTypeB b; - typedef Kokkos::Rank<5,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type; + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern; + typedef Kokkos::Rank<5,outer_iteration_pattern,inner_iteration_pattern> iterate_type; typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { @@ -542,7 +549,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,6,iType,KOKKOS_IMPL_COMPILI ViewTypeA a; ViewTypeB b; - typedef Kokkos::Rank<6,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type; + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern; + typedef Kokkos::Rank<6,outer_iteration_pattern,inner_iteration_pattern> iterate_type; typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { @@ -566,7 +575,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,7,iType,KOKKOS_IMPL_COMPILI ViewTypeA a; ViewTypeB b; - typedef Kokkos::Rank<6,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type; + static 
const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern; + typedef Kokkos::Rank<6,outer_iteration_pattern,inner_iteration_pattern> iterate_type; typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { @@ -590,7 +601,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,8,iType,KOKKOS_IMPL_COMPILI ViewTypeA a; ViewTypeB b; - typedef Kokkos::Rank<6,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type; + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern; + typedef Kokkos::Rank<6,outer_iteration_pattern,inner_iteration_pattern> iterate_type; typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { @@ -642,7 +655,9 @@ void view_copy(const DstType& dst, const SrcType& src) { int64_t strides[DstType::Rank+1]; dst.stride(strides); Kokkos::Iterate iterate; - if ( std::is_same<typename DstType::array_layout,Kokkos::LayoutRight>::value ) { + if ( Kokkos::is_layouttiled<typename DstType::array_layout>::value ) { + iterate = Kokkos::layout_iterate_type_selector<typename DstType::array_layout>::outer_iteration_pattern; + } else if ( std::is_same<typename DstType::array_layout,Kokkos::LayoutRight>::value ) { iterate = Kokkos::Iterate::Right; } else if ( std::is_same<typename DstType::array_layout,Kokkos::LayoutLeft>::value ) { iterate = Kokkos::Iterate::Left; @@ -1243,9 +1258,9 @@ void deep_copy ViewTypeFlat; ViewTypeFlat 
dst_flat(dst.data(),dst.size()); - if(dst.span() < std::numeric_limits<int>::max()) + if(dst.span() < std::numeric_limits<int>::max()) { Kokkos::Impl::ViewFill< ViewTypeFlat , Kokkos::LayoutRight, typename ViewType::execution_space, ViewTypeFlat::Rank, int >( dst_flat , value ); - else + } else Kokkos::Impl::ViewFill< ViewTypeFlat , Kokkos::LayoutRight, typename ViewType::execution_space, ViewTypeFlat::Rank, int64_t >( dst_flat , value ); Kokkos::fence(); return; @@ -1397,7 +1412,6 @@ void deep_copy enum { SrcExecCanAccessDst = Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible }; - // Checking for Overlapping Views. dst_value_type* dst_start = dst.data(); dst_value_type* dst_end = dst.data() + dst.span(); @@ -1493,7 +1507,7 @@ void deep_copy Kokkos::fence(); } else { Kokkos::fence(); - Impl::view_copy(typename dst_type::uniform_runtime_nomemspace_type(dst),typename src_type::uniform_runtime_const_nomemspace_type(src)); + Impl::view_copy(dst, src); Kokkos::fence(); } } @@ -1739,8 +1753,7 @@ void deep_copy exec_space.fence(); } else { exec_space.fence(); - Impl::view_copy(typename dst_type::uniform_runtime_nomemspace_type(dst), - typename src_type::uniform_runtime_const_nomemspace_type(src)); + Impl::view_copy(dst, src); exec_space.fence(); } } @@ -1917,4 +1930,213 @@ void realloc( Kokkos::View<T,P...> & v , } } /* namespace Kokkos */ +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +// Deduce Mirror Types +template<class Space, class T, class ... 
P> +struct MirrorViewType { + // The incoming view_type + typedef typename Kokkos::View<T,P...> src_view_type; + // The memory space for the mirror view + typedef typename Space::memory_space memory_space; + // Check whether it is the same memory space + enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; + // The array_layout + typedef typename src_view_type::array_layout array_layout; + // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. + typedef typename src_view_type::non_const_data_type data_type; + // The destination view type if it is not the same memory space + typedef Kokkos::View<data_type,array_layout,Space> dest_view_type; + // If it is the same memory_space return the existsing view_type + // This will also keep the unmanaged trait if necessary + typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type; +}; + +template<class Space, class T, class ... P> +struct MirrorType { + // The incoming view_type + typedef typename Kokkos::View<T,P...> src_view_type; + // The memory space for the mirror view + typedef typename Space::memory_space memory_space; + // Check whether it is the same memory space + enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; + // The array_layout + typedef typename src_view_type::array_layout array_layout; + // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. + typedef typename src_view_type::non_const_data_type data_type; + // The destination view type if it is not the same memory space + typedef Kokkos::View<data_type,array_layout,Space> view_type; +}; + +} + +template< class T , class ... P > +inline +typename Kokkos::View<T,P...>::HostMirror +create_mirror( const Kokkos::View<T,P...> & src + , typename std::enable_if< + std::is_same< typename ViewTraits<T,P...>::specialize , void >::value && + ! 
std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout + , Kokkos::LayoutStride >::value + >::type * = 0 + ) +{ + typedef View<T,P...> src_type ; + typedef typename src_type::HostMirror dst_type ; + + return dst_type( std::string( src.label() ).append("_mirror") +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + , src.extent(0) + , src.extent(1) + , src.extent(2) + , src.extent(3) + , src.extent(4) + , src.extent(5) + , src.extent(6) + , src.extent(7) ); +#else + , src.rank_dynamic > 0 ? src.extent(0): KOKKOS_IMPL_CTOR_DEFAULT_ARG + , src.rank_dynamic > 1 ? src.extent(1): KOKKOS_IMPL_CTOR_DEFAULT_ARG + , src.rank_dynamic > 2 ? src.extent(2): KOKKOS_IMPL_CTOR_DEFAULT_ARG + , src.rank_dynamic > 3 ? src.extent(3): KOKKOS_IMPL_CTOR_DEFAULT_ARG + , src.rank_dynamic > 4 ? src.extent(4): KOKKOS_IMPL_CTOR_DEFAULT_ARG + , src.rank_dynamic > 5 ? src.extent(5): KOKKOS_IMPL_CTOR_DEFAULT_ARG + , src.rank_dynamic > 6 ? src.extent(6): KOKKOS_IMPL_CTOR_DEFAULT_ARG + , src.rank_dynamic > 7 ? src.extent(7): KOKKOS_IMPL_CTOR_DEFAULT_ARG ); +#endif +} + +template< class T , class ... 
P > +inline +typename Kokkos::View<T,P...>::HostMirror +create_mirror( const Kokkos::View<T,P...> & src + , typename std::enable_if< + std::is_same< typename ViewTraits<T,P...>::specialize , void >::value && + std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout + , Kokkos::LayoutStride >::value + >::type * = 0 + ) +{ + typedef View<T,P...> src_type ; + typedef typename src_type::HostMirror dst_type ; + + Kokkos::LayoutStride layout ; + + layout.dimension[0] = src.extent(0); + layout.dimension[1] = src.extent(1); + layout.dimension[2] = src.extent(2); + layout.dimension[3] = src.extent(3); + layout.dimension[4] = src.extent(4); + layout.dimension[5] = src.extent(5); + layout.dimension[6] = src.extent(6); + layout.dimension[7] = src.extent(7); + + layout.stride[0] = src.stride_0(); + layout.stride[1] = src.stride_1(); + layout.stride[2] = src.stride_2(); + layout.stride[3] = src.stride_3(); + layout.stride[4] = src.stride_4(); + layout.stride[5] = src.stride_5(); + layout.stride[6] = src.stride_6(); + layout.stride[7] = src.stride_7(); + + return dst_type( std::string( src.label() ).append("_mirror") , layout ); +} + + +// Create a mirror in a new space (specialization for different space) +template<class Space, class T, class ... P> +typename Impl::MirrorType<Space,T,P ...>::view_type +create_mirror(const Space& , const Kokkos::View<T,P...> & src + , typename std::enable_if< + std::is_same< typename ViewTraits<T,P...>::specialize , void >::value + >::type * = 0) { + return typename Impl::MirrorType<Space,T,P ...>::view_type(src.label(),src.layout()); +} + +template< class T , class ... 
P > +inline +typename Kokkos::View<T,P...>::HostMirror +create_mirror_view( const Kokkos::View<T,P...> & src + , typename std::enable_if<( + std::is_same< typename Kokkos::View<T,P...>::memory_space + , typename Kokkos::View<T,P...>::HostMirror::memory_space + >::value + && + std::is_same< typename Kokkos::View<T,P...>::data_type + , typename Kokkos::View<T,P...>::HostMirror::data_type + >::value + )>::type * = 0 + ) +{ + return src ; +} + +template< class T , class ... P > +inline +typename Kokkos::View<T,P...>::HostMirror +create_mirror_view( const Kokkos::View<T,P...> & src + , typename std::enable_if< ! ( + std::is_same< typename Kokkos::View<T,P...>::memory_space + , typename Kokkos::View<T,P...>::HostMirror::memory_space + >::value + && + std::is_same< typename Kokkos::View<T,P...>::data_type + , typename Kokkos::View<T,P...>::HostMirror::data_type + >::value + )>::type * = 0 + ) +{ + return Kokkos::create_mirror( src ); +} + +// Create a mirror view in a new space (specialization for same space) +template<class Space, class T, class ... P> +typename Impl::MirrorViewType<Space,T,P ...>::view_type +create_mirror_view(const Space& , const Kokkos::View<T,P...> & src + , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + return src; +} + +// Create a mirror view in a new space (specialization for different space) +template<class Space, class T, class ... P> +typename Impl::MirrorViewType<Space,T,P ...>::view_type +create_mirror_view(const Space& , const Kokkos::View<T,P...> & src + , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + return typename Impl::MirrorViewType<Space,T,P ...>::view_type(src.label(),src.layout()); +} + +// Create a mirror view and deep_copy in a new space (specialization for same space) +template<class Space, class T, class ... 
P> +typename Impl::MirrorViewType<Space,T,P ...>::view_type +create_mirror_view_and_copy(const Space& , const Kokkos::View<T,P...> & src + , std::string const& name = "" + , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + (void)name; + return src; +} + +// Create a mirror view and deep_copy in a new space (specialization for different space) +template<class Space, class T, class ... P> +typename Impl::MirrorViewType<Space,T,P ...>::view_type +create_mirror_view_and_copy(const Space& , const Kokkos::View<T,P...> & src + , std::string const& name = "" + , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { + using Mirror = typename Impl::MirrorViewType<Space,T,P ...>::view_type; + std::string label = name.empty() ? src.label() : name; + auto mirror = Mirror(ViewAllocateWithoutInitializing(label), src.layout()); + deep_copy(mirror, src); + return mirror; +} + +} /* namespace Kokkos */ + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + #endif diff --git a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp index a33e28fcd..d4693b43c 100644 --- a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -57,6 +57,10 @@ namespace Kokkos { +struct ParallelForTag {}; +struct ParallelScanTag {}; +struct ParallelReduceTag {}; + struct ChunkSize { int value; ChunkSize(int value_):value(value_) {} @@ -320,6 +324,10 @@ public: template< class FunctorType > static int team_size_recommended( const FunctorType & , const int&); + + template<class FunctorType> + int team_size_recommended( const FunctorType & functor , const int vector_length); + //---------------------------------------- /** \brief Construct policy with the given instance of the execution space */ TeamPolicyInternal( const 
typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 ); diff --git a/packages/kokkos/core/src/Kokkos_Layout.hpp b/packages/kokkos/core/src/Kokkos_Layout.hpp index b0f92d8cf..43e117783 100644 --- a/packages/kokkos/core/src/Kokkos_Layout.hpp +++ b/packages/kokkos/core/src/Kokkos_Layout.hpp @@ -76,6 +76,8 @@ struct LayoutLeft { size_t dimension[ ARRAY_LAYOUT_MAX_RANK ]; + enum { is_extent_constructible = true }; + LayoutLeft( LayoutLeft const & ) = default ; LayoutLeft( LayoutLeft && ) = default ; LayoutLeft & operator = ( LayoutLeft const & ) = default ; @@ -108,6 +110,8 @@ struct LayoutRight { size_t dimension[ ARRAY_LAYOUT_MAX_RANK ]; + enum { is_extent_constructible = true }; + LayoutRight( LayoutRight const & ) = default ; LayoutRight( LayoutRight && ) = default ; LayoutRight & operator = ( LayoutRight const & ) = default ; @@ -132,6 +136,8 @@ struct LayoutStride { size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ; size_t stride[ ARRAY_LAYOUT_MAX_RANK ] ; + enum { is_extent_constructible = false }; + LayoutStride( LayoutStride const & ) = default ; LayoutStride( LayoutStride && ) = default ; LayoutStride & operator = ( LayoutStride const & ) = default ; @@ -222,6 +228,8 @@ struct LayoutTileLeft { size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ; + enum { is_extent_constructible = true }; + LayoutTileLeft( LayoutTileLeft const & ) = default ; LayoutTileLeft( LayoutTileLeft && ) = default ; LayoutTileLeft & operator = ( LayoutTileLeft const & ) = default ; @@ -235,6 +243,144 @@ struct LayoutTileLeft { : dimension { argN0 , argN1 , argN2 , argN3 , argN4 , argN5 , argN6 , argN7 } {} }; + +////////////////////////////////////////////////////////////////////////////////////// + +enum class Iterate +{ + Default, + Left, // Left indices stride fastest + Right // Right indices stride fastest +}; + +// To check for LayoutTiled +// This is to hide extra compile-time 'identifier' info within the LayoutTiled class by not 
relying on template specialization to include the ArgN*'s +template < typename LayoutTiledCheck, class Enable = void > +struct is_layouttiled : std::false_type {}; + +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE +template < typename LayoutTiledCheck > +struct is_layouttiled< LayoutTiledCheck, typename std::enable_if<LayoutTiledCheck::is_array_layout_tiled>::type > : std::true_type {}; + +namespace Experimental { + +/// LayoutTiled +// Must have Rank >= 2 +template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, + unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 = 0, unsigned ArgN3 = 0, unsigned ArgN4 = 0, unsigned ArgN5 = 0, unsigned ArgN6 = 0, unsigned ArgN7 = 0, + bool IsPowerOfTwo = + ( Impl::is_integral_power_of_two(ArgN0) && + Impl::is_integral_power_of_two(ArgN1) && + (Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0) ) && + (Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0) ) && + (Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0) ) && + (Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0) ) && + (Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0) ) && + (Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0) ) + ) + > +struct LayoutTiled { + + static_assert( IsPowerOfTwo + , "LayoutTiled must be given power-of-two tile dimensions" ); + +#if 0 + static_assert( (Impl::is_integral_power_of_two(ArgN0) ) && + (Impl::is_integral_power_of_two(ArgN1) ) && + (Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0) ) && + (Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0) ) && + (Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0) ) && + (Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0) ) && + (Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0) ) && + (Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0) ) + , "LayoutTiled must be given power-of-two tile dimensions" ); +#endif + + typedef LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, IsPowerOfTwo> array_layout ; + static constexpr Iterate outer_pattern = 
OuterP; + static constexpr Iterate inner_pattern = InnerP; + + enum { N0 = ArgN0 }; + enum { N1 = ArgN1 }; + enum { N2 = ArgN2 }; + enum { N3 = ArgN3 }; + enum { N4 = ArgN4 }; + enum { N5 = ArgN5 }; + enum { N6 = ArgN6 }; + enum { N7 = ArgN7 }; + + size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ; + + enum { is_extent_constructible = true }; + + LayoutTiled( LayoutTiled const & ) = default ; + LayoutTiled( LayoutTiled && ) = default ; + LayoutTiled & operator = ( LayoutTiled const & ) = default ; + LayoutTiled & operator = ( LayoutTiled && ) = default ; + + KOKKOS_INLINE_FUNCTION + explicit constexpr + LayoutTiled( size_t argN0 = 0 , size_t argN1 = 0 , size_t argN2 = 0 , size_t argN3 = 0 + , size_t argN4 = 0 , size_t argN5 = 0 , size_t argN6 = 0 , size_t argN7 = 0 + ) + : dimension { argN0 , argN1 , argN2 , argN3 , argN4 , argN5 , argN6 , argN7 } {} +}; + +} // namespace Experimental +#endif + + +// For use with view_copy +template < typename ... Layout > +struct layout_iterate_type_selector { + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Default ; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Default ; +}; + +template <> +struct layout_iterate_type_selector< Kokkos::LayoutRight > { + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right ; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right ; +}; + +template <> +struct layout_iterate_type_selector< Kokkos::LayoutLeft > { + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left ; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left ; +}; + +template <> +struct layout_iterate_type_selector< Kokkos::LayoutStride > { + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Default ; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Default ; +}; + +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE +template < unsigned ArgN0 , unsigned ArgN1 , 
unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 , unsigned ArgN7 > +struct layout_iterate_type_selector< Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > { + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left ; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left ; +}; + +template < unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 , unsigned ArgN7 > +struct layout_iterate_type_selector< Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > { + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right ; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left ; +}; + +template < unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 , unsigned ArgN7 > +struct layout_iterate_type_selector< Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > { + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left ; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right ; +}; + +template < unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 , unsigned ArgN7 > +struct layout_iterate_type_selector< Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > { + static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right ; + static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right ; +}; +#endif + } // namespace Kokkos #endif 
// #ifndef KOKKOS_LAYOUT_HPP diff --git a/packages/kokkos/core/src/Kokkos_Macros.hpp b/packages/kokkos/core/src/Kokkos_Macros.hpp index 0cd19d223..96bd23e22 100644 --- a/packages/kokkos/core/src/Kokkos_Macros.hpp +++ b/packages/kokkos/core/src/Kokkos_Macros.hpp @@ -153,7 +153,7 @@ #else #define KOKKOS_LAMBDA [=]__host__ __device__ - #if defined( KOKKOS_ENABLE_CXX1Z ) + #if defined( KOKKOS_ENABLE_CXX17 ) || defined( KOKKOS_ENABLE_CXX20 ) #define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__ #endif #endif @@ -213,7 +213,7 @@ #define KOKKOS_LAMBDA [=] #endif -#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined( KOKKOS_CLASS_LAMBDA ) +#if (defined( KOKKOS_ENABLE_CXX17 ) || defined( KOKKOS_ENABLE_CXX20) )&& !defined( KOKKOS_CLASS_LAMBDA ) #define KOKKOS_CLASS_LAMBDA [=,*this] #endif @@ -521,6 +521,9 @@ #if defined ( KOKKOS_ENABLE_CUDA ) #if ( 9000 <= CUDA_VERSION ) #define KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND + #if ( __CUDA_ARCH__ ) + #define KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + #endif #endif #endif diff --git a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index 7bed7aa3d..190079451 100644 --- a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -793,7 +793,7 @@ struct ParallelReduceReturnValue<typename std::enable_if< static return_type return_value(ReturnType& return_val, const FunctorType& functor) { -#ifdef KOKOOS_ENABLE_DEPRECATED_CODE +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE return return_type(return_val,functor.value_count); #else if ( is_array<ReturnType>::value ) @@ -1002,7 +1002,8 @@ void parallel_reduce(const std::string& label, typename Impl::enable_if< Kokkos::Impl::is_execution_policy<PolicyType>::value >::type * = 0) { - Impl::ParallelReduceAdaptor<PolicyType,FunctorType,const ReturnType>::execute(label,policy,functor,return_value); + ReturnType return_value_impl = return_value; + 
Impl::ParallelReduceAdaptor<PolicyType,FunctorType,ReturnType>::execute(label,policy,functor,return_value_impl); } template< class PolicyType, class FunctorType, class ReturnType > @@ -1054,6 +1055,9 @@ void parallel_reduce(const std::string& label, , typename ValueTraits::pointer_type >::type value_type ; + static_assert(Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,PolicyType,FunctorType>:: + has_final_member_function,"Calling parallel_reduce without either return value or final function."); + typedef Kokkos::View< value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged @@ -1076,6 +1080,9 @@ void parallel_reduce(const PolicyType& policy, , typename ValueTraits::pointer_type >::type value_type ; + static_assert(Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,PolicyType,FunctorType>:: + has_final_member_function,"Calling parallel_reduce without either return value or final function."); + typedef Kokkos::View< value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged @@ -1096,6 +1103,9 @@ void parallel_reduce(const size_t& policy, , typename ValueTraits::pointer_type >::type value_type ; + static_assert(Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,RangePolicy<>,FunctorType>:: + has_final_member_function,"Calling parallel_reduce without either return value or final function."); + typedef Kokkos::View< value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged @@ -1117,6 +1127,9 @@ void parallel_reduce(const std::string& label, , typename ValueTraits::pointer_type >::type value_type ; + static_assert(Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,RangePolicy<>,FunctorType>:: + has_final_member_function,"Calling parallel_reduce without either return value or final function."); + typedef Kokkos::View< value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged diff --git a/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp b/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp index 4527dd4c1..86d803ccc 100644 --- 
a/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp +++ b/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp @@ -136,6 +136,55 @@ public: } } + + KOKKOS_INLINE_FUNCTION + void* get_shmem_aligned (const ptrdiff_t size, const ptrdiff_t alignment, int level = -1) const { + if(level == -1) + level = m_default_level; + if(level == 0) { + + char* previous = m_iter_L0; + const ptrdiff_t missalign = size_t(m_iter_L0)%alignment; + if(missalign) m_iter_L0 += alignment-missalign; + + void* tmp = m_iter_L0 + m_offset * size; + if (m_end_L0 < (m_iter_L0 += size * m_multiplier)) { + m_iter_L0 = previous; // put it back like it was + #ifdef KOKKOS_DEBUG + // mfh 23 Jun 2015: printf call consumes 25 registers + // in a CUDA build, so only print in debug mode. The + // function still returns NULL if not enough memory. + printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate " + "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size), + long(m_end_L0-m_iter_L0)); + #endif // KOKKOS_DEBUG + tmp = 0; + } + return tmp; + } else { + + char* previous = m_iter_L1; + const ptrdiff_t missalign = size_t(m_iter_L1)%alignment; + if(missalign) m_iter_L1 += alignment-missalign; + + void* tmp = m_iter_L1 + m_offset * size; + if (m_end_L1 < (m_iter_L1 += size * m_multiplier)) { + m_iter_L1 = previous; // put it back like it was + #ifdef KOKKOS_DEBUG + // mfh 23 Jun 2015: printf call consumes 25 registers + // in a CUDA build, so only print in debug mode. The + // function still returns NULL if not enough memory. 
+ printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate " + "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size), + long(m_end_L1-m_iter_L1)); + #endif // KOKKOS_DEBUG + tmp = 0; + } + return tmp; + + } + } + template< typename IntType > KOKKOS_INLINE_FUNCTION ScratchMemorySpace( void * ptr_L0 , const IntType & size_L0 , void * ptr_L1 = NULL , const IntType & size_L1 = 0) diff --git a/packages/kokkos/core/src/Kokkos_Serial.hpp b/packages/kokkos/core/src/Kokkos_Serial.hpp index 911aba892..01701e53a 100644 --- a/packages/kokkos/core/src/Kokkos_Serial.hpp +++ b/packages/kokkos/core/src/Kokkos_Serial.hpp @@ -262,7 +262,7 @@ public: } //---------------------------------------- - +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > static int team_size_max( const FunctorType & ) { return 1 ; } @@ -274,6 +274,16 @@ public: template< class FunctorType > static int team_size_recommended( const FunctorType & , const int& ) { return 1 ; } +#endif + + template<class FunctorType> + int team_size_max( const FunctorType&, const ParallelForTag& ) const { return 1 ; } + template<class FunctorType> + int team_size_max( const FunctorType&, const ParallelReduceTag& ) const { return 1 ; } + template<class FunctorType> + int team_size_recommended( const FunctorType&, const ParallelForTag& ) const { return 1 ; } + template<class FunctorType> + int team_size_recommended( const FunctorType&, const ParallelReduceTag& ) const { return 1 ; } //---------------------------------------- @@ -281,6 +291,16 @@ public: inline int league_size() const { return m_league_size ; } inline size_t scratch_size(const int& level, int = 0) const { return m_team_scratch_size[level] + m_thread_scratch_size[level]; } + inline static + int vector_length_max() + { return 1024; } // Use arbitrary large number, is meant as a vectorizable length + + inline static + int scratch_size_max(int level) + { return (level==0? 
+ 1024*32: + 20*1024*1024); + } /** \brief Specify league size, request team size */ TeamPolicyInternal( execution_space & , int league_size_request diff --git a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp index c3185853d..5045e9cbb 100644 --- a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -624,7 +624,6 @@ public: when_all( Future< A1 , A2 > const arg[] , int narg ) { using future_type = Future< execution_space > ; - using task_base = Kokkos::Impl::TaskBase< void , void , void > ; future_type f ; @@ -692,7 +691,6 @@ public: { using input_type = decltype( func(0) ); using future_type = Future< execution_space > ; - using task_base = Kokkos::Impl::TaskBase< void , void , void > ; static_assert( is_future< input_type >::value , "Functor must return a Kokkos::Future" ); diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp index 70301884c..da49aff22 100644 --- a/packages/kokkos/core/src/Kokkos_View.hpp +++ b/packages/kokkos/core/src/Kokkos_View.hpp @@ -707,10 +707,17 @@ public: //---------------------------------------- // Allow specializations to query their specialized map +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE KOKKOS_INLINE_FUNCTION const Kokkos::Impl::ViewMapping< traits , void > & implementation_map() const { return m_map ; } - +#endif + KOKKOS_INLINE_FUNCTION + const Kokkos::Impl::ViewMapping< traits , void > & + impl_map() const { return m_map ; } + KOKKOS_INLINE_FUNCTION + const Kokkos::Impl::SharedAllocationTracker & + impl_track() const { return m_track ; } //---------------------------------------- private: @@ -752,423 +759,421 @@ private: #endif public: - #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - template< class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( Kokkos::Impl::are_integral<Args...>::value - && ( 0 == Rank ) - ), reference_type >::type - operator()( Args ... 
args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,args...) ) - return m_map.reference(); - } - - template< typename I0 - , class ... Args> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,Args...>::value - && ( 1 == Rank ) - && ! is_default_map - ), reference_type >::type - operator()( const I0 & i0, - Args ... args) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) - return m_map.reference(i0); - } - - template< typename I0 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,Args...>::value - && ( 1 == Rank ) - && is_default_map - && ! is_layout_stride - ), reference_type >::type - operator()( const I0 & i0 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) - return m_map.m_handle[ i0 ]; - } + template< class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if<( Kokkos::Impl::are_integral<Args...>::value + && ( 0 == Rank ) + ), reference_type >::type + operator()( Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,args...) ) + return m_map.reference(); + } - template< typename I0 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,Args...>::value - && ( 1 == Rank ) - && is_default_map - && is_layout_stride - ), reference_type >::type - operator()( const I0 & i0 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) - return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; - } + template< typename I0 + , class ... Args> + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,Args...>::value + && ( 1 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0, + Args ... args) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) 
) + return m_map.reference(i0); + } - //------------------------------ - // Rank 1 operator[] + template< typename I0 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,Args...>::value + && ( 1 == Rank ) + && is_default_map + && ! is_layout_stride + ), reference_type >::type + operator()( const I0 & i0 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) + return m_map.m_impl_handle[ i0 ]; + } - template< typename I0 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0>::value - && ( 1 == Rank ) - && ! is_default_map - ), reference_type >::type - operator[]( const I0 & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) - return m_map.reference(i0); - } + template< typename I0 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,Args...>::value + && ( 1 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator()( const I0 & i0 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) + return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ]; + } - template< typename I0 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0>::value - && ( 1 == Rank ) - && is_default_map - && ! is_layout_stride - ), reference_type >::type - operator[]( const I0 & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) - return m_map.m_handle[ i0 ]; - } + //------------------------------ + // Rank 1 operator[] + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && ! 
is_default_map + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) + return m_map.reference(i0); + } template< typename I0 > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0>::value - && ( 1 == Rank ) - && is_default_map - && is_layout_stride - ), reference_type >::type - operator[]( const I0 & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) - return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; - } - - - template< typename I0 , typename I1 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,Args...>::value - && ( 2 == Rank ) - && ! is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.reference(i0,i1); - } + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && is_default_map + && ! is_layout_stride + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) + return m_map.m_impl_handle[ i0 ]; + } + + template< typename I0 > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0>::value + && ( 1 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator[]( const I0 & i0 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) + return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ]; + } - template< typename I0 , typename I1 - , class ... 
Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,Args...>::value - && ( 2 == Rank ) - && is_default_map - && is_layout_left && ( traits::rank_dynamic == 0 ) - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ]; - } - - template< typename I0 , typename I1 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,Args...>::value - && ( 2 == Rank ) - && is_default_map - && is_layout_left && ( traits::rank_dynamic != 0 ) - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ]; - } + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) + return m_map.reference(i0,i1); + } - template< typename I0 , typename I1 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,Args...>::value - && ( 2 == Rank ) - && is_default_map - && is_layout_right && ( traits::rank_dynamic == 0 ) - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ]; - } + template< typename I0 , typename I1 + , class ... 
Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_left && ( traits::rank_dynamic == 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) + return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_dim.N0 * i1 ]; + } - template< typename I0 , typename I1 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,Args...>::value - && ( 2 == Rank ) - && is_default_map - && is_layout_right && ( traits::rank_dynamic != 0 ) - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ]; - } + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_left && ( traits::rank_dynamic != 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) + return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_stride * i1 ]; + } - template< typename I0 , typename I1 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,Args...>::value - && ( 2 == Rank ) - && is_default_map - && is_layout_stride - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) 
) - return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 + - i1 * m_map.m_offset.m_stride.S1 ]; - } + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_right && ( traits::rank_dynamic == 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) + return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_dim.N1 * i0 ]; + } - //------------------------------ - // Rank 3 + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_right && ( traits::rank_dynamic != 0 ) + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) + return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_stride * i0 ]; + } - template< typename I0 , typename I1 , typename I2 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value - && ( 3 == Rank ) - && is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ]; - } + template< typename I0 , typename I1 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,Args...>::value + && ( 2 == Rank ) + && is_default_map + && is_layout_stride + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 + , Args ... 
args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) + return m_map.m_impl_handle[ i0 * m_map.m_impl_offset.m_stride.S0 + + i1 * m_map.m_impl_offset.m_stride.S1 ]; + } - template< typename I0 , typename I1 , typename I2 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value - && ( 3 == Rank ) - && ! is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) ) - return m_map.reference(i0,i1,i2); - } + //------------------------------ + // Rank 3 - //------------------------------ - // Rank 4 + template< typename I0 , typename I1 , typename I2 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value + && ( 3 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) ) + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2) ]; + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value - && ( 4 == Rank ) - && is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ]; - } + template< typename I0 , typename I1 , typename I2 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value + && ( 3 == Rank ) + && ! 
is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) ) + return m_map.reference(i0,i1,i2); + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value - && ( 4 == Rank ) - && ! is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) ) - return m_map.reference(i0,i1,i2,i3); - } + //------------------------------ + // Rank 4 - //------------------------------ - // Rank 5 + template< typename I0 , typename I1 , typename I2 , typename I3 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value + && ( 4 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) ) + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3) ]; + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value - && ( 5 == Rank ) - && is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ]; - } + template< typename I0 , typename I1 , typename I2 , typename I3 + , class ... 
Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value + && ( 4 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) ) + return m_map.reference(i0,i1,i2,i3); + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value - && ( 5 == Rank ) - && ! is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) ) - return m_map.reference(i0,i1,i2,i3,i4); - } + //------------------------------ + // Rank 5 - //------------------------------ - // Rank 6 + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value + && ( 5 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) ) + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4) ]; + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , typename I5 - , class ... 
Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value - && ( 6 == Rank ) - && is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ]; - } + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value + && ( 5 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) ) + return m_map.reference(i0,i1,i2,i3,i4); + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , typename I5 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value - && ( 6 == Rank ) - && ! is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) ) - return m_map.reference(i0,i1,i2,i3,i4,i5); - } + //------------------------------ + // Rank 6 - //------------------------------ - // Rank 7 + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 + , class ... 
Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value + && ( 6 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) ) + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5) ]; + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , typename I5 , typename I6 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value - && ( 7 == Rank ) - && is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 , const I6 & i6 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ]; - } + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value + && ( 6 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) ) + return m_map.reference(i0,i1,i2,i3,i4,i5); + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , typename I5 , typename I6 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value - && ( 7 == Rank ) - && ! 
is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 , const I6 & i6 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,i6); - } + //------------------------------ + // Rank 7 - //------------------------------ - // Rank 8 + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value + && ( 7 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ]; + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , typename I5 , typename I6 , typename I7 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value - && ( 8 == Rank ) - && is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; - } + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 + , class ... 
Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value + && ( 7 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,i6); + } - template< typename I0 , typename I1 , typename I2 , typename I3 - , typename I4 , typename I5 , typename I6 , typename I7 - , class ... Args > - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if< - ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value - && ( 8 == Rank ) - && ! is_default_map - ), reference_type >::type - operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 - , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 - , Args ... args ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7); - } + //------------------------------ + // Rank 8 + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value + && ( 8 == Rank ) + && is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) 
) + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; + } + + template< typename I0 , typename I1 , typename I2 , typename I3 + , typename I4 , typename I5 , typename I6 , typename I7 + , class ... Args > + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if< + ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value + && ( 8 == Rank ) + && ! is_default_map + ), reference_type >::type + operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 + , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 + , Args ... args ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7); + } - #else + #else //------------------------------ // Rank 0 operator() @@ -1206,7 +1211,7 @@ public: operator()( const I0 & i0 ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) - return m_map.m_handle[ i0 ]; + return m_map.m_impl_handle[ i0 ]; } template< typename I0 > @@ -1220,7 +1225,7 @@ public: operator()( const I0 & i0) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) - return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; + return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ]; } //------------------------------ // Rank 1 operator[] @@ -1249,7 +1254,7 @@ public: operator[]( const I0 & i0 ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) - return m_map.m_handle[ i0 ]; + return m_map.m_impl_handle[ i0 ]; } template< typename I0 > @@ -1263,7 +1268,7 @@ public: operator[]( const I0 & i0 ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) - return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; + return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ]; } @@ -1294,7 +1299,7 @@ public: operator()( const I0 & i0 , const I1 & i1) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) ) - return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ]; 
+ return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_dim.N0 * i1 ]; } template< typename I0 , typename I1> @@ -1308,7 +1313,7 @@ public: operator()( const I0 & i0 , const I1 & i1) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) ) - return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ]; + return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_stride * i1 ]; } template< typename I0 , typename I1 > @@ -1322,7 +1327,7 @@ public: operator()( const I0 & i0 , const I1 & i1 ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) ) - return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ]; + return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_dim.N1 * i0 ]; } template< typename I0 , typename I1 > @@ -1336,7 +1341,7 @@ public: operator()( const I0 & i0 , const I1 & i1 ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) ) - return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ]; + return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_stride * i0 ]; } template< typename I0 , typename I1> @@ -1350,8 +1355,8 @@ public: operator()( const I0 & i0 , const I1 & i1 ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) ) - return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 + - i1 * m_map.m_offset.m_stride.S1 ]; + return m_map.m_impl_handle[ i0 * m_map.m_impl_offset.m_stride.S0 + + i1 * m_map.m_impl_offset.m_stride.S1 ]; } //------------------------------ @@ -1367,7 +1372,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2) ]; } template< typename I0 , typename I1 , typename I2> @@ -1396,7 +1401,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ]; + 
return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 > @@ -1427,7 +1432,7 @@ public: , const I4 & i4 ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1460,7 +1465,7 @@ public: , const I4 & i4 , const I5 & i5 ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1493,7 +1498,7 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1526,7 +1531,7 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1545,7 +1550,6 @@ public: } #endif - template< class ... Args > KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if<( Kokkos::Impl::are_integral<Args...>::value @@ -1585,7 +1589,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) - return m_map.m_handle[ i0 ]; + return m_map.m_impl_handle[ i0 ]; } template< typename I0 @@ -1601,7 +1605,7 @@ public: , Args ... 
args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) - return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; + return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ]; } template< typename I0 , typename I1 @@ -1632,7 +1636,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ]; + return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_dim.N0 * i1 ]; } template< typename I0 , typename I1 @@ -1648,7 +1652,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ]; + return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_stride * i1 ]; } template< typename I0 , typename I1 @@ -1664,7 +1668,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ]; + return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_dim.N1 * i0 ]; } template< typename I0 , typename I1 @@ -1680,7 +1684,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ]; + return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_stride * i0 ]; } template< typename I0 , typename I1 @@ -1696,8 +1700,8 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) - return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 + - i1 * m_map.m_offset.m_stride.S1 ]; + return m_map.m_impl_handle[ i0 * m_map.m_impl_offset.m_stride.S0 + + i1 * m_map.m_impl_offset.m_stride.S1 ]; } //------------------------------ @@ -1715,7 +1719,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) 
) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2) ]; } template< typename I0 , typename I1 , typename I2 @@ -1748,7 +1752,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1783,7 +1787,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1820,7 +1824,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1857,7 +1861,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1894,7 +1898,7 @@ public: , Args ... args ) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) 
) - return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; + return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 @@ -1938,6 +1942,8 @@ public: KOKKOS_INLINE_FUNCTION View & operator = ( View && rhs ) { m_track = std::move(rhs.m_track) ; m_map = std::move(rhs.m_map) ; return *this ; } + + //---------------------------------------- // Compatible view copy constructor and assignment // may assign unmanaged from managed. @@ -2206,7 +2212,8 @@ public: , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ) { - + static_assert ( traits::array_layout::is_extent_constructible , "Layout is not extent constructible. A layout object should be passed too.\n" ); + #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST Impl::runtime_check_rank_host(traits::rank_dynamic, std::is_same<typename traits::specialize,void>::value, arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7, label()); @@ -2257,6 +2264,15 @@ public: #endif } + template <class Traits> + KOKKOS_INLINE_FUNCTION + View( const track_type & track, const Kokkos::Impl::ViewMapping< Traits , void > &map ) : + m_track(track), m_map() + { + typedef Kokkos::Impl::ViewMapping< traits , Traits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible View copy construction" ); + Mapping::assign( m_map , map , track ); + } //---------------------------------------- // Memory span required to wrap these dimensions. 
@@ -2346,7 +2362,7 @@ public: static inline size_t shmem_size( typename traits::array_layout const& arg_layout ) { - return map_type::memory_span( arg_layout ); + return map_type::memory_span( arg_layout )+sizeof(typename traits::value_type); } explicit KOKKOS_INLINE_FUNCTION @@ -2354,7 +2370,7 @@ public: , const typename traits::array_layout & arg_layout ) : View( Impl::ViewCtorProp<pointer_type>( reinterpret_cast<pointer_type>( - arg_space.get_shmem( map_type::memory_span( arg_layout ) ) ) ) + arg_space.get_shmem_aligned( map_type::memory_span( arg_layout ), sizeof(typename traits::value_type) ) ) ) , arg_layout ) {} @@ -2370,11 +2386,11 @@ public: , const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ) : View( Impl::ViewCtorProp<pointer_type>( reinterpret_cast<pointer_type>( - arg_space.get_shmem( + arg_space.get_shmem_aligned( map_type::memory_span( typename traits::array_layout ( arg_N0 , arg_N1 , arg_N2 , arg_N3 - , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ) ) ) ) + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ), sizeof(typename traits::value_type) ) ) ) , typename traits::array_layout ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) @@ -2515,209 +2531,6 @@ void shared_allocation_tracking_enable() } /* namespace Impl */ } /* namespace Kokkos */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -// Deduce Mirror Types -template<class Space, class T, class ... 
P> -struct MirrorViewType { - // The incoming view_type - typedef typename Kokkos::View<T,P...> src_view_type; - // The memory space for the mirror view - typedef typename Space::memory_space memory_space; - // Check whether it is the same memory space - enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; - // The array_layout - typedef typename src_view_type::array_layout array_layout; - // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. - typedef typename src_view_type::non_const_data_type data_type; - // The destination view type if it is not the same memory space - typedef Kokkos::View<data_type,array_layout,Space> dest_view_type; - // If it is the same memory_space return the existsing view_type - // This will also keep the unmanaged trait if necessary - typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type; -}; - -template<class Space, class T, class ... P> -struct MirrorType { - // The incoming view_type - typedef typename Kokkos::View<T,P...> src_view_type; - // The memory space for the mirror view - typedef typename Space::memory_space memory_space; - // Check whether it is the same memory space - enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value }; - // The array_layout - typedef typename src_view_type::array_layout array_layout; - // The data type (we probably want it non-const since otherwise we can't even deep_copy to it. - typedef typename src_view_type::non_const_data_type data_type; - // The destination view type if it is not the same memory space - typedef Kokkos::View<data_type,array_layout,Space> view_type; -}; - -} - -template< class T , class ... P > -inline -typename Kokkos::View<T,P...>::HostMirror -create_mirror( const Kokkos::View<T,P...> & src - , typename std::enable_if< - ! 
std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout - , Kokkos::LayoutStride >::value - >::type * = 0 - ) -{ - typedef View<T,P...> src_type ; - typedef typename src_type::HostMirror dst_type ; - - return dst_type( std::string( src.label() ).append("_mirror") -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - , src.extent(0) - , src.extent(1) - , src.extent(2) - , src.extent(3) - , src.extent(4) - , src.extent(5) - , src.extent(6) - , src.extent(7) ); -#else - , src.rank_dynamic > 0 ? src.extent(0): KOKKOS_IMPL_CTOR_DEFAULT_ARG - , src.rank_dynamic > 1 ? src.extent(1): KOKKOS_IMPL_CTOR_DEFAULT_ARG - , src.rank_dynamic > 2 ? src.extent(2): KOKKOS_IMPL_CTOR_DEFAULT_ARG - , src.rank_dynamic > 3 ? src.extent(3): KOKKOS_IMPL_CTOR_DEFAULT_ARG - , src.rank_dynamic > 4 ? src.extent(4): KOKKOS_IMPL_CTOR_DEFAULT_ARG - , src.rank_dynamic > 5 ? src.extent(5): KOKKOS_IMPL_CTOR_DEFAULT_ARG - , src.rank_dynamic > 6 ? src.extent(6): KOKKOS_IMPL_CTOR_DEFAULT_ARG - , src.rank_dynamic > 7 ? src.extent(7): KOKKOS_IMPL_CTOR_DEFAULT_ARG ); -#endif -} - -template< class T , class ... 
P > -inline -typename Kokkos::View<T,P...>::HostMirror -create_mirror( const Kokkos::View<T,P...> & src - , typename std::enable_if< - std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout - , Kokkos::LayoutStride >::value - >::type * = 0 - ) -{ - typedef View<T,P...> src_type ; - typedef typename src_type::HostMirror dst_type ; - - Kokkos::LayoutStride layout ; - - layout.dimension[0] = src.extent(0); - layout.dimension[1] = src.extent(1); - layout.dimension[2] = src.extent(2); - layout.dimension[3] = src.extent(3); - layout.dimension[4] = src.extent(4); - layout.dimension[5] = src.extent(5); - layout.dimension[6] = src.extent(6); - layout.dimension[7] = src.extent(7); - - layout.stride[0] = src.stride_0(); - layout.stride[1] = src.stride_1(); - layout.stride[2] = src.stride_2(); - layout.stride[3] = src.stride_3(); - layout.stride[4] = src.stride_4(); - layout.stride[5] = src.stride_5(); - layout.stride[6] = src.stride_6(); - layout.stride[7] = src.stride_7(); - - return dst_type( std::string( src.label() ).append("_mirror") , layout ); -} - - -// Create a mirror in a new space (specialization for different space) -template<class Space, class T, class ... P> -typename Impl::MirrorType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::View<T,P...> & src) { - return typename Impl::MirrorType<Space,T,P ...>::view_type(src.label(),src.layout()); -} - -template< class T , class ... P > -inline -typename Kokkos::View<T,P...>::HostMirror -create_mirror_view( const Kokkos::View<T,P...> & src - , typename std::enable_if<( - std::is_same< typename Kokkos::View<T,P...>::memory_space - , typename Kokkos::View<T,P...>::HostMirror::memory_space - >::value - && - std::is_same< typename Kokkos::View<T,P...>::data_type - , typename Kokkos::View<T,P...>::HostMirror::data_type - >::value - )>::type * = 0 - ) -{ - return src ; -} - -template< class T , class ... 
P > -inline -typename Kokkos::View<T,P...>::HostMirror -create_mirror_view( const Kokkos::View<T,P...> & src - , typename std::enable_if< ! ( - std::is_same< typename Kokkos::View<T,P...>::memory_space - , typename Kokkos::View<T,P...>::HostMirror::memory_space - >::value - && - std::is_same< typename Kokkos::View<T,P...>::data_type - , typename Kokkos::View<T,P...>::HostMirror::data_type - >::value - )>::type * = 0 - ) -{ - return Kokkos::create_mirror( src ); -} - -// Create a mirror view in a new space (specialization for same space) -template<class Space, class T, class ... P> -typename Impl::MirrorViewType<Space,T,P ...>::view_type -create_mirror_view(const Space& , const Kokkos::View<T,P...> & src - , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { - return src; -} - -// Create a mirror view in a new space (specialization for different space) -template<class Space, class T, class ... P> -typename Impl::MirrorViewType<Space,T,P ...>::view_type -create_mirror_view(const Space& , const Kokkos::View<T,P...> & src - , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { - return typename Impl::MirrorViewType<Space,T,P ...>::view_type(src.label(),src.layout()); -} - -// Create a mirror view and deep_copy in a new space (specialization for same space) -template<class Space, class T, class ... P> -typename Impl::MirrorViewType<Space,T,P ...>::view_type -create_mirror_view_and_copy(const Space& , const Kokkos::View<T,P...> & src - , std::string const& name = "" - , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { - (void)name; - return src; -} - -// Create a mirror view and deep_copy in a new space (specialization for different space) -template<class Space, class T, class ... 
P> -typename Impl::MirrorViewType<Space,T,P ...>::view_type -create_mirror_view_and_copy(const Space& , const Kokkos::View<T,P...> & src - , std::string const& name = "" - , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) { - using Mirror = typename Impl::MirrorViewType<Space,T,P ...>::view_type; - std::string label = name.empty() ? src.label() : name; - auto mirror = Mirror(ViewAllocateWithoutInitializing(label), src.layout()); - deep_copy(mirror, src); - return mirror; -} - -} /* namespace Kokkos */ - - //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/Makefile b/packages/kokkos/core/src/Makefile index 6ee5fec71..c2dbddf45 100644 --- a/packages/kokkos/core/src/Makefile +++ b/packages/kokkos/core/src/Makefile @@ -16,6 +16,7 @@ endif CXXFLAGS ?= -O3 LINK ?= $(CXX) LDFLAGS ?= +CP = cp include $(KOKKOS_PATH)/Makefile.kokkos include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists @@ -50,7 +51,12 @@ ifeq ($(KOKKOS_OS),Linux) COPY_FLAG = -u endif ifeq ($(KOKKOS_OS),Darwin) - COPY_FLAG = + COPY_FLAG = + # If Homebrew coreutils is installed, its cp will have the -u option + ifneq ("$(wildcard /usr/local/opt/coreutils/libexec/gnubin/cp)","") + CP = /usr/local/opt/coreutils/libexec/gnubin/cp + COPY_FLAG = -u + endif endif ifeq ($(KOKKOS_DEBUG),"no") @@ -66,36 +72,38 @@ mkdir: mkdir -p $(PREFIX)/bin mkdir -p $(PREFIX)/include mkdir -p $(PREFIX)/lib + mkdir -p $(PREFIX)/lib/pkgconfig mkdir -p $(PREFIX)/include/impl copy-cuda: mkdir mkdir -p $(PREFIX)/include/Cuda - cp $(COPY_FLAG) $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda copy-threads: mkdir mkdir -p $(PREFIX)/include/Threads - cp $(COPY_FLAG) $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_THREADS) 
$(PREFIX)/include/Threads copy-qthreads: mkdir mkdir -p $(PREFIX)/include/Qthreads - cp $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREADS) $(PREFIX)/include/Qthreads + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREADS) $(PREFIX)/include/Qthreads copy-openmp: mkdir mkdir -p $(PREFIX)/include/OpenMP - cp $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP copy-rocm: mkdir mkdir -p $(PREFIX)/include/ROCm - cp $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm install: mkdir $(CONDITIONAL_COPIES) build-lib generate_build_settings - cp $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin - cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include - cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl - cp $(COPY_FLAG) $(KOKKOS_MAKEFILE) $(PREFIX) - cp $(COPY_FLAG) $(KOKKOS_CMAKEFILE) $(PREFIX) - cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib - cp $(COPY_FLAG) $(KOKKOS_CONFIG_HEADER) $(PREFIX)/include + $(CP) $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl + $(CP) $(COPY_FLAG) $(KOKKOS_MAKEFILE) $(PREFIX) + $(CP) $(COPY_FLAG) $(KOKKOS_CMAKEFILE) $(PREFIX) + $(CP) $(COPY_FLAG) $(KOKKOS_PKGCONFIG) $(PREFIX)/lib/pkgconfig + $(CP) $(COPY_FLAG) libkokkos.a $(PREFIX)/lib + $(CP) $(COPY_FLAG) $(KOKKOS_CONFIG_HEADER) $(PREFIX)/include clean: kokkos-clean - rm -f $(KOKKOS_MAKEFILE) $(KOKKOS_CMAKEFILE) + rm -f $(KOKKOS_MAKEFILE) $(KOKKOS_CMAKEFILE) $(KOKKOS_PKGCONFIG) diff --git a/packages/kokkos/core/src/Makefile.generate_build_files b/packages/kokkos/core/src/Makefile.generate_build_files index 7e0c6351f..cc856ee9a 100644 --- a/packages/kokkos/core/src/Makefile.generate_build_files +++ b/packages/kokkos/core/src/Makefile.generate_build_files @@ -5,6 +5,7 @@ # These files are generated by this makefile 
KOKKOS_MAKEFILE=Makefile.kokkos KOKKOS_CMAKEFILE=kokkos_generated_settings.cmake +KOKKOS_PKGCONFIG=kokkos.pc ifeq ($(KOKKOS_DEBUG),"no") KOKKOS_DEBUG_CMAKE = OFF @@ -33,11 +34,29 @@ kokkos_append_var = $(call kokkos_appendvar_makefile,$1); $(call kokkos_appendva kokkos_append_var2 = $(call kokkos_appendvar2_makefile,$1); $(call kokkos_appendvar_cmakefile,$1,$2) kokkos_append_varval = $(call kokkos_appendval_makefile,$1,$2); $(call kokkos_appendval_cmakefile,$1,$2,$3) +kokkos_fixup_sed_impl = sed \ + -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \ + -e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \ + -e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \ + -e 's|= $(KOKKOS_CONFIG_HEADER)|= $(PREFIX)/include/$(KOKKOS_CONFIG_HEADER)|g' $1 \ + > $1.tmp && mv -f $1.tmp $1 + +$(KOKKOS_PKGCONFIG): $(KOKKOS_PATH)/core/src/$(KOKKOS_PKGCONFIG).in + @sed -e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|g' \ + -e 's|@KOKKOS_CXXFLAGS@|$(patsubst -I%,,$(KOKKOS_CXXFLAGS))|g' \ + -e 's|@KOKKOS_EXTRA_LIBS_LIST@|$(KOKKOS_EXTRA_LIBS)|g' \ + -e 's|@KOKKOS_LINK_FLAGS@|$(KOKKOS_LINK_FLAGS)|g' \ + $< > $@ + +kokkos_fixup_sed = $(call kokkos_fixup_sed_impl,$(KOKKOS_MAKEFILE)); $(call kokkos_fixup_sed_impl,$(KOKKOS_CMAKEFILE)) + #This function should be used for variables whose values are different in GNU Make versus CMake, #especially lists which are delimited by commas in one case and semicolons in another kokkos_append_gmakevar = $(call kokkos_appendvar_makefile,$1); $(call kokkos_append_gmakevar_cmakefile,$1,$2) -generate_build_settings: $(KOKKOS_CONFIG_HEADER) +generate_build_settings: $(KOKKOS_CONFIG_HEADER) $(KOKKOS_PKGCONFIG) @rm -f $(KOKKOS_MAKEFILE) @rm -f $(KOKKOS_CMAKEFILE) @$(call kokkos_append_string, "#Global Settings used to generate this library") @@ -68,7 +87,6 @@ generate_build_settings: $(KOKKOS_CONFIG_HEADER) @$(call kokkos_append_var,KOKKOS_HEADERS_ROCM,'STRING "Kokkos 
headers ROCm list"') @$(call kokkos_append_var,KOKKOS_HEADERS_THREADS,'STRING "Kokkos headers Threads list"') @$(call kokkos_append_var,KOKKOS_HEADERS_QTHREADS,'STRING "Kokkos headers QThreads list"') - @$(call kokkos_append_var,KOKKOS_SRC,'STRING "Kokkos source list"') @$(call kokkos_append_string,"") @$(call kokkos_append_string,"#Variables used in application Makefiles") @$(call kokkos_append_var,KOKKOS_OS,'STRING ""') # This was not in original cmake gen @@ -94,19 +112,11 @@ generate_build_settings: $(KOKKOS_CONFIG_HEADER) @$(call kokkos_append_makefile,"#Fake kokkos-clean target") @$(call kokkos_append_makefile,"kokkos-clean:") @$(call kokkos_append_makefile,"") - @sed \ - -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ - -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ - -e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \ - -e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \ - -e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \ - -e 's|= $(KOKKOS_CONFIG_HEADER)|= $(PREFIX)/include/$(KOKKOS_CONFIG_HEADER)|g' $(KOKKOS_MAKEFILE) \ - > $(KOKKOS_MAKEFILE).tmp - @mv -f $(KOKKOS_MAKEFILE).tmp $(KOKKOS_MAKEFILE) + @$(call kokkos_fixup_sed) + @$(call kokkos_append_var,KOKKOS_SRC,'STRING "Kokkos source list"') @$(call kokkos_setvar_cmakefile,KOKKOS_CXX_FLAGS,$(KOKKOS_CXXFLAGS)) @$(call kokkos_setvar_cmakefile,KOKKOS_CPP_FLAGS,$(KOKKOS_CPPFLAGS)) @$(call kokkos_setvar_cmakefile,KOKKOS_LD_FLAGS,$(KOKKOS_LDFLAGS)) @$(call kokkos_setlist_cmakefile,KOKKOS_LIBS_LIST,$(KOKKOS_LIBS)) @$(call kokkos_setlist_cmakefile,KOKKOS_EXTRA_LIBS_LIST,$(KOKKOS_EXTRA_LIBS)) @$(call kokkos_setvar_cmakefile,KOKKOS_LINK_FLAGS,$(KOKKOS_LINK_FLAGS)) - diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 351f5f1ec..2f2c76846 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -103,8 +103,6 @@ public: void TaskQueueSpecialization< 
Kokkos::OpenMP >::execute ( TaskQueue< Kokkos::OpenMP > * const queue ) { - using execution_space = Kokkos::OpenMP ; - using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< void , void , void > ; using Member = Impl::HostThreadTeamMember< execution_space > ; @@ -213,8 +211,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >:: iff_single_thread_recursive_execute ( TaskQueue< Kokkos::OpenMP > * const queue ) { - using execution_space = Kokkos::OpenMP ; - using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< void , void , void > ; using Member = Impl::HostThreadTeamMember< execution_space > ; diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp index b530dca10..e8fbc467e 100644 --- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp +++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp @@ -76,14 +76,11 @@ public: //---------------------------------------- +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > inline static int team_size_max( const FunctorType & ) { -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE int pool_size = traits::execution_space::thread_pool_size(1); -#else - int pool_size = traits::execution_space::impl_thread_pool_size(1); -#endif int max_host_team_size = Impl::HostThreadTeamData::max_team_members; return pool_size<max_host_team_size?pool_size:max_host_team_size; } @@ -92,17 +89,47 @@ public: inline static int team_size_recommended( const FunctorType & ) { -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE return traits::execution_space::thread_pool_size(2); -#else - return traits::execution_space::impl_thread_pool_size(2); -#endif } template< class FunctorType > inline static int team_size_recommended( const FunctorType &, const int& ) { + return traits::execution_space::thread_pool_size(2); + } +#endif + + template<class FunctorType> + int team_size_max( const FunctorType&, const ParallelForTag& ) const { 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + int pool_size = traits::execution_space::thread_pool_size(1); +#else + int pool_size = traits::execution_space::impl_thread_pool_size(1); +#endif + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size<max_host_team_size?pool_size:max_host_team_size; + } + template<class FunctorType> + int team_size_max( const FunctorType&, const ParallelReduceTag& ) const { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + int pool_size = traits::execution_space::thread_pool_size(1); +#else + int pool_size = traits::execution_space::impl_thread_pool_size(1); +#endif + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size<max_host_team_size?pool_size:max_host_team_size; + } + template<class FunctorType> + int team_size_recommended( const FunctorType&, const ParallelForTag& ) const { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + return traits::execution_space::thread_pool_size(2); +#else + return traits::execution_space::impl_thread_pool_size(2); +#endif + } + template<class FunctorType> + int team_size_recommended( const FunctorType&, const ParallelReduceTag& ) const { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE return traits::execution_space::thread_pool_size(2); #else @@ -110,6 +137,18 @@ public: #endif } + + inline static + int vector_length_max() + { return 1024; } // Use arbitrary large number, is meant as a vectorizable length + + inline static + int scratch_size_max(int level) + { return (level==0? 
+ 1024*32: // Roughly L1 size + 20*1024*1024); // Limit to keep compatibility with CUDA + } + //---------------------------------------- private: diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp index 2d53670c8..c7d4defad 100644 --- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp @@ -160,7 +160,8 @@ SharedAllocationRecord( const Kokkos::Experimental::OpenMPTargetSpace & arg_spac , arg_label.c_str() , SharedAllocationHeader::maximum_label_length ); - + // Set last element zero, in case c_str is too long + header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; //TODO DeepCopy // DeepCopy diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp index 71643458b..87840bb37 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp @@ -44,8 +44,8 @@ #ifndef GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP #define GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP -#ifndef KOKKOS_ROCM_HAS_WORKAROUNDS -#define KOKKOS_ROCM_HAS_WORKAROUNDS 1 +#ifndef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND +#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1 #endif #endif diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp index 1c2bf303c..205e6a295 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp @@ -55,14 +55,14 @@ namespace Impl { struct ROCmTraits { // TODO: determine if needed - enum { WavefrontSize = 64 /* 64 */ }; - enum { WorkgroupSize = 64 /* 64 */ }; - enum { WavefrontIndexMask = 0x001f /* Mask for warpindex */ }; - enum { WavefrontIndexShift = 5 /* WarpSize == 1 << WarpShift */ }; + enum { WavefrontSize = 64 /* 64 */ }; + enum { WorkgroupSize = 256 /* 
256 */ }; + enum { WavefrontIndexMask = 0x003f /* Mask for wavefrontindex */ }; + enum { WavefrontIndexShift = 6 /* WavefrontSize == 1 << WavefrontShift */ }; - enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ }; - enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ }; - enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ }; + enum { SharedMemoryBanks = 64 /* GCN */ }; + enum { SharedMemoryCapacity = 0x10000 /* 64k shared / 16k L1 Cache */ }; + enum { SharedMemoryUsage = 0x04000 /* 64k shared / 16k L1 Cache */ }; enum { UpperBoundExtentCount = 4294967295 /* Hard upper bound */ }; #if 0 @@ -84,6 +84,16 @@ size_t rocm_internal_maximum_workgroup_count(); size_t * rocm_internal_scratch_flags( const size_t size ); size_t * rocm_internal_scratch_space( const size_t size ); +// This pointer is the start of dynamic shared memory (LDS). +// Dynamic is at the end of LDS and it's size must be specified +// in a tile_block specification at kernel launch time. +template< typename T > +KOKKOS_INLINE_FUNCTION +T * kokkos_impl_rocm_shared_memory() +//{ return (T*) hc::get_group_segment_base_pointer() ; } +{ return (T*) hc::get_dynamic_group_segment_base_pointer() ; } + + } } // namespace Kokkos #define ROCM_SPACE_ATOMIC_MASK 0x1FFFF @@ -249,7 +259,6 @@ struct ROCmParallelLaunch< DriverType size_t bx = (grid.x > block.x)? block.x : grid.x; size_t by = (grid.y > block.y)? block.y : grid.y; size_t bz = (grid.z > block.z)? 
block.z : grid.z; - hc::parallel_for_each(ext.tile_with_dynamic(bz,by,bx,shmem), [=](const hc::index<3> & idx) [[hc]] diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp index 3ae312647..236042ccc 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp @@ -543,20 +543,13 @@ enum { sizeScratchGrain = sizeof(ScratchGrain) }; void rocmMemset( Kokkos::Experimental::ROCm::size_type * ptr , Kokkos::Experimental::ROCm::size_type value , Kokkos::Experimental::ROCm::size_type size) { char * mptr = (char * ) ptr; -#if 0 - parallel_for_each(hc::extent<1>(size), +/* parallel_for_each(hc::extent<1>(size), [=, &ptr] (hc::index<1> idx) __HC__ { int i = idx[0]; ptr[i] = value; - }).wait(); -#else - for (int i= 0; i<size ; i++) - { - mptr[i] = (char) value; - } -#endif + }).wait();*/ } Kokkos::Experimental::ROCm::size_type * @@ -567,9 +560,9 @@ ROCmInternal::scratch_flags( const Kokkos::Experimental::ROCm::size_type size ) m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ; - typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; + typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > Record ; - Record * const r = Record::allocate( Kokkos::HostSpace() + Record * const r = Record::allocate( Kokkos::Experimental::ROCmSpace() , "InternalScratchFlags" , ( sizeScratchGrain * m_scratchFlagsCount ) ); @@ -590,9 +583,9 @@ ROCmInternal::scratch_space( const Kokkos::Experimental::ROCm::size_type size ) m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ; - typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; + typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > Record ; - Record * const r = Record::allocate( Kokkos::HostSpace() + static Record * const r = Record::allocate( 
Kokkos::Experimental::ROCmSpace() , "InternalScratchSpace" , ( sizeScratchGrain * m_scratchSpaceCount ) ); @@ -616,7 +609,7 @@ void ROCmInternal::finalize() // scratch_lock_array_rocm_space_ptr(false); // threadid_lock_array_rocm_space_ptr(false); - typedef Kokkos::Impl::SharedAllocationRecord< HostSpace > RecordROCm ; + typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace > RecordROCm ; typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace > RecordHost ; RecordROCm::decrement( RecordROCm::get_record( m_scratchFlags ) ); diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp index 2978ae8f5..edd1c12e4 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp @@ -243,6 +243,15 @@ public: return(max); } + template< class FunctorType , class PatternTypeTag> + int team_size_max( const FunctorType& functor, PatternTypeTag) { + return 256/vector_length(); + } + template< class FunctorType , class PatternTypeTag> + int team_size_recommended( const FunctorType& functor, PatternTypeTag) { + return 128/vector_length(); + } + template<class F> KOKKOS_INLINE_FUNCTION int team_size(const F& f) const { return (m_team_size > 0) ? m_team_size : team_size_recommended(f); } KOKKOS_INLINE_FUNCTION int team_size() const { return (m_team_size > 0) ? m_team_size : Impl::get_max_tile_thread(); ; } @@ -261,6 +270,11 @@ public: return m_thread_scratch_size[level]; } + static int scratch_size_max(int level) { + return level==0 ? 
+ 1024*40 : 1024*1204*20; + } + typedef Impl::ROCmTeamMember member_type; }; @@ -487,6 +501,7 @@ public: #endif } m_idx.barrier.wait(); + reducer.reference() = buffer[0]; } /** \brief Intra-team vector reduce @@ -541,19 +556,19 @@ public: } template< typename ReducerType > - KOKKOS_INLINE_FUNCTION static + KOKKOS_INLINE_FUNCTION typename std::enable_if< is_reducer< ReducerType >::value >::type - vector_reduce( ReducerType const & reducer ) + vector_reduce( ReducerType const & reducer ) const { #ifdef __HCC_ACCELERATOR__ - if(blockDim_x == 1) return; + if(m_vector_length == 1) return; // Intra vector lane shuffle reduction: typename ReducerType::value_type tmp ( reducer.reference() ); - for ( int i = blockDim_x ; ( i >>= 1 ) ; ) { - shfl_down( reducer.reference() , i , blockDim_x ); - if ( (int)threadIdx_x < i ) { reducer.join( tmp , reducer.reference() ); } + for ( int i = m_vector_length ; ( i >>= 1 ) ; ) { + reducer.reference() = shfl_down( tmp , i , m_vector_length ); + if ( (int)vector_rank() < i ) { reducer.join( tmp , reducer.reference() ); } } // Broadcast from root lane to all other lanes. @@ -561,7 +576,7 @@ public: // because floating point summation is not associative // and thus different threads could have different results. 
- shfl( reducer.reference() , 0 , blockDim_x ); + reducer.reference() = shfl( tmp , 0 , m_vector_length ); #endif } @@ -847,7 +862,7 @@ public: hc::extent< 1 > flat_extent( total_size ); - hc::tiled_extent< 1 > team_extent = flat_extent.tile(team_size*vector_length); + hc::tiled_extent< 1 > team_extent = flat_extent.tile(vector_length*team_size); hc::parallel_for_each( team_extent , [=](hc::tiled_index<1> idx) [[hc]] { rocm_invoke<typename Policy::work_tag>(f, typename Policy::member_type(idx, league_size, team_size, shared, shared_size, scratch_size0, scratch, scratch_size1,vector_length)); @@ -958,6 +973,176 @@ public: }; +//---------------------------------------------------------------------------- + +template< class FunctorType , class ReducerType, class... Traits > +class ParallelReduce< + FunctorType , Kokkos::MDRangePolicy< Traits... >, ReducerType, Kokkos::Experimental::ROCm > +{ +private: + typedef Kokkos::MDRangePolicy< Traits ... > Policy ; + using RP = Policy; + typedef typename Policy::array_index_type array_index_type; + typedef typename Policy::index_type index_type; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + typedef typename Policy::launch_bounds LaunchBounds; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ; + + +public: + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::value_type value_type ; + typedef typename ValueTraits::reference_type reference_type 
; + typedef FunctorType functor_type ; + typedef Kokkos::Experimental::ROCm::size_type size_type ; + + // Algorithmic constraints: blockSize is a power of two AND blockDim.y == blockDim.z == 1 + + const FunctorType m_functor ; + const Policy m_policy ; // used for workrange and nwork + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + value_type * m_scratch_space ; + size_type * m_scratch_flags ; + + typedef typename Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank, Policy, FunctorType, typename Policy::work_tag, reference_type> DeviceIteratePattern; + + KOKKOS_INLINE_FUNCTION + void exec_range( reference_type update ) const + { + Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank,Policy,FunctorType,typename Policy::work_tag, reference_type>(m_policy, m_functor, update).exec_range(); + } + + + KOKKOS_INLINE_FUNCTION + void operator()(void) const + { + run(); + } + + KOKKOS_INLINE_FUNCTION + void run( ) const + { + const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(value_type) > + word_count( (ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) )) / sizeof(value_type) ); + // pointer to shared data accounts for the reserved space at the start + value_type * const shared = kokkos_impl_rocm_shared_memory<value_type>() + + 2*sizeof(uint64_t); + + { + reference_type value = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , shared + threadIdx_y * word_count.value ); + // Number of blocks is bounded so that the reduction can be limited to two passes. + // Each thread block is given an approximately equal amount of work to perform. + // Accumulate the values for this block. + // The accumulation ordering does not match the final pass, but is arithmatically equivalent. + + this-> exec_range( value ); + } + + // Reduce with final value at blockDim.y - 1 location. 
+ // Problem: non power-of-two blockDim + + if ( rocm_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTagFwd>( + ReducerConditional::select(m_functor , m_reducer) , blockIdx_x , + gridDim_x , shared , m_scratch_space , m_scratch_flags ) ) { + + // This is the final block with the final result at the final threads' location + value_type * const tshared = shared + ( blockDim_y - 1 ) * word_count.value ; + value_type * const global = m_scratch_space ; + + if ( threadIdx_y == 0 ) { + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , tshared ); +// for ( unsigned i = 0 ; i < word_count.value ; i+=blockDim_y ) { global[i] = tshared[i]; } + for ( unsigned i = 0 ; i < word_count.value ; i++ ) { global[i] = tshared[i]; } + } + } + } + + + + // Determine block size constrained by shared memory: + static inline + unsigned local_block_size( const FunctorType & f ) + { + unsigned n = ROCmTraits::WavefrontSize * 8 ; + while ( n && ROCmTraits::SharedMemoryCapacity < rocm_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( f , n ) ) { n >>= 1 ; } + return n ; + } + + inline + void execute() + { + const int nwork = m_policy.m_num_tiles; + if ( nwork ) { + int block_size = m_policy.m_prod_tile_dims; + // CONSTRAINT: Algorithm requires block_size >= product of tile dimensions + // Nearest power of two + int exponent_pow_two = std::ceil( std::log2((float)block_size) ); + block_size = 1<<(exponent_pow_two); + + m_scratch_space = (value_type*)rocm_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size*nwork /* block_size == max block_count */ ); + m_scratch_flags = rocm_internal_scratch_flags( sizeof(size_type) ); + const dim3 block( 1 , block_size , 1 ); + // Required grid.x <= block.y + const dim3 grid( nwork, block_size , 1 ); + const int shmem = rocm_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( m_functor , block.y ); 
+ + ROCmParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + + ROCM::fence(); + + if ( m_result_ptr ) { + const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); + DeepCopy<HostSpace,Kokkos::Experimental::ROCmSpace>( m_result_ptr , m_scratch_space , size ); + } + } + else { + if (m_result_ptr) { + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr ); + } + } + } + + + template< class HostViewType > + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const HostViewType & arg_result + , typename std::enable_if< + Kokkos::is_view< HostViewType >::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result.data() ) + , m_scratch_space( 0 ) + , m_scratch_flags( 0 ) + {} + + ParallelReduce( const FunctorType & arg_functor + , const Policy & arg_policy + , const ReducerType & reducer) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.view().data() ) + , m_scratch_space( 0 ) + , m_scratch_flags( 0 ) + {} + +}; +//---------------------------------------------------------------------------- + template< class FunctorType, class ReducerType, class... Traits > class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Traits... 
>, ReducerType, Kokkos::Experimental::ROCm > @@ -992,8 +1177,14 @@ public: const int scratch_size0 = policy.scratch_size(0,team_size); const int scratch_size1 = policy.scratch_size(1,team_size); const int total_size = league_size * team_size ; - - if(total_size == 0) return; + + typedef Kokkos::Impl::FunctorValueInit< FunctorType, typename Policy::work_tag > ValueInit ; + if(total_size==0) { + if (result_view.data()) { + ValueInit::init( f , result_view.data() ); + } + return; + } const int reduce_size = ValueTraits::value_size( f ); const int shared_size = FunctorTeamShmemSize< FunctorType >::value( f , team_size ); @@ -1042,7 +1233,16 @@ public: const int vector_length = policy.vector_length(); const int total_size = league_size * team_size; - if(total_size == 0) return; + typedef Kokkos::Impl::FunctorValueInit< ReducerType, typename Policy::work_tag > ValueInit ; + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, + FunctorType, ReducerType> ReducerConditional; + if(total_size==0) { + if (reducer.view().data()) { + ValueInit::init( ReducerConditional::select(f,reducer), + reducer.view().data() ); + } + return; + } const int reduce_size = ValueTraits::value_size( f ); const int shared_size = FunctorTeamShmemSize< FunctorType >::value( f , team_size ); @@ -1113,6 +1313,39 @@ public: //---------------------------------------- }; +template< class FunctorType , class ReturnType , class... Traits > +class ParallelScanWithTotal< FunctorType , Kokkos::RangePolicy< Traits... >, + ReturnType, Kokkos::Experimental::ROCm > +{ +private: + + typedef Kokkos::RangePolicy< Traits... 
> Policy; + typedef typename Policy::work_tag Tag; + typedef Kokkos::Impl::FunctorValueTraits< FunctorType, Tag> ValueTraits; + +public: + + //---------------------------------------- + + inline + ParallelScanWithTotal( const FunctorType & f + , const Policy & policy + , ReturnType & arg_returnvalue) + { + const auto len = policy.end()-policy.begin(); + + + if(len==0) return; + + scan_enqueue<Tag,ReturnType>(len, f, arg_returnvalue, [](hc::tiled_index<1> idx, int, int) { return idx.global[0]; }); + } + + KOKKOS_INLINE_FUNCTION + void execute() const {} + + //---------------------------------------- +}; + template< class FunctorType , class... Traits> class ParallelScan< FunctorType , Kokkos::TeamPolicy< Traits... >, Kokkos::Experimental::ROCm > { @@ -1350,22 +1583,17 @@ void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTe * val is performed and put into result. This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries, +typename std::enable_if< ! 
Kokkos::is_reducer< ValueType >::value >::type +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries, const Lambda & lambda, ValueType& result) { - result = ValueType(); + Kokkos::Sum<ValueType> reducer(result); + reducer.init( reducer.reference() ); for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; + lambda(i,reducer.reference()); } - result = loop_boundaries.thread.team_reduce(result, - Impl::JoinAdd<ValueType>()); -// Impl::rocm_intra_workgroup_reduction( loop_boundaries.thread, result, -// Impl::JoinAdd<ValueType>()); -// Impl::rocm_inter_workgroup_reduction( loop_boundaries.thread, result, -// Impl::JoinAdd<ValueType>()); + loop_boundaries.thread.team_reduce(reducer); } /** \brief Inter-thread thread range parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. @@ -1374,7 +1602,8 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROC * val is performed and put into result. This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ReducerType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries, +typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries, const Lambda & lambda, ReducerType const & reducer) { reducer.init( reducer.reference() ); @@ -1439,7 +1668,8 @@ void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCm * val is performed and put into result. 
This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >& +typename std::enable_if< !Kokkos::is_reducer< ValueType >::value >::type +parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >& loop_boundaries, const Lambda & lambda, ValueType& result) { result = ValueType(); @@ -1477,7 +1707,8 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::R * val is performed and put into result. This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ReducerType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >& +typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >& loop_boundaries, const Lambda & lambda, ReducerType const & reducer) { reducer.init( reducer.reference() ); @@ -1523,86 +1754,46 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROC typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; typedef typename ValueTraits::value_type value_type ; - value_type scan_val = value_type(); -#if (__ROCM_ARCH__ >= 800) -// adopt the cuda vector shuffle method - const int VectorLength = loop_boundaries.increment; - int lid = loop_boundaries.thread.lindex(); - int vector_rank = lid%VectorLength; - - iType loop_bound = ((loop_boundaries.end+VectorLength-1)/VectorLength) * VectorLength; - value_type val ; - for(int _i = vector_rank; _i < loop_bound; _i += VectorLength) { - val = value_type(); - if(_i<loop_boundaries.end) - lambda(_i , val , false); - - value_type tmp = val; - value_type result_i; - - if(vector_rank == 0) - result_i = tmp; - if (VectorLength > 1) { - const value_type 
tmp2 = shfl_up(tmp, 1,VectorLength); - if(vector_rank > 0) - tmp+=tmp2; - } - if(vector_rank == 1) - result_i = tmp; - if (VectorLength > 3) { - const value_type tmp2 = shfl_up(tmp, 2,VectorLength); - if(vector_rank > 1) - tmp+=tmp2; - } - if ((vector_rank >= 2) && - (vector_rank < 4)) - result_i = tmp; - if (VectorLength > 7) { - const value_type tmp2 = shfl_up(tmp, 4,VectorLength); - if(vector_rank > 3) - tmp+=tmp2; - } - if ((vector_rank >= 4) && - (vector_rank < 8)) - result_i = tmp; - if (VectorLength > 15) { - const value_type tmp2 = shfl_up(tmp, 8,VectorLength); - if(vector_rank > 7) - tmp+=tmp2; - } - if ((vector_rank >= 8) && - (vector_rank < 16)) - result_i = tmp; - if (VectorLength > 31) { - const value_type tmp2 = shfl_up(tmp, 16,VectorLength); - if(vector_rank > 15) - tmp+=tmp2; - } - if ((vector_rank >=16) && - (vector_rank < 32)) - result_i = tmp; - if (VectorLength > 63) { - const value_type tmp2 = shfl_up(tmp, 32,VectorLength); - if(vector_rank > 31) - tmp+=tmp2; + value_type val = value_type(); + const int vector_length = loop_boundaries.thread.vector_length(); + const int vector_rank = loop_boundaries.thread.vector_rank(); + + iType end = ((loop_boundaries.end+vector_length-1)/vector_length) * vector_length; + value_type accum = value_type(); + + for ( int i = vector_rank ; i < end ; i += vector_length ) { + + value_type val = 0 ; + + // First acquire per-lane contributions: + if ( i < loop_boundaries.end ) lambda( i , val , false ); + + value_type sval = val ; + + // Bottom up inclusive scan in triangular pattern + // where each thread is the root of a reduction tree + // from the zeroth "lane" to itself. + // [t] += [t-1] if t >= 1 + // [t] += [t-2] if t >= 2 + // [t] += [t-4] if t >= 4 + // ... 
+ + for ( int j = 1 ; j < vector_length ; j <<= 1 ) { + value_type tmp = 0 ; + tmp = shfl_up(sval , j , vector_length ); + if ( j <= vector_rank ) { sval += tmp ; } } - if (vector_rank >= 32) - result_i = tmp; + // Include accumulation and remove value for exclusive scan: + val = accum + sval - val ; - val = scan_val + result_i - val; - scan_val += shfl(tmp,VectorLength-1,VectorLength); - if(_i<loop_boundaries.end) - lambda(_i , val , true); - } -#else -// for kaveri, call the LDS based thread_scan routine - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,scan_val,true); - } - scan_val = loop_boundaries.thread.team_scan(scan_val); + // Provide exclusive scan value: + if ( i < loop_boundaries.end ) lambda( i , val , true ); -#endif + // Accumulate the last value in the inclusive scan: + sval = shfl( sval , vector_length-1 , vector_length); + accum += sval ; + } } } // namespace Kokkos diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp index 7dd69e757..0321f3d53 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp @@ -57,7 +57,6 @@ #include <ROCm/Kokkos_ROCm_Tile.hpp> #include <ROCm/Kokkos_ROCm_Invoke.hpp> #include <ROCm/Kokkos_ROCm_Join.hpp> - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// namespace Kokkos { @@ -75,7 +74,7 @@ T& reduce_value(T* x, std::false_type) [[hc]] return *x; } -#if KOKKOS_ROCM_HAS_WORKAROUNDS +#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND struct always_true { template<class... Ts> @@ -149,7 +148,7 @@ void reduce_enqueue( // Store the tile result in the global memory. 
if (local == 0) { -#if KOKKOS_ROCM_HAS_WORKAROUNDS +#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND // Workaround for assigning from LDS memory: std::copy should work // directly buffer.action_at(0, [&](T* x) @@ -158,7 +157,7 @@ void reduce_enqueue( // new ROCM 15 address space changes aren't implemented in std algorithms yet auto * src = reinterpret_cast<char *>(x); auto * dest = reinterpret_cast<char *>(result.data()+tile*output_length); - for(int i=0; i<sizeof(T);i++) dest[i] = src[i]; + for(int i=0; i<sizeof(T)*output_length;i++) dest[i] = src[i]; #else // Workaround: copy_if used to avoid memmove std::copy_if(x, x+output_length, result.data()+tile*output_length, always_true{} ); @@ -169,12 +168,10 @@ void reduce_enqueue( #endif } - }); if (output_result != nullptr) ValueInit::init(ReducerConditional::select(f, reducer), output_result); fut.wait(); - copy(result,result_cpu.data()); if (output_result != nullptr) { for(std::size_t i=0;i<td.num_tiles;i++) diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp index 3f67089b9..33efa0d6f 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp @@ -62,6 +62,76 @@ namespace Kokkos { namespace Impl { +//#if __KALMAR_ACCELERATOR__ == 1 +KOKKOS_INLINE_FUNCTION +void __syncthreads() [[hc]] +{ + amp_barrier(CLK_LOCAL_MEM_FENCE); +} + +#define LT0 ((threadIdx_x+threadIdx_y+threadIdx_z)?0:1) + + +// returns non-zero if and only if predicate is non-zero for all threads +// note that syncthreads_or uses the first 64 bits of dynamic group memory. +// this reserved memory must be accounted for everwhere +// that get_dynamic_group_segment_base_pointer is called. 
+KOKKOS_INLINE_FUNCTION +uint64_t __syncthreads_or(uint64_t pred) +{ + uint64_t *shared_var = (uint64_t *)hc::get_dynamic_group_segment_base_pointer(); + if(LT0) *shared_var = 0; + amp_barrier(CLK_LOCAL_MEM_FENCE); +#if __KALMAR_ACCELERATOR__ == 1 + if (pred) hc::atomic_or_uint64(shared_var,1); +#endif + amp_barrier(CLK_LOCAL_MEM_FENCE); + return (*shared_var); +} + +KOKKOS_INLINE_FUNCTION +void __threadfence() +{ + amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); +} + +KOKKOS_INLINE_FUNCTION +void __threadfence_block() +{ + amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); +} +//#endif +struct ROCm_atomic_CAS { + template<class OP> + KOKKOS_INLINE_FUNCTION + unsigned long operator () (volatile unsigned long * dest, OP &&op){ + unsigned long read,compare,val; + compare = *dest; + read = compare; + do { + compare = read; + val = op(compare); +#if __KALMAR_ACCELERATOR__ == 1 + hc::atomic_compare_exchange((uint64_t *)dest,&read,val); +#endif + } while (read != compare); + return val; + } +}; + + template<class OP> + KOKKOS_INLINE_FUNCTION + unsigned long atomic_cas_op (volatile unsigned long * dest, OP &&op) { + ROCm_atomic_CAS cas_op; + return cas_op(dest, std::forward<OP>(op)); + } + + KOKKOS_INLINE_FUNCTION + unsigned long atomicInc (volatile unsigned long * dest, const unsigned long& val) { + return atomic_cas_op(dest, [=](unsigned long old){return ((old>=val)?0:(old+1));}); + } + + //---------------------------------------------------------------------------- template< typename T > @@ -375,18 +445,7 @@ bool rocm_inter_block_reduction( ROCmTeamMember& team, #endif } #endif -#if 0 -//---------------------------------------------------------------------------- -// See section B.17 of ROCm C Programming Guide Version 3.2 -// for discussion of -// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) -// function qualifier which could be used to improve performance. 
-//---------------------------------------------------------------------------- -// Maximize shared memory and minimize L1 cache: -// rocmFuncSetCacheConfig(MyKernel, rocmFuncCachePreferShared ); -// For 2.0 capability: 48 KB shared and 16 KB L1 -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- /* * Algorithmic constraints: @@ -406,87 +465,105 @@ void rocm_intra_block_reduce_scan( const FunctorType & functor , typedef typename ValueTraits::pointer_type pointer_type ; const unsigned value_count = ValueTraits::value_count( functor ); - const unsigned BlockSizeMask = team.team_size() - 1 ; + const unsigned BlockSizeMask = blockDim_y - 1 ; // Must have power of two thread count - if ( BlockSizeMask & team.team_size() ) { Kokkos::abort("ROCm::rocm_intra_block_scan requires power-of-two blockDim"); } + if ( BlockSizeMask & blockDim_y ) { Kokkos::abort("ROCm::rocm_intra_block_scan requires power-of-two blockDim"); } #define BLOCK_REDUCE_STEP( R , TD , S ) \ - if ( ! ( R & ((1<<(S+1))-1) ) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); } + if ( ! 
(( R & ((1<<(S+1))-1) )|(blockDim_y<(1<<(S+1)))) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); } #define BLOCK_SCAN_STEP( TD , N , S ) \ if ( N == (1<<S) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S))); } +#define KOKKOS_IMPL_ROCM_SYNCWF __threadfence_block() - const unsigned rtid_intra = team.team_rank() ^ BlockSizeMask ; - const pointer_type tdata_intra = base_data + value_count * team.team_rank() ; + const unsigned rtid_intra = threadIdx_y ^ BlockSizeMask ; + const pointer_type tdata_intra = base_data + value_count * threadIdx_y ; - { // Intra-workgroup reduction: + { // Intra-workgroup reduction: min blocksize of 64 + KOKKOS_IMPL_ROCM_SYNCWF; BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,0) + KOKKOS_IMPL_ROCM_SYNCWF; BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,1) + KOKKOS_IMPL_ROCM_SYNCWF; BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,2) + KOKKOS_IMPL_ROCM_SYNCWF; BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,3) + KOKKOS_IMPL_ROCM_SYNCWF; BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,4) + KOKKOS_IMPL_ROCM_SYNCWF; + BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,5) + KOKKOS_IMPL_ROCM_SYNCWF; } - team.team_barrier(); // Wait for all workgroups to reduce + __syncthreads(); // Wait for all workgroups to reduce { // Inter-workgroup reduce-scan by a single workgroup to avoid extra synchronizations - const unsigned rtid_inter = ( team.team_rank() ^ BlockSizeMask ) << ROCmTraits::WarpIndexShift ; + if(threadIdx_y < value_count) { + for(int i=blockDim_y-65; i>0; i-= 64) + ValueJoin::join( functor , base_data + (blockDim_y-1)*value_count + threadIdx_y , base_data + i*value_count + threadIdx_y ); + } + __syncthreads(); +#if 0 + const unsigned rtid_inter = ( threadIdx_y ^ BlockSizeMask ) << ROCmTraits::WavefrontIndexShift ; + + if ( rtid_inter < blockDim_y ) { - if ( rtid_inter < team.team_size() ) { const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask ); +// +// remove these comments +// for rocm, we start with a block size of 64, so 
the 5 step is already done. +// The remaining steps are only done if block size is > 64, so we leave them +// in place until we tune blocksize for performance, then remove the ones +// that will never be used. +// if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) } +// if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) } +// if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) } +// if ( (1<<9) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,9) } - if ( (1<<5) < BlockSizeMask ) { BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) } - if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) } - if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) } - if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) } if ( DoScan ) { - int n = ( rtid_inter & 32 ) ? 32 : ( - ( rtid_inter & 64 ) ? 64 : ( + int n = ( rtid_inter & 64 ) ? 64 : ( ( rtid_inter & 128 ) ? 128 : ( - ( rtid_inter & 256 ) ? 256 : 0 ))); + ( rtid_inter & 256 ) ? 256 : 0 )); - if ( ! ( rtid_inter + n < team.team_size() ) ) n = 0 ; + if ( ! ( rtid_inter + n < blockDim_y ) ) n = 0 ; __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,8) __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,7) __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,6) - __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,5) +// __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,5) } } +#endif } - team.team_barrier(); // Wait for inter-workgroup reduce-scan to complete + __syncthreads(); // Wait for inter-workgroup reduce-scan to complete if ( DoScan ) { int n = ( rtid_intra & 1 ) ? 1 : ( ( rtid_intra & 2 ) ? 2 : ( ( rtid_intra & 4 ) ? 4 : ( ( rtid_intra & 8 ) ? 8 : ( - ( rtid_intra & 16 ) ? 16 : 0 )))); - - if ( ! 
( rtid_intra + n < team.team_size() ) ) n = 0 ; - #ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND - BLOCK_SCAN_STEP(tdata_intra,n,4) team.team_barrier();//__threadfence_block(); - BLOCK_SCAN_STEP(tdata_intra,n,3) team.team_barrier();//__threadfence_block(); - BLOCK_SCAN_STEP(tdata_intra,n,2) team.team_barrier();//__threadfence_block(); - BLOCK_SCAN_STEP(tdata_intra,n,1) team.team_barrier();//__threadfence_block(); - BLOCK_SCAN_STEP(tdata_intra,n,0) team.team_barrier(); - #else - BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block(); + ( rtid_intra & 16 ) ? 16 : ( + ( rtid_intra & 32 ) ? 32 : 0 ))))); + + if ( ! ( rtid_intra + n < blockDim_y ) ) n = 0 ; + +// BLOCK_SCAN_STEP(tdata_intra,n,5) __threadfence_block(); +// BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block(); BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block(); BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block(); BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block(); BLOCK_SCAN_STEP(tdata_intra,n,0) __threadfence_block(); - #endif } #undef BLOCK_SCAN_STEP #undef BLOCK_REDUCE_STEP +#undef KOKKOS_IMPL_ROCM_SYNCWF } //---------------------------------------------------------------------------- @@ -497,16 +574,18 @@ void rocm_intra_block_reduce_scan( const FunctorType & functor , * * Global reduce result is in the last threads' 'shared_data' location. 
*/ +using ROCM = Kokkos::Experimental::ROCm ; + template< bool DoScan , class FunctorType , class ArgTag > KOKKOS_INLINE_FUNCTION bool rocm_single_inter_block_reduce_scan( const FunctorType & functor , - const ROCm::size_type block_id , - const ROCm::size_type block_count , - ROCm::size_type * const shared_data , - ROCm::size_type * const global_data , - ROCm::size_type * const global_flags ) + const ROCM::size_type block_id , + const ROCM::size_type block_count , + typename FunctorValueTraits<FunctorType, ArgTag>::value_type * const shared_data , + typename FunctorValueTraits<FunctorType, ArgTag>::value_type * const global_data , + ROCM::size_type * const global_flags ) { - typedef ROCm::size_type size_type ; + typedef ROCM::size_type size_type ; typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ; typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ; typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ; @@ -517,16 +596,17 @@ bool rocm_single_inter_block_reduce_scan( const FunctorType & functor , typedef typename ValueTraits::value_type value_type ; // '__ffs' = position of the least significant bit set to 1. - // 'team.team_size()' is guaranteed to be a power of two so this + // blockDim_y is guaranteed to be a power of two so this // is the integral shift value that can replace an integral divide. 
- const unsigned BlockSizeShift = __ffs( team.team_size() ) - 1 ; - const unsigned BlockSizeMask = team.team_size() - 1 ; + // const unsigned long BlockSizeShift = __ffs( blockDim_y ) - 1 ; + const unsigned long BlockSizeShift = __lastbit_u32_u32( blockDim_y ) ; + const unsigned long BlockSizeMask = blockDim_y - 1 ; // Must have power of two thread count - if ( BlockSizeMask & team.team_size() ) { Kokkos::abort("ROCm::rocm_single_inter_block_reduce_scan requires power-of-two blockDim"); } + if ( BlockSizeMask & blockDim_y ) { Kokkos::abort("ROCm::rocm_single_inter_block_reduce_scan requires power-of-two blockDim"); } - const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > - word_count( ValueTraits::value_size( functor ) / sizeof(size_type) ); + const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(value_type) > + word_count( ValueTraits::value_size( functor )/ sizeof(value_type) ); // Reduce the accumulation for the entire block. rocm_intra_block_reduce_scan<false,FunctorType,ArgTag>( functor , pointer_type(shared_data) ); @@ -534,54 +614,47 @@ bool rocm_single_inter_block_reduce_scan( const FunctorType & functor , { // Write accumulation total to global scratch space. // Accumulation total is the last thread's data. 
- size_type * const shared = shared_data + word_count.value * BlockSizeMask ; - size_type * const global = global_data + word_count.value * block_id ; - -#if (__ROCM_ARCH__ < 500) - for ( size_type i = team.team_rank() ; i < word_count.value ; i += team.team_size() ) { global[i] = shared[i] ; } -#else - for ( size_type i = 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; } -#endif + value_type * const shared = shared_data + + word_count.value * BlockSizeMask ; + value_type * const global = global_data + word_count.value * block_id ; + for ( int i = int(threadIdx_y) ; i < word_count.value ; i += blockDim_y ) { global[i] = shared[i] ; } } // Contributing blocks note that their contribution has been completed via an atomic-increment flag // If this block is not the last block to contribute to this group then the block is done. - team.team_barrier(); + const bool is_last_block = - ! team.team_reduce( team.team_rank() ? 0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) ,Impl::JoinAdd<ValueType>()); - + ! __syncthreads_or( threadIdx_y ? 
0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) ); if ( is_last_block ) { - const size_type b = ( long(block_count) * long(team.team_rank()) ) >> BlockSizeShift ; - const size_type e = ( long(block_count) * long( team.team_rank() + 1 ) ) >> BlockSizeShift ; + const size_type b = ( long(block_count) * long(threadIdx_y )) >> BlockSizeShift ; + const size_type e = ( long(block_count) * long(threadIdx_y + 1 ) ) >> BlockSizeShift ; { - void * const shared_ptr = shared_data + word_count.value * team.team_rank() ; - reference_type shared_value = ValueInit::init( functor , shared_ptr ); + value_type * const shared_ptr = shared_data + word_count.value * threadIdx_y ; + ValueInit::init( functor , shared_ptr ); + for ( size_type i = b ; i < e ; ++i ) { ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i ); } } - rocm_intra_block_reduce_scan<DoScan,FunctorType,ArgTag>( functor , pointer_type(shared_data) ); if ( DoScan ) { + value_type * const shared_value = shared_data + word_count.value * ( threadIdx_y ? threadIdx_y - 1 : blockDim_y ); - size_type * const shared_value = shared_data + word_count.value * ( team.team_rank() ? team.team_rank() - 1 : team.team_size() ); - - if ( ! team.team_rank() ) { ValueInit::init( functor , shared_value ); } + if ( ! 
threadIdx_y ) { ValueInit::init( functor , shared_value ); } // Join previous inclusive scan value to each member for ( size_type i = b ; i < e ; ++i ) { - size_type * const global_value = global_data + word_count.value * i ; + value_type * const global_value = global_data + word_count.value * i ; ValueJoin::join( functor , shared_value , global_value ); ValueOps ::copy( functor , global_value , shared_value ); } } } - return is_last_block ; } @@ -592,7 +665,6 @@ unsigned rocm_single_inter_block_reduce_scan_shmem( const FunctorType & functor { return ( BlockSize + 2 ) * Impl::FunctorValueTraits< FunctorType , ArgTag >::value_size( functor ); } -#endif } // namespace Impl } // namespace Kokkos diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp index 9890598bc..f24db42ce 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp @@ -98,7 +98,109 @@ void scan_enqueue( { auto j = i + d - 1; auto k = i + d2 - 1; -// join(k, j); // no longer needed with ROCm 1.6 + + ValueJoin::join(f, &buffer[k], &buffer[j]); + } + } + t_idx.barrier.wait(); + + result[tile] = buffer[buffer.size()-1]; + buffer[buffer.size()-1] = 0; + // Down sweep phase + for(std::size_t d=buffer.size()/2;d>0;d/=2) + { + auto d2 = 2*d; + auto i = local*d2; + if(i<len) + { + auto j = i + d - 1; + auto k = i + d2 - 1; + auto t = buffer[k]; + + ValueJoin::join(f, &buffer[k], &buffer[j]); + buffer[j] = t; + } + t_idx.barrier.wait(); + } + // Copy tiles into global memory + if (global < len) scratch[global] = buffer[local]; + }).wait(); + copy(result,result_cpu.data()); + + for(int i=1; i<td.num_tiles; i++) + ValueJoin::join(f, &result_cpu[i], &result_cpu[i-1]); + + copy(result_cpu.data(),result); + size_t launch_len = (((len - 1) / td.tile_size) + 1) * td.tile_size; + hc::parallel_for_each(hc::extent<1>(launch_len).tile(td.tile_size), [&,f,len,td](hc::tiled_index<1> t_idx) [[hc]] + { 
+ const auto global = t_idx.global[0]; + const auto tile = t_idx.tile[0]; + + if (global < len) + { + auto final_state = scratch[global]; + + if (tile != 0) ValueJoin::join(f, &final_state, &result[tile-1]); + rocm_invoke<Tag>(f, transform_index(t_idx, td.tile_size, td.num_tiles), final_state, true); + } + }).wait(); +} + +template< class Tag, class ReturnType, class F, class TransformIndex> +void scan_enqueue( + const int len, + const F & f, + ReturnType & return_val, + TransformIndex transform_index) +{ + typedef Kokkos::Impl::FunctorValueTraits< F, Tag> ValueTraits; + typedef Kokkos::Impl::FunctorValueInit< F, Tag> ValueInit; + typedef Kokkos::Impl::FunctorValueJoin< F, Tag> ValueJoin; + typedef Kokkos::Impl::FunctorValueOps< F, Tag> ValueOps; + + typedef typename ValueTraits::value_type value_type; + typedef typename ValueTraits::pointer_type pointer_type; + typedef typename ValueTraits::reference_type reference_type; + + const auto td = get_tile_desc<value_type>(len); + std::vector<value_type> result_cpu(td.num_tiles); + hc::array<value_type> result(td.num_tiles); + hc::array<value_type> scratch(len); + std::vector<ReturnType> total_cpu(1); + hc::array<ReturnType> total(1); + + tile_for<value_type>(td, [&,f,len,td](hc::tiled_index<1> t_idx, tile_buffer<value_type> buffer) [[hc]] + { + const auto local = t_idx.local[0]; + const auto global = t_idx.global[0]; + const auto tile = t_idx.tile[0]; + + // Join tile buffer elements + const auto join = [&](std::size_t i, std::size_t j) + { + buffer.action_at(i, j, [&](value_type& x, const value_type& y) + { + ValueJoin::join(f, &x, &y); + }); + }; + + // Copy into tile + buffer.action_at(local, [&](value_type& state) + { + ValueInit::init(f, &state); + if (global < len) rocm_invoke<Tag>(f, transform_index(t_idx, td.tile_size, td.num_tiles), state, false); + }); + t_idx.barrier.wait(); + // Up sweep phase + for(std::size_t d=1;d<buffer.size();d*=2) + { + auto d2 = 2*d; + auto i = local*d2; + if(i<len) + { + auto j = i + 
d - 1; + auto k = i + d2 - 1; ValueJoin::join(f, &buffer[k], &buffer[j]); } } @@ -116,7 +218,6 @@ void scan_enqueue( auto j = i + d - 1; auto k = i + d2 - 1; auto t = buffer[k]; -// join(k, j); // no longer needed with ROCm 1.6 ValueJoin::join(f, &buffer[k], &buffer[j]); buffer[j] = t; } @@ -127,17 +228,13 @@ void scan_enqueue( }).wait(); copy(result,result_cpu.data()); -// The std::partial_sum was segfaulting, despite that this is cpu code. -// if(td.num_tiles>1) -// std::partial_sum(result_cpu.data(), result_cpu.data()+(td.num_tiles-1)*sizeof(value_type), result_cpu.data(), make_join_operator<ValueJoin>(f)); -// use this implementation instead. for(int i=1; i<td.num_tiles; i++) ValueJoin::join(f, &result_cpu[i], &result_cpu[i-1]); copy(result_cpu.data(),result); - hc::parallel_for_each(hc::extent<1>(len).tile(td.tile_size), [&,f,len,td](hc::tiled_index<1> t_idx) [[hc]] + size_t launch_len = (((len - 1) / td.tile_size) + 1) * td.tile_size; + hc::parallel_for_each(hc::extent<1>(launch_len).tile(td.tile_size), [&,f,len,td](hc::tiled_index<1> t_idx) [[hc]] { -// const auto local = t_idx.local[0]; const auto global = t_idx.global[0]; const auto tile = t_idx.tile[0]; @@ -145,12 +242,13 @@ void scan_enqueue( { auto final_state = scratch[global]; -// the join is locking up, at least with 1.6 - if (tile != 0) final_state += result[tile-1]; -// if (tile != 0) ValueJoin::join(f, &final_state, &result[tile-1]); + if (tile != 0) ValueJoin::join(f, &final_state, &result[tile-1]); rocm_invoke<Tag>(f, transform_index(t_idx, td.tile_size, td.num_tiles), final_state, true); + if(global==(len-1)) total[0] = final_state; } }).wait(); + copy(total,total_cpu.data()); + return_val = total_cpu[0]; } } // namespace Impl diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp index 12f34373c..2fe0c4192 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp @@ 
-362,6 +362,8 @@ SharedAllocationRecord( const Kokkos::Experimental::ROCmSpace & arg_space , arg_label.c_str() , SharedAllocationHeader::maximum_label_length ); + // Set last element zero, in case c_str is too long + header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; // Copy to device memory Kokkos::Impl::DeepCopy<Kokkos::Experimental::ROCmSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) ); @@ -399,6 +401,8 @@ SharedAllocationRecord( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_sp , arg_label.c_str() , SharedAllocationHeader::maximum_label_length ); + // Set last element zero, in case c_str is too long + RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; } //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp index db16db3f9..b4436ae15 100644 --- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp +++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp @@ -278,7 +278,7 @@ struct single_action void action_at(std::size_t i, Action a) [[hc]] { auto& value = static_cast<Derived&>(*this)[i]; -#if KOKKOS_ROCM_HAS_WORKAROUNDS +#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND T state = value; a(state); value = state; @@ -347,7 +347,7 @@ struct tile_buffer<T[]> #if defined (ROCM15) a(value); #else -#if KOKKOS_ROCM_HAS_WORKAROUNDS +#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND if (m > get_max_tile_array_size()) return; T state[get_max_tile_array_size()]; // std::copy(value, value+m, state); @@ -372,7 +372,6 @@ struct tile_buffer<T[]> #if defined (ROCM15) a(value); #else -//#if KOKKOS_ROCM_HAS_WORKAROUNDS if (m > get_max_tile_array_size()) return; T state[get_max_tile_array_size()]; // std::copy(value, value+m, state); diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp 
b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index e63f868c5..e88abdba5 100644 --- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -175,6 +175,27 @@ public: #endif } + template<class Closure, class ValueType> + KOKKOS_INLINE_FUNCTION + void team_broadcast(Closure const & f, ValueType& value, const int& thread_id) const + { +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { } +#else + // Make sure there is enough scratch space: + typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE + , ValueType , void >::type type ; + f( value ); + if ( m_team_base ) { + type * const local_value = ((type*) m_team_base[0]->scratch_memory()); + if(team_rank() == thread_id) *local_value = value; + memory_fence(); + team_barrier(); + value = *local_value; + } +#endif + } + template< typename Type > KOKKOS_INLINE_FUNCTION typename std::enable_if< !Kokkos::is_reducer< Type >::value , Type>::type @@ -626,39 +647,77 @@ public: //---------------------------------------- +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > inline static int team_size_max( const FunctorType & ) { -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - int pool_size = traits::execution_space::thread_pool_size(1); -#else - int pool_size = traits::execution_space::impl_thread_pool_size(1); -#endif - int max_host_team_size = Impl::HostThreadTeamData::max_team_members; - return pool_size<max_host_team_size?pool_size:max_host_team_size; - } - + int pool_size = traits::execution_space::thread_pool_size(1); + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size<max_host_team_size?pool_size:max_host_team_size; + } template< class FunctorType > - static int team_size_recommended( const FunctorType & ) - { -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - return traits::execution_space::thread_pool_size(2); -#else - return traits::execution_space::impl_thread_pool_size(2); -#endif - } - + 
inline static + int team_size_recommended( const FunctorType & ) + { + return traits::execution_space::thread_pool_size(2); + } template< class FunctorType > inline static int team_size_recommended( const FunctorType &, const int& ) - { + { + return traits::execution_space::thread_pool_size(2); + } +#endif + + template<class FunctorType> + int team_size_max( const FunctorType&, const ParallelForTag& ) const { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + int pool_size = traits::execution_space::thread_pool_size(1); +#else + int pool_size = traits::execution_space::impl_thread_pool_size(1); +#endif + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size<max_host_team_size?pool_size:max_host_team_size; + } + template<class FunctorType> + int team_size_max( const FunctorType&, const ParallelReduceTag& ) const { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - return traits::execution_space::thread_pool_size(2); + int pool_size = traits::execution_space::thread_pool_size(1); #else - return traits::execution_space::impl_thread_pool_size(2); + int pool_size = traits::execution_space::impl_thread_pool_size(1); #endif + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size<max_host_team_size?pool_size:max_host_team_size; + } + template<class FunctorType> + int team_size_recommended( const FunctorType&, const ParallelForTag& ) const { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + return traits::execution_space::thread_pool_size(2); +#else + return traits::execution_space::impl_thread_pool_size(2); +#endif + } + template<class FunctorType> + int team_size_recommended( const FunctorType&, const ParallelReduceTag& ) const { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + return traits::execution_space::thread_pool_size(2); +#else + return traits::execution_space::impl_thread_pool_size(2); +#endif + } + + + inline static + int vector_length_max() + { return 1024; } // Use arbitrary large number, is meant as a vectorizable length + + inline 
static + int scratch_size_max(int level) + { return (level==0? + 1024*32: // Roughly L1 size + 20*1024*1024); // Limit to keep compatibility with CUDA } //---------------------------------------- diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp rename to 
packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp diff --git 
a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp diff --git 
a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp diff --git 
a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp 
b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp rename to 
packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp diff --git 
a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp 
b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp rename to 
packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp diff --git 
a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp 
b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp similarity index 100% rename 
from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp 
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp rename to 
packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp diff --git 
a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp 
b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp similarity index 100% rename from 
packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp rename to 
packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp similarity index 100% rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp diff --git 
a/packages/kokkos/core/src/eti/ROCm/Makefile.eti_Experimental::ROCm b/packages/kokkos/core/src/eti/ROCm/Makefile.eti_Experimental::ROCm deleted file mode 100644 index 054360fd1..000000000 --- a/packages/kokkos/core/src/eti/ROCm/Makefile.eti_Experimental::ROCm +++ /dev/null @@ -1,288 +0,0 @@ -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.o: 
$(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.o: 
$(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp 
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp 
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp 
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp - $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp -Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp diff --git a/packages/kokkos/core/src/eti/ROCm/Makefile.eti_ROCm b/packages/kokkos/core/src/eti/ROCm/Makefile.eti_ROCm new file mode 100644 index 000000000..0423c6feb --- /dev/null +++ b/packages/kokkos/core/src/eti/ROCm/Makefile.eti_ROCm @@ -0,0 +1,288 @@ +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp + $(CXX) 
$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp 
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) 
$(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp 
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.o: 
$(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp + $(CXX) 
$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp 
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp 
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp + 
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) 
-c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp +Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index ad115dd8f..e2028db8c 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -107,7 +107,12 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK; + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -119,7 +124,11 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare , done = 1; } } - done_active = KOKKOS_IMPL_CUDA_BALLOT(done); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done); +#else + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done); +#endif } return return_val; } diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp index 801a8091d..4e41cb125 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -130,7 +130,12 @@ T atomic_exchange( volatile T * const dest , #endif int done = 0; - unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK; + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif unsigned int 
done_active = 0; while (active!=done_active) { if(!done) { @@ -141,7 +146,11 @@ T atomic_exchange( volatile T * const dest , done = 1; } } - done_active = KOKKOS_IMPL_CUDA_BALLOT(done); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done); +#else + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done); +#endif } return return_val; } diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index 8249e709d..e2e23bb5f 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -143,7 +143,12 @@ T atomic_fetch_add( volatile T * const dest , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK; + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -155,7 +160,12 @@ T atomic_fetch_add( volatile T * const dest , done = 1; } } - done_active = KOKKOS_IMPL_CUDA_BALLOT(done); + +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done); +#else + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done); +#endif } return return_val; } diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index 3f58c5539..dd69c967c 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -135,7 +135,12 @@ T atomic_fetch_sub( volatile T * const dest , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); 
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK; + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -146,7 +151,11 @@ T atomic_fetch_sub( volatile T * const dest , done = 1; } } - done_active = KOKKOS_IMPL_CUDA_BALLOT(done); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done); +#else + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done); +#endif } return return_val; } diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp index 6140d4589..74e9db303 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -246,7 +246,12 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , // This is a way to (hopefully) avoid dead lock in a warp T return_val; int done = 0; - unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK; + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -257,7 +262,11 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , done=1; } } - done_active = KOKKOS_IMPL_CUDA_BALLOT(done); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done); +#else + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done); +#endif } return return_val; #endif @@ -285,7 +294,12 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); 
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK; + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1); +#else + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1); +#endif unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -296,7 +310,11 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , done=1; } } - done_active = KOKKOS_IMPL_CUDA_BALLOT(done); +#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done); +#else + done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done); +#endif } return return_val; #endif diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp index eeec2d1f4..b18134f40 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp @@ -45,13 +45,17 @@ #ifdef _WIN32 +#ifndef NOMINMAX #define NOMINMAX +#endif #include <winsock2.h> -#include <Windows.h> +#include <windows.h> namespace Kokkos { namespace Impl { +#ifdef _MSC_VER _declspec(align(16)) +#endif struct cas128_t { LONGLONG lower; @@ -60,7 +64,11 @@ namespace Kokkos { bool operator != (const cas128_t& a) const { return (lower != a.lower) || upper != a.upper; } - }; + } +#ifdef __GNUC__ + __attribute__ ((aligned (16))) +#endif + ; } template < typename T > diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp index 8184dad3e..628e070a0 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp @@ -780,8 +780,20 @@ void print_configuration( std::ostream & out , const bool detail ) #else msg << "no" << std::endl; #endif - msg << " KOKKOS_ENABLE_CXX1Z: "; -#ifdef KOKKOS_ENABLE_CXX1Z + msg << " KOKKOS_ENABLE_CXX14: "; +#ifdef KOKKOS_ENABLE_CXX14 + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " 
KOKKOS_ENABLE_CXX17: "; +#ifdef KOKKOS_ENABLE_CXX17 + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CXX20: "; +#ifdef KOKKOS_ENABLE_CXX20 msg << "yes" << std::endl; #else msg << "no" << std::endl; diff --git a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index 699902e32..680e937db 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -235,6 +235,8 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space , arg_label.c_str() , SharedAllocationHeader::maximum_label_length ); + // Set last element zero, in case c_str is too long + RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; } //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp index da9ce6b9f..d8cb7593b 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -356,6 +356,8 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space , arg_label.c_str() , SharedAllocationHeader::maximum_label_length ); + // Set last element zero, in case c_str is too long + RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; } //---------------------------------------------------------------------------- diff --git a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index 558eef9e4..fff48e87f 100644 --- a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -144,6 +144,30 @@ public: return m_team_rank == 0; } + + inline + bool team_rendezvous(const int source_team_rank) const noexcept + { + int * ptr = (int 
*)(m_team_scratch + m_team_rendezvous); + HostBarrier::split_arrive( ptr + , m_team_size + , m_team_rendezvous_step + ); + if (m_team_rank != source_team_rank) { + HostBarrier::wait( ptr + , m_team_size + , m_team_rendezvous_step + ); + } + else { + HostBarrier::split_master_wait( ptr + , m_team_size + , m_team_rendezvous_step + ); + } + + return (m_team_rank == source_team_rank); + } inline void team_rendezvous_release() const noexcept @@ -540,15 +564,16 @@ public: { if ( 1 < m_data.m_team_size ) { T volatile * const shared_value = (T*) m_data.team_reduce(); - + // Don't overwrite shared memory until all threads arrive - if ( m_data.team_rendezvous() ) { + if ( m_data.team_rendezvous(source_team_rank) ) { + // All threads have entered 'team_rendezvous' // only this thread returned from 'team_rendezvous' // with a return value of 'true' - *shared_value = value ; + *shared_value = value; m_data.team_rendezvous_release(); // This thread released all other threads from 'team_rendezvous' @@ -574,7 +599,7 @@ public: // Don't overwrite shared memory until all threads arrive - if ( m_data.team_rendezvous() ) { + if ( m_data.team_rendezvous(source_team_rank) ) { // All threads have entered 'team_rendezvous' // only this thread returned from 'team_rendezvous' diff --git a/packages/kokkos/core/src/impl/Kokkos_OldMacros.hpp b/packages/kokkos/core/src/impl/Kokkos_OldMacros.hpp index bad158c29..c3198c0f1 100644 --- a/packages/kokkos/core/src/impl/Kokkos_OldMacros.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_OldMacros.hpp @@ -142,18 +142,13 @@ #endif #endif -#ifdef KOKKOS_HAVE_CXX1Z + +#if defined(KOKKOS_HAVE_CXX1Z) || defined(KOKKOS_ENABLE_CXX17) #ifndef KOKKOS_ENABLE_CXX1Z #define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z #endif #endif -#ifdef KOKKOS_HAVE_DEBUG -#ifndef KOKKOS_DEBUG -#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG -#endif -#endif - #ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA #ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA 
KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA @@ -482,6 +477,12 @@ #define KOKKOS_HAVE_DEBUG 1 #endif +#ifdef KOKKOS_HAVE_DEBUG +#ifndef KOKKOS_DEBUG +#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG +#endif +#endif + #if (!defined(KOKKOS_HAVE_HWLOC)) && defined(KOKKOS_ENABLE_HWLOC) #define KOKKOS_HAVE_HWLOC 1 #endif diff --git a/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp index 7b85909ed..d84a85462 100644 --- a/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -60,10 +60,10 @@ template class TaskQueue< Kokkos::Serial > ; void TaskQueueSpecialization< Kokkos::Serial >::execute ( TaskQueue< Kokkos::Serial > * const queue ) { - using execution_space = Kokkos::Serial ; - using queue_type = TaskQueue< execution_space > ; + using exec_space = Kokkos::Serial ; + using tqs_queue_type = TaskQueue< exec_space > ; using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< execution_space > ; + using Member = Impl::HostThreadTeamMember< exec_space > ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; @@ -83,9 +83,9 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute task_root_type * task = end ; - for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) { for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); + task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] ); } } @@ -120,10 +120,10 @@ void TaskQueueSpecialization< Kokkos::Serial > :: iff_single_thread_recursive_execute( TaskQueue< Kokkos::Serial > * const queue ) { - using execution_space = Kokkos::Serial ; - using queue_type = TaskQueue< execution_space > ; + using exec_space = Kokkos::Serial ; + using tqs_queue_type = TaskQueue< exec_space > ; using task_root_type = TaskBase< void , void , 
void > ; - using Member = Impl::HostThreadTeamMember< execution_space > ; + using Member = Impl::HostThreadTeamMember< exec_space > ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; @@ -139,9 +139,9 @@ void TaskQueueSpecialization< Kokkos::Serial > :: task = end ; - for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) { for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); + task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] ); } } diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp index 3c306ec94..d4e3a03d3 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp @@ -123,8 +123,8 @@ private: typedef typename Traits::value_type::pointer handle_type ; - handle_type m_handle ; - offset_type m_offset ; + handle_type m_impl_handle ; + offset_type m_impl_offset ; size_t m_stride ; typedef typename Traits::value_type::value_type scalar_type ; @@ -140,8 +140,8 @@ private: KOKKOS_INLINE_FUNCTION ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset ) - : m_handle( arg_handle ) - , m_offset( arg_offset ) + : m_impl_handle( arg_handle ) + , m_impl_offset( arg_offset ) , m_stride( is_contiguous_reference ? 
0 : arg_offset.span() ) {} @@ -154,44 +154,44 @@ public: template< typename iType > KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const - { return m_offset.m_dim.extent(r); } + { return m_impl_offset.m_dim.extent(r); } KOKKOS_INLINE_FUNCTION constexpr typename Traits::array_layout layout() const - { return m_offset.layout(); } + { return m_impl_offset.layout(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_impl_offset.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_impl_offset.dimension_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_impl_offset.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_impl_offset.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_impl_offset.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_impl_offset.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_impl_offset.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_impl_offset.dimension_7(); } // Is a regular 
layout with uniform striding for each index. using is_regular = typename offset_type::is_regular ; - KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_impl_offset.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_impl_offset.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_impl_offset.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_impl_offset.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_impl_offset.stride_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_impl_offset.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_impl_offset.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_impl_offset.stride_7(); } //---------------------------------------- // Range span /** \brief Span of the mapped range */ KOKKOS_INLINE_FUNCTION constexpr size_t span() const - { return m_offset.span() * Array_N ; } + { return m_impl_offset.span() * Array_N ; } /** \brief Is the mapped range span contiguous */ KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const - { return m_offset.span_is_contiguous(); } + { 
return m_impl_offset.span_is_contiguous(); } typedef typename std::conditional< is_contiguous_reference , contiguous_reference , strided_reference >::type reference_type ; @@ -199,63 +199,63 @@ public: /** \brief If data references are lvalue_reference than can query pointer to memory */ KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const - { return m_handle ; } + { return m_impl_handle ; } //---------------------------------------- // The View class performs all rank and bounds checking before // calling these element reference methods. KOKKOS_FORCEINLINE_FUNCTION - reference_type reference() const { return reference_type( m_handle + 0 , Array_N , 0 ); } + reference_type reference() const { return reference_type( m_impl_handle + 0 , Array_N , 0 ); } template< typename I0 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 ) const - { return reference_type( m_handle + m_offset(i0) * Array_S , Array_N , m_stride ); } + { return reference_type( m_impl_handle + m_impl_offset(i0) * Array_S , Array_N , m_stride ); } template< typename I0 , typename I1 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 ) const - { return reference_type( m_handle + m_offset(i0,i1) * Array_S , Array_N , m_stride ); } + { return reference_type( m_impl_handle + m_impl_offset(i0,i1) * Array_S , Array_N , m_stride ); } template< typename I0 , typename I1 , typename I2 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const - { return reference_type( m_handle + m_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); } + { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); } template< typename I0 , typename I1 , typename I2 , typename I3 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const - { return reference_type( m_handle + m_offset(i0,i1,i2,i3) * Array_S , Array_N , 
m_stride ); } + { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); } template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , const I4 & i4 ) const - { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); } + { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); } template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , const I4 & i4 , const I5 & i5 ) const - { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); } + { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); } template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const - { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); } + { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); } template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 , typename I7 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const - { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); } + { return reference_type( m_impl_handle + 
m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); } //---------------------------------------- @@ -269,31 +269,31 @@ public: /** \brief Span, in bytes, of the referenced memory */ KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const { - return ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + return ( m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); } //---------------------------------------- KOKKOS_INLINE_FUNCTION ~ViewMapping() {} - KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset(), m_stride(0) {} + KOKKOS_INLINE_FUNCTION ViewMapping() : m_impl_handle(), m_impl_offset(), m_stride(0) {} KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs ) - : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {} + : m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ), m_stride( rhs.m_stride ) {} KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs ) - { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; ; return *this ; } + { m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; m_stride = rhs.m_stride ; ; return *this ; } KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs ) - : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {} + : m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ), m_stride( rhs.m_stride ) {} KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs ) - { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; return *this ; } + { m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; m_stride = rhs.m_stride ; return *this ; } //---------------------------------------- template< class ... Args > KOKKOS_INLINE_FUNCTION ViewMapping( pointer_type ptr , Args ... 
args ) - : m_handle( ptr ) - , m_offset( std::integral_constant< unsigned , 0 >() , args... ) - , m_stride( m_offset.span() ) + : m_impl_handle( ptr ) + , m_impl_offset( std::integral_constant< unsigned , 0 >() , args... ) + , m_stride( m_impl_offset.span() ) {} //---------------------------------------- @@ -315,10 +315,10 @@ public: typedef std::integral_constant< unsigned , alloc_prop::allow_padding ? sizeof(scalar_type) : 0 > padding ; - m_offset = offset_type( padding(), arg_layout ); + m_impl_offset = offset_type( padding(), arg_layout ); const size_t alloc_size = - ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + ( m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); // Allocate memory from the memory space and create tracking record. record_type * const record = @@ -327,14 +327,14 @@ public: , alloc_size ); if ( alloc_size ) { - m_handle = + m_impl_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) ); if ( alloc_prop::initialize ) { // The functor constructs and destroys record->m_destroy = functor_type( ((Kokkos::Impl::ViewCtorProp<void,execution_space> const & )arg_prop).value - , (pointer_type) m_handle - , m_offset.span() * Array_N + , (pointer_type) m_impl_handle + , m_impl_offset.span() * Array_N ); record->m_destroy.construct_shared_allocation(); @@ -397,8 +397,8 @@ public: typedef typename DstType::offset_type dst_offset_type ; - dst.m_offset = dst_offset_type( src.m_offset ); - dst.m_handle = src.m_handle ; + dst.m_impl_offset = dst_offset_type( src.m_impl_offset ); + dst.m_impl_handle = src.m_impl_handle ; dst.m_stride = src.m_stride ; } }; @@ -448,7 +448,7 @@ public: // Array dimension becomes the last dimension. // Arguments beyond the destination rank are ignored. 
if ( src.span_is_contiguous() ) { // not padded - dst.m_offset = dst_offset_type( std::integral_constant<unsigned,0>() , + dst.m_impl_offset = dst_offset_type( std::integral_constant<unsigned,0>() , typename DstTraits::array_layout ( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() ) , ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() ) @@ -463,7 +463,7 @@ public: else { // is padded typedef std::integral_constant<unsigned,sizeof(typename SrcTraits::value_type::value_type)> padded ; - dst.m_offset = dst_offset_type( padded() , + dst.m_impl_offset = dst_offset_type( padded() , typename DstTraits::array_layout ( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() ) , ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() ) @@ -476,7 +476,7 @@ public: ) ); } - dst.m_handle = src.m_handle ; + dst.m_impl_handle = src.m_impl_handle ; } }; @@ -579,11 +579,11 @@ public: typedef typename DstType::handle_type dst_handle_type ; const SubviewExtents< SrcTraits::rank , rank > - extents( src.m_offset.m_dim , args... ); + extents( src.m_impl_offset.m_dim , args... ); - dst.m_offset = dst_offset_type( src.m_offset , extents ); - dst.m_handle = dst_handle_type( src.m_handle + - src.m_offset( extents.domain_offset(0) + dst.m_impl_offset = dst_offset_type( src.m_impl_offset , extents ); + dst.m_impl_handle = dst_handle_type( src.m_impl_handle + + src.m_impl_offset( extents.domain_offset(0) , extents.domain_offset(1) , extents.domain_offset(2) , extents.domain_offset(3) diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp new file mode 100644 index 000000000..a64101110 --- /dev/null +++ b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp @@ -0,0 +1,945 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP +#define KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP + +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE + +#include <Kokkos_Layout.hpp> +#include <Kokkos_View.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// View offset and mapping for tiled view's + +template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 > +struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, 0, 0, 0, 0, 0, 0, true> > : public std::true_type {}; + +template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 > +struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, 0, 0, 0, 0, 0, true> > : public std::true_type {}; + +template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 > +struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, 0, 0, 0, 0, true> > : public std::true_type {}; + +template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 > +struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, 0, 0, 0, true> > : public std::true_type {}; + +template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 > +struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, 0, 0, true> > : public std::true_type {}; + +template < Kokkos::Iterate 
OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 > +struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, 0, true> > : public std::true_type {}; + +template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 , unsigned ArgN7 > +struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > : public std::true_type {}; + + +template< class L > +struct is_array_layout_tiled : public std::false_type {}; + +template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 , unsigned ArgN3 , unsigned ArgN4 , unsigned ArgN5 , unsigned ArgN6 , unsigned ArgN7 , bool IsPowerTwo > +struct is_array_layout_tiled < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, IsPowerTwo> > : public std::true_type {}; // Last template parameter "true" meaning this currently only supports powers-of-two + + +namespace Impl { + +template< class Dimension , class Layout > +struct ViewOffset< Dimension , Layout , + typename std::enable_if<( + ( Dimension::rank <= 8 ) + && + ( Dimension::rank >= 2 ) + && + is_array_layout< Layout >::value + && + is_array_layout_tiled< Layout >::value + )>::type > +{ +public: + +// enum { outer_pattern = Layout::outer_pattern }; +// enum { inner_pattern = Layout::inner_pattern }; + static constexpr Kokkos::Iterate outer_pattern = Layout::outer_pattern; + static constexpr Kokkos::Iterate inner_pattern = Layout::inner_pattern; + + enum { VORank = Dimension::rank }; + + enum { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) }; + enum { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) }; + enum { 
SHIFT_2 = Kokkos::Impl::integral_power_of_two(Layout::N2) }; + enum { SHIFT_3 = Kokkos::Impl::integral_power_of_two(Layout::N3) }; + enum { SHIFT_4 = Kokkos::Impl::integral_power_of_two(Layout::N4) }; + enum { SHIFT_5 = Kokkos::Impl::integral_power_of_two(Layout::N5) }; + enum { SHIFT_6 = Kokkos::Impl::integral_power_of_two(Layout::N6) }; + enum { SHIFT_7 = Kokkos::Impl::integral_power_of_two(Layout::N7) }; + enum { MASK_0 = Layout::N0 - 1 }; + enum { MASK_1 = Layout::N1 - 1 }; + enum { MASK_2 = Layout::N2 - 1 }; + enum { MASK_3 = Layout::N3 - 1 }; + enum { MASK_4 = Layout::N4 - 1 }; + enum { MASK_5 = Layout::N5 - 1 }; + enum { MASK_6 = Layout::N6 - 1 }; + enum { MASK_7 = Layout::N7 - 1 }; + + enum { SHIFT_2T = SHIFT_0 + SHIFT_1 }; + enum { SHIFT_3T = SHIFT_0 + SHIFT_1 + SHIFT_2 }; + enum { SHIFT_4T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 }; + enum { SHIFT_5T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 }; + enum { SHIFT_6T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 }; + enum { SHIFT_7T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6 }; + enum { SHIFT_8T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6 + SHIFT_7 }; + + // Is an irregular layout that does not have uniform striding for each index. + using is_mapping_plugin = std::true_type ; + using is_regular = std::false_type ; + + typedef size_t size_type ; + typedef Dimension dimension_type ; + typedef Layout array_layout ; + + dimension_type m_dim ; + size_type m_tile_N0 ; // Num tiles dim 0 + size_type m_tile_N1 ; + size_type m_tile_N2 ; + size_type m_tile_N3 ; + size_type m_tile_N4 ; + size_type m_tile_N5 ; + size_type m_tile_N6 ; + size_type m_tile_N7 ; + + //---------------------------------------- + +#define DEBUG_OUTPUT_CHECK 0 + + // Rank 2 + template< typename I0 , typename I1 > + KOKKOS_INLINE_FUNCTION + size_type operator()( I0 const & i0 , I1 const & i1 ) const { + auto tile_offset = (outer_pattern == (Kokkos::Iterate::Left)) + ? 
( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1)) ) << SHIFT_2T) + : ( ( (m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) ) << SHIFT_2T) ; + // ( num_tiles[1] * ti0 + ti1 ) * FTD + + auto local_offset = (inner_pattern == (Kokkos::Iterate::Left)) + ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) + : ( ((i0 & MASK_0) << SHIFT_1) + (i1 & MASK_1) ) ; + // ( tile_dim[1] * li0 + li1 ) + +#if DEBUG_OUTPUT_CHECK + std::cout << "Am I Outer Left? " << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl; + std::cout << "Am I Inner Left? " << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl; + std::cout << "i0 = " << i0 + << " i1 = " << i1 + << "\ntilei0 = " << (i0>>SHIFT_0) + << " tilei1 = " << (i1>>SHIFT_1) + << "locali0 = " << (i0 & MASK_0) + << "\nlocali1 = " << (i1 & MASK_1) + << std::endl; +#endif + + return tile_offset + local_offset; + } + + // Rank 3 + template< typename I0 , typename I1 , typename I2 > + KOKKOS_INLINE_FUNCTION + size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 ) const { + auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) + ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*(i2>>SHIFT_2)) ) << SHIFT_3T) + : ( ( m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2) ) << SHIFT_3T) ; + + auto local_offset = (inner_pattern == Kokkos::Iterate::Left) + ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) ) + : ( ((i0 & MASK_0) << (SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_2)) + (i2 & MASK_2) ) ; + +#if DEBUG_OUTPUT_CHECK + std::cout << "Am I Outer Left? " << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl; + std::cout << "Am I Inner Left? 
" << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl; + std::cout << "i0 = " << i0 + << " i1 = " << i1 + << " i2 = " << i2 + << "\ntilei0 = " << (i0>>SHIFT_0) + << " tilei1 = " << (i1>>SHIFT_1) + << " tilei2 = " << (i2>>SHIFT_2) + << "\nlocali0 = " << (i0 & MASK_0) + << "locali1 = " << (i1 & MASK_1) + << "locali2 = " << (i2 & MASK_2) + << std::endl; +#endif + + return tile_offset + local_offset; + } + + // Rank 4 + template< typename I0 , typename I1 , typename I2 , typename I3 > + KOKKOS_INLINE_FUNCTION + size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 ) const { + auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) + ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*(i3>>SHIFT_3))) ) << SHIFT_4T) + : ( ( m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3) ) << SHIFT_4T) ; + + auto local_offset = (inner_pattern == Kokkos::Iterate::Left) + ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) ) + : ( ((i0 & MASK_0) << (SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_3)) + (i3 & MASK_3) ) ; + + return tile_offset + local_offset; + } + + // Rank 5 + template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 > + KOKKOS_INLINE_FUNCTION + size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 ) const { + auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) + ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*((i3>>SHIFT_3) + m_tile_N3*(i4>>SHIFT_4)))) ) << SHIFT_5T) + : ( ( m_tile_N4*(m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3)) + (i4>>SHIFT_4) ) << SHIFT_5T) ; + + auto local_offset = (inner_pattern == Kokkos::Iterate::Left) + ? 
( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) + ((i4 & MASK_4)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3)) ) + : ( ((i0 & MASK_0) << (SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_4+SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_4+SHIFT_3)) + ((i3 & MASK_3)<<(SHIFT_4)) + (i4 & MASK_4) ) ; + + return tile_offset + local_offset; + } + + // Rank 6 + template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 > + KOKKOS_INLINE_FUNCTION + size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 ) const { + auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) + ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*((i3>>SHIFT_3) + m_tile_N3*((i4>>SHIFT_4) + m_tile_N4*(i5>>SHIFT_5))))) ) << SHIFT_6T) + : ( ( m_tile_N5*(m_tile_N4*(m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3)) + (i4>>SHIFT_4)) + (i5>>SHIFT_5) ) << SHIFT_6T) ; + + auto local_offset = (inner_pattern == Kokkos::Iterate::Left) + ? 
( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) + ((i4 & MASK_4)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3)) + ((i5 & MASK_5)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4)) ) + : ( ((i0 & MASK_0) << (SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_5+SHIFT_4+SHIFT_3)) + ((i3 & MASK_3)<<(SHIFT_5+SHIFT_4)) + ((i4 & MASK_4)<<(SHIFT_5)) + (i5 & MASK_5) ) ; + + return tile_offset + local_offset; + } + + // Rank 7 + template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 > + KOKKOS_INLINE_FUNCTION + size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 , I6 const & i6 ) const { + auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) + ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*((i3>>SHIFT_3) + m_tile_N3*((i4>>SHIFT_4) + m_tile_N4*((i5>>SHIFT_5) + m_tile_N5*(i6>>SHIFT_6)))))) ) << SHIFT_7T) + : ( ( m_tile_N6*(m_tile_N5*(m_tile_N4*(m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3)) + (i4>>SHIFT_4)) + (i5>>SHIFT_5)) + (i6>>SHIFT_6) ) << SHIFT_7T) ; + + auto local_offset = (inner_pattern == Kokkos::Iterate::Left) + ? 
( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) + ((i4 & MASK_4)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3)) + ((i5 & MASK_5)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4)) + ((i6 & MASK_6)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4+SHIFT_5)) ) + : ( ((i0 & MASK_0) << (SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3)) + ((i3 & MASK_3)<<(SHIFT_6+SHIFT_5+SHIFT_4)) + ((i4 & MASK_4)<<(SHIFT_6+SHIFT_5)) + ((i5 & MASK_5)<<(SHIFT_6)) + (i6 & MASK_6) ) ; + + return tile_offset + local_offset; + } + + // Rank 8 + template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 , typename I7 > + KOKKOS_INLINE_FUNCTION + size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 , I6 const & i6 , I7 const & i7 ) const { + auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) + ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*((i3>>SHIFT_3) + m_tile_N3*((i4>>SHIFT_4) + m_tile_N4*((i5>>SHIFT_5) + m_tile_N5*((i6>>SHIFT_6) + m_tile_N6*(i7>>SHIFT_7))))))) ) << SHIFT_8T) + : ( ( m_tile_N7*(m_tile_N6*(m_tile_N5*(m_tile_N4*(m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3)) + (i4>>SHIFT_4)) + (i5>>SHIFT_5)) + (i6>>SHIFT_6)) + (i7>>SHIFT_7) ) << SHIFT_8T) ; + + auto local_offset = (inner_pattern == Kokkos::Iterate::Left) + ? 
( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) + ((i4 & MASK_4)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3)) + ((i5 & MASK_5)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4)) + ((i6 & MASK_6)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4+SHIFT_5)) + ((i7 & MASK_7)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4+SHIFT_5+SHIFT_6)) ) + : ( ((i0 & MASK_0) << (SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3)) + ((i3 & MASK_3)<<(SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4)) + ((i4 & MASK_4)<<(SHIFT_7+SHIFT_6+SHIFT_5)) + ((i5 & MASK_5)<<(SHIFT_7+SHIFT_6)) + ((i6 & MASK_6)<<(SHIFT_7)) + (i7 & MASK_7) ) ; + + return tile_offset + local_offset; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION constexpr // Rebuilds an array_layout object from the extents stored in m_dim. + array_layout layout() const // Returns: array_layout constructed from m_dim.N0 .. m_dim.N7, in that order. + { return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N3 , m_dim.N4 , m_dim.N5 , m_dim.N6 , m_dim.N7 ); } // exactly one argument per dimension; N2 must not be passed twice + + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; } + + KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; } + + // Strides are meaningless due to irregularity +
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0 ; } + KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0 ; } + + // Writes 0 into s[0] .. s[rank] (rank+1 entries, so s must have room for rank+1 values); no stride and no total length is reported here + template< typename iType > + KOKKOS_INLINE_FUNCTION + void stride( iType * const s ) const + { + s[0] = 0 ; + if ( 0 < dimension_type::rank ) { s[1] = 0 ; } + if ( 1 < dimension_type::rank ) { s[2] = 0 ; } + if ( 2 < dimension_type::rank ) { s[3] = 0 ; } + if ( 3 < dimension_type::rank ) { s[4] = 0 ; } + if ( 4 < dimension_type::rank ) { s[5] = 0 ; } + if ( 5 < dimension_type::rank ) { s[6] = 0 ; } + if ( 6 < dimension_type::rank ) { s[7] = 0 ; } + if ( 7 < dimension_type::rank ) { s[8] = 0 ; } + } + + KOKKOS_INLINE_FUNCTION constexpr size_type span() const + { + // Rank2: ( NumTile0 * ( NumTile1 ) ) * TileSize, etc + return ( VORank == 2 ) ? ( m_tile_N0 * m_tile_N1 ) << SHIFT_2T + : ( VORank == 3 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 ) << SHIFT_3T + : ( VORank == 4 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 ) << SHIFT_4T + : ( VORank == 5 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 ) << SHIFT_5T + : ( VORank == 6 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 ) << SHIFT_6T + : ( VORank == 7 ) ? 
( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 * m_tile_N6 ) << SHIFT_7T + : ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 * m_tile_N6 * m_tile_N7 ) << SHIFT_8T ; + } + + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const + { + return true; + } + + //---------------------------------------- + +#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND + KOKKOS_INLINE_FUNCTION ~ViewOffset() {} + KOKKOS_INLINE_FUNCTION ViewOffset() {} + KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & rhs ) + : m_dim(rhs.m_dim) + , m_tile_N0(rhs.m_tile_N0) + , m_tile_N1(rhs.m_tile_N1) + , m_tile_N2(rhs.m_tile_N2) + , m_tile_N3(rhs.m_tile_N3) + , m_tile_N4(rhs.m_tile_N4) + , m_tile_N5(rhs.m_tile_N5) + , m_tile_N6(rhs.m_tile_N6) + , m_tile_N7(rhs.m_tile_N7) + {} + + KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & rhs ) { + m_dim = rhs.m_dim; + m_tile_N0 = rhs.m_tile_N0; + m_tile_N1 = rhs.m_tile_N1; + m_tile_N2 = rhs.m_tile_N2; + m_tile_N3 = rhs.m_tile_N3; + m_tile_N4 = rhs.m_tile_N4; + m_tile_N5 = rhs.m_tile_N5; + m_tile_N6 = rhs.m_tile_N6; + m_tile_N7 = rhs.m_tile_N7; + return *this; + } + +#else + KOKKOS_INLINE_FUNCTION ~ViewOffset() = default; + KOKKOS_INLINE_FUNCTION ViewOffset() = default; + KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default; + KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default; +#endif + + template< unsigned TrivialScalarSize > + KOKKOS_INLINE_FUNCTION + constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const & , + array_layout const arg_layout ) + : m_dim( arg_layout.dimension[0], arg_layout.dimension[1], arg_layout.dimension[2], arg_layout.dimension[3], arg_layout.dimension[4], arg_layout.dimension[5], arg_layout.dimension[6], arg_layout.dimension[7] ) + , m_tile_N0( ( arg_layout.dimension[0] + MASK_0 ) >> SHIFT_0 /* number of tiles in first dimension */ ) + , m_tile_N1( ( arg_layout.dimension[1] + MASK_1 ) >> SHIFT_1 ) 
+ , m_tile_N2( (VORank > 2 ) ? ( arg_layout.dimension[2] + MASK_2 ) >> SHIFT_2 : 0 ) + , m_tile_N3( (VORank > 3 ) ? ( arg_layout.dimension[3] + MASK_3 ) >> SHIFT_3 : 0 ) + , m_tile_N4( (VORank > 4 ) ? ( arg_layout.dimension[4] + MASK_4 ) >> SHIFT_4 : 0 ) + , m_tile_N5( (VORank > 5 ) ? ( arg_layout.dimension[5] + MASK_5 ) >> SHIFT_5 : 0 ) + , m_tile_N6( (VORank > 6 ) ? ( arg_layout.dimension[6] + MASK_6 ) >> SHIFT_6 : 0 ) + , m_tile_N7( (VORank > 7 ) ? ( arg_layout.dimension[7] + MASK_7 ) >> SHIFT_7 : 0 ) + {} + +}; + + +//---------------------------------------- + + +// ViewMapping assign method needed in order to return a 'subview' tile as a proper View +// The outer iteration pattern determines the mapping of the pointer offset to the beginning of requested tile +// The inner iteration pattern is needed for the layout of the tile's View to be returned +// Rank 2 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + , typename iType0 , typename iType1 + > +struct ViewMapping + < typename std::enable_if< (N2 == 0 && N3 == 0 && N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void + , Kokkos::ViewTraits<T**,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...> + , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> + , iType0 + , iType1 > +{ + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> src_layout ; + typedef Kokkos::ViewTraits< T** , src_layout , P... > src_traits ; + + enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; + enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::ViewTraits< T[N0][N1] , array_layout , P ... 
> traits ; + typedef Kokkos::View< T[N0][N1] , array_layout , P ... > type ; + + KOKKOS_INLINE_FUNCTION static + void assign( ViewMapping< traits , void > & dst + , const ViewMapping< src_traits , void > & src + , const src_layout & + , const iType0 i_tile0 + , const iType1 i_tile1 + ) + { + typedef ViewMapping< traits , void > dst_map_type ; + typedef ViewMapping< src_traits , void > src_map_type ; + typedef typename dst_map_type::handle_type dst_handle_type ; + typedef typename dst_map_type::offset_type dst_offset_type ; + typedef typename src_map_type::offset_type src_offset_type ; + + dst = dst_map_type( + dst_handle_type( src.m_impl_handle + + ( is_outer_left ? ( (i_tile0 + src.m_impl_offset.m_tile_N0 * i_tile1) << src_offset_type::SHIFT_2T ) + : ( (src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1) << src_offset_type::SHIFT_2T ) + ) // offset to start of the tile + ) + , dst_offset_type() ); + } +}; + +// Rank 3 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + , typename iType0 , typename iType1 , typename iType2 + > +struct ViewMapping + < typename std::enable_if< (N3 == 0 && N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void + , Kokkos::ViewTraits<T***,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...> + , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> + , iType0 + , iType1 + , iType2 > +{ + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> src_layout ; + typedef Kokkos::ViewTraits< T*** , src_layout , P... 
> src_traits ; + + enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; + enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::ViewTraits< T[N0][N1][N2] , array_layout , P ... > traits ; + typedef Kokkos::View< T[N0][N1][N2] , array_layout , P ... > type ; + + KOKKOS_INLINE_FUNCTION static + void assign( ViewMapping< traits , void > & dst + , const ViewMapping< src_traits , void > & src + , const src_layout & + , const iType0 i_tile0 + , const iType1 i_tile1 + , const iType2 i_tile2 + ) + { + typedef ViewMapping< traits , void > dst_map_type ; + typedef ViewMapping< src_traits , void > src_map_type ; + typedef typename dst_map_type::handle_type dst_handle_type ; + typedef typename dst_map_type::offset_type dst_offset_type ; + typedef typename src_map_type::offset_type src_offset_type ; + + dst = dst_map_type( + dst_handle_type( src.m_impl_handle + + ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * i_tile2 ) ) << src_offset_type::SHIFT_3T ) + : ( ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) << src_offset_type::SHIFT_3T ) + ) + ) // offset to start of the tile + , dst_offset_type() ); + } +}; + +// Rank 4 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... 
P + , typename iType0 , typename iType1 , typename iType2 , typename iType3 + > +struct ViewMapping + < typename std::enable_if< (N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void + , Kokkos::ViewTraits<T****,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...> + , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> + , iType0 + , iType1 + , iType2 + , iType3 > +{ + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> src_layout ; + typedef Kokkos::ViewTraits< T**** , src_layout , P... > src_traits ; + + enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; + enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::ViewTraits< T[N0][N1][N2][N3] , array_layout , P ... > traits ; + typedef Kokkos::View< T[N0][N1][N2][N3] , array_layout , P ... > type ; + + KOKKOS_INLINE_FUNCTION static + void assign( ViewMapping< traits , void > & dst + , const ViewMapping< src_traits , void > & src + , const src_layout & + , const iType0 i_tile0 + , const iType1 i_tile1 + , const iType2 i_tile2 + , const iType3 i_tile3 + ) + { + typedef ViewMapping< traits , void > dst_map_type ; + typedef ViewMapping< src_traits , void > src_map_type ; + typedef typename dst_map_type::handle_type dst_handle_type ; + typedef typename dst_map_type::offset_type dst_offset_type ; + typedef typename src_map_type::offset_type src_offset_type ; + + dst = dst_map_type( + dst_handle_type( src.m_impl_handle + + ( is_outer_left ? 
( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * i_tile3 ) ) ) << src_offset_type::SHIFT_4T ) + : ( ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) << src_offset_type::SHIFT_4T ) + ) + ) // offset to start of the tile + , dst_offset_type() ); + } +}; + +// Rank 5 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 + > +struct ViewMapping + < typename std::enable_if< (N5 == 0 && N6 == 0 && N7 == 0) >::type //void + , Kokkos::ViewTraits<T*****,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...> + , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> + , iType0 + , iType1 + , iType2 + , iType3 + , iType4 > +{ + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> src_layout ; + typedef Kokkos::ViewTraits< T***** , src_layout , P... > src_traits ; + + enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; + enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::ViewTraits< T[N0][N1][N2][N3][N4] , array_layout , P ... > traits ; + typedef Kokkos::View< T[N0][N1][N2][N3][N4] , array_layout , P ... 
> type ; + + KOKKOS_INLINE_FUNCTION static + void assign( ViewMapping< traits , void > & dst + , const ViewMapping< src_traits , void > & src + , const src_layout & + , const iType0 i_tile0 + , const iType1 i_tile1 + , const iType2 i_tile2 + , const iType3 i_tile3 + , const iType4 i_tile4 + ) + { + typedef ViewMapping< traits , void > dst_map_type ; + typedef ViewMapping< src_traits , void > src_map_type ; + typedef typename dst_map_type::handle_type dst_handle_type ; + typedef typename dst_map_type::offset_type dst_offset_type ; + typedef typename src_map_type::offset_type src_offset_type ; + + dst = dst_map_type( + dst_handle_type( src.m_impl_handle + + ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * ( i_tile3 + src.m_impl_offset.m_tile_N3 * i_tile4 ) ) ) ) << src_offset_type::SHIFT_5T ) + : ( ( src.m_impl_offset.m_tile_N4 * ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) << src_offset_type::SHIFT_5T ) + ) + ) // offset to start of the tile + , dst_offset_type() ); + } +}; + +// Rank 6 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... 
P + , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5 + > +struct ViewMapping + < typename std::enable_if< (N6 == 0 && N7 == 0) >::type //void + , Kokkos::ViewTraits<T******,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...> + , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> + , iType0 + , iType1 + , iType2 + , iType3 + , iType4 + , iType5 > +{ + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> src_layout ; + typedef Kokkos::ViewTraits< T****** , src_layout , P... > src_traits ; + + enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; + enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5] , array_layout , P ... > traits ; + typedef Kokkos::View< T[N0][N1][N2][N3][N4][N5] , array_layout , P ... > type ; + + KOKKOS_INLINE_FUNCTION static + void assign( ViewMapping< traits , void > & dst + , const ViewMapping< src_traits , void > & src + , const src_layout & + , const iType0 i_tile0 + , const iType1 i_tile1 + , const iType2 i_tile2 + , const iType3 i_tile3 + , const iType4 i_tile4 + , const iType5 i_tile5 + ) + { + typedef ViewMapping< traits , void > dst_map_type ; + typedef ViewMapping< src_traits , void > src_map_type ; + typedef typename dst_map_type::handle_type dst_handle_type ; + typedef typename dst_map_type::offset_type dst_offset_type ; + typedef typename src_map_type::offset_type src_offset_type ; + + dst = dst_map_type( + dst_handle_type( src.m_impl_handle + + ( is_outer_left ? 
( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * ( i_tile3 + src.m_impl_offset.m_tile_N3 * ( i_tile4 + src.m_impl_offset.m_tile_N4 * i_tile5 ) ) ) ) ) << src_offset_type::SHIFT_6T ) + : ( ( src.m_impl_offset.m_tile_N5 * ( src.m_impl_offset.m_tile_N4 * ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) << src_offset_type::SHIFT_6T ) + ) + ) // offset to start of the tile + , dst_offset_type() ); + } +}; + +// Rank 7 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5 , typename iType6 + > +struct ViewMapping + < typename std::enable_if< (N7 == 0) >::type //void + , Kokkos::ViewTraits<T*******,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...> + , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> + , iType0 + , iType1 + , iType2 + , iType3 + , iType4 + , iType5 + , iType6 > +{ + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> src_layout ; + typedef Kokkos::ViewTraits< T******* , src_layout , P... > src_traits ; + + enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; + enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5][N6] , array_layout , P ... > traits ; + typedef Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , array_layout , P ... 
> type ; + + KOKKOS_INLINE_FUNCTION static + void assign( ViewMapping< traits , void > & dst + , const ViewMapping< src_traits , void > & src + , const src_layout & + , const iType0 i_tile0 + , const iType1 i_tile1 + , const iType2 i_tile2 + , const iType3 i_tile3 + , const iType4 i_tile4 + , const iType5 i_tile5 + , const iType6 i_tile6 + ) + { + typedef ViewMapping< traits , void > dst_map_type ; + typedef ViewMapping< src_traits , void > src_map_type ; + typedef typename dst_map_type::handle_type dst_handle_type ; + typedef typename dst_map_type::offset_type dst_offset_type ; + typedef typename src_map_type::offset_type src_offset_type ; + + dst = dst_map_type( + dst_handle_type( src.m_impl_handle + + ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * ( i_tile3 + src.m_impl_offset.m_tile_N3 * ( i_tile4 + src.m_impl_offset.m_tile_N4 * ( i_tile5 + src.m_impl_offset.m_tile_N5 * i_tile6 ) ) ) ) ) ) << src_offset_type::SHIFT_7T ) + : ( ( src.m_impl_offset.m_tile_N6 * ( src.m_impl_offset.m_tile_N5 * ( src.m_impl_offset.m_tile_N4 * ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) + i_tile6 ) << src_offset_type::SHIFT_7T ) + ) + ) // offset to start of the tile + , dst_offset_type() ); + } +}; + +// Rank 8 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... 
P + , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5 , typename iType6 , typename iType7 + > +struct ViewMapping + < typename std::enable_if< (N0 != 0 && N1 != 0 && N2 != 0 && N3 != 0 && N4 != 0 && N5 != 0 && N6 != 0 && N7 != 0) >::type //void + , Kokkos::ViewTraits<T********,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...> + , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> + , iType0 + , iType1 + , iType2 + , iType3 + , iType4 + , iType5 + , iType6 + , iType7 > +{ + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> src_layout ; + typedef Kokkos::ViewTraits< T******** , src_layout , P... > src_traits ; + + enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) }; + enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) }; + typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5][N6][N7] , array_layout , P ... > traits ; + typedef Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , array_layout , P ... > type ; + + KOKKOS_INLINE_FUNCTION static + void assign( ViewMapping< traits , void > & dst + , const ViewMapping< src_traits , void > & src + , const src_layout & + , const iType0 i_tile0 + , const iType1 i_tile1 + , const iType2 i_tile2 + , const iType3 i_tile3 + , const iType4 i_tile4 + , const iType5 i_tile5 + , const iType6 i_tile6 + , const iType7 i_tile7 + ) + { + typedef ViewMapping< traits , void > dst_map_type ; + typedef ViewMapping< src_traits , void > src_map_type ; + typedef typename dst_map_type::handle_type dst_handle_type ; + typedef typename dst_map_type::offset_type dst_offset_type ; + typedef typename src_map_type::offset_type src_offset_type ; + + dst = dst_map_type( + dst_handle_type( src.m_impl_handle + + ( is_outer_left ? 
( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * ( i_tile3 + src.m_impl_offset.m_tile_N3 * ( i_tile4 + src.m_impl_offset.m_tile_N4 * ( i_tile5 + src.m_impl_offset.m_tile_N5 * ( i_tile6 + src.m_impl_offset.m_tile_N6 * i_tile7 ) ) ) ) ) ) ) << src_offset_type::SHIFT_8T ) + : ( ( src.m_impl_offset.m_tile_N7 * ( src.m_impl_offset.m_tile_N6 * ( src.m_impl_offset.m_tile_N5 * ( src.m_impl_offset.m_tile_N4 * ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) + i_tile6 ) + i_tile7 ) << src_offset_type::SHIFT_8T ) + ) + ) // offset to start of the tile + , dst_offset_type() ); + } +}; + + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------- + +namespace Kokkos { + +// Rank 2 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + > +KOKKOS_INLINE_FUNCTION +Kokkos::View< T[N0][N1] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... > +tile_subview( const Kokkos::View<T**, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src + , const size_t i_tile0 + , const size_t i_tile1 + ) +{ + // Force the specialized ViewMapping for extracting a tile + // by using the first subview argument as the layout. + typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ; + + return Kokkos::View< T[N0][N1] , array_layout , P... 
> + ( src , SrcLayout() , i_tile0 , i_tile1 ); +} + +// Rank 3 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + > +KOKKOS_INLINE_FUNCTION +Kokkos::View< T[N0][N1][N2] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... > +tile_subview( const Kokkos::View<T***, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src + , const size_t i_tile0 + , const size_t i_tile1 + , const size_t i_tile2 + ) +{ + // Force the specialized ViewMapping for extracting a tile + // by using the first subview argument as the layout. + typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ; + + return Kokkos::View< T[N0][N1][N2] , array_layout , P... > + ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 ); +} + +// Rank 4 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + > +KOKKOS_INLINE_FUNCTION +Kokkos::View< T[N0][N1][N2][N3] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... > +tile_subview( const Kokkos::View<T****, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src + , const size_t i_tile0 + , const size_t i_tile1 + , const size_t i_tile2 + , const size_t i_tile3 + ) +{ + // Force the specialized ViewMapping for extracting a tile + // by using the first subview argument as the layout. 
+ typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ; + + return Kokkos::View< T[N0][N1][N2][N3] , array_layout , P... > + ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 ); +} + +// Rank 5 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + > +KOKKOS_INLINE_FUNCTION +Kokkos::View< T[N0][N1][N2][N3][N4] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... > +tile_subview( const Kokkos::View<T*****, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src + , const size_t i_tile0 + , const size_t i_tile1 + , const size_t i_tile2 + , const size_t i_tile3 + , const size_t i_tile4 + ) +{ + // Force the specialized ViewMapping for extracting a tile + // by using the first subview argument as the layout. + typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ; + + return Kokkos::View< T[N0][N1][N2][N3][N4] , array_layout , P... > + ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 ); +} + +// Rank 6 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + > +KOKKOS_INLINE_FUNCTION +Kokkos::View< T[N0][N1][N2][N3][N4][N5] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... 
> +tile_subview( const Kokkos::View<T******, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src + , const size_t i_tile0 + , const size_t i_tile1 + , const size_t i_tile2 + , const size_t i_tile3 + , const size_t i_tile4 + , const size_t i_tile5 + ) +{ + // Force the specialized ViewMapping for extracting a tile + // by using the first subview argument as the layout. + typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ; + + return Kokkos::View< T[N0][N1][N2][N3][N4][N5] , array_layout , P... > + ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 ); +} + +// Rank 7 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + > +KOKKOS_INLINE_FUNCTION +Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... > +tile_subview( const Kokkos::View<T*******, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src + , const size_t i_tile0 + , const size_t i_tile1 + , const size_t i_tile2 + , const size_t i_tile3 + , const size_t i_tile4 + , const size_t i_tile5 + , const size_t i_tile6 + ) +{ + // Force the specialized ViewMapping for extracting a tile + // by using the first subview argument as the layout. + typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ; + + return Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , array_layout , P... 
> + ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 , i_tile6 ); +} + +// Rank 8 +template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 + , class ... P + > +KOKKOS_INLINE_FUNCTION +Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... > +tile_subview( const Kokkos::View<T********, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src + , const size_t i_tile0 + , const size_t i_tile1 + , const size_t i_tile2 + , const size_t i_tile3 + , const size_t i_tile4 + , const size_t i_tile5 + , const size_t i_tile6 + , const size_t i_tile7 + ) +{ + // Force the specialized ViewMapping for extracting a tile + // by using the first subview argument as the layout. + typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout; + typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ; + + return Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , array_layout , P... 
> + ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 , i_tile6 , i_tile7 ); +} + +} /* namespace Kokkos */ +#endif //!defined(KOKKOS_ENABLE_DEPRECATED_CODE +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_EXPERIENTAL_VIEWLAYOUTTILE_HPP */ + diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index d4890c534..bb3bcfd33 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -260,6 +260,9 @@ namespace Impl { struct ALL_t { KOKKOS_INLINE_FUNCTION constexpr const ALL_t & operator()() const { return *this ; } + + KOKKOS_INLINE_FUNCTION + constexpr bool operator == ( const ALL_t & right) const { return true;} }; }} // namespace Kokkos::Impl @@ -1030,13 +1033,6 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) { - static_assert( - ( DimRHS::rank == 0 && - dimension_type::rank == 0 ) || - ( DimRHS::rank == 1 && - dimension_type::rank == 1 && - dimension_type::rank_dynamic == 1 ) - , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank <= 1" ); if ( rhs.m_stride.S0 != 1 ) { Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" ); } @@ -1275,6 +1271,18 @@ public: // Also requires equal static dimensions ... 
} + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + , m_stride( rhs.stride_1() ) + { + if ( rhs.m_stride.S0 != 1 ) { + Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" ); + } + } + //---------------------------------------- // Subview construction // This subview must be 2 == rank and 2 == rank_dynamic @@ -1518,16 +1526,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 ) { - static_assert( - ( DimRHS::rank == 0 && - dimension_type::rank == 0 ) || - ( DimRHS::rank == 1 && - dimension_type::rank == 1 && - dimension_type::rank_dynamic == 1 ) - , "ViewOffset LayoutRight and LayoutString are only compatible when rank <= 1" ); - if ( rhs.m_stride.S0 != 1 ) { - Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft/Right from LayoutStride requires stride == 1" ); - } + } //---------------------------------------- @@ -1771,6 +1770,23 @@ public: // Also requires equal static dimensions ... 
} + template< class DimRHS > + KOKKOS_INLINE_FUNCTION + ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs ) + : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3 + , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 ) + , m_stride( rhs.stride_0() ) + { + if ( ((dimension_type::rank == 2)?rhs.m_stride.S1: + ((dimension_type::rank == 3)?rhs.m_stride.S2: + ((dimension_type::rank == 4)?rhs.m_stride.S3: + ((dimension_type::rank == 5)?rhs.m_stride.S4: + ((dimension_type::rank == 6)?rhs.m_stride.S5: + ((dimension_type::rank == 7)?rhs.m_stride.S6:rhs.m_stride.S7)))))) != 1 ){ + Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutRight from LayoutStride requires right-most stride == 1" ); + } + } + //---------------------------------------- // Subview construction // Last dimension must be non-zero @@ -2498,7 +2514,7 @@ struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ > #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelFor("Kokkos::View::initialization", 0, &kpID); + Kokkos::Profiling::beginParallelFor((destroy ? "Kokkos::View::destruction" : "Kokkos::View::initialization"), 0, &kpID); } #endif const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType > @@ -2588,11 +2604,8 @@ class ViewMapping< Traits , , void >::is_mapping_plugin::value )>::type > { -private: - - template< class , class ... > friend class ViewMapping ; - template< class , class ... 
> friend class Kokkos::View ; +public: typedef ViewOffset< typename Traits::dimension , typename Traits::array_layout , void @@ -2600,13 +2613,17 @@ private: typedef typename ViewDataHandle< Traits >::handle_type handle_type ; - handle_type m_handle ; - offset_type m_offset ; + handle_type m_impl_handle ; + offset_type m_impl_offset ; + +private: + + template < class , class ...> friend class ViewMapping; KOKKOS_INLINE_FUNCTION ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset ) - : m_handle( arg_handle ) - , m_offset( arg_offset ) + : m_impl_handle( arg_handle ) + , m_impl_offset( arg_offset ) {} public: @@ -2621,44 +2638,44 @@ public: template< typename iType > KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const - { return m_offset.m_dim.extent(r); } + { return m_impl_offset.m_dim.extent(r); } KOKKOS_INLINE_FUNCTION constexpr typename Traits::array_layout layout() const - { return m_offset.layout(); } + { return m_impl_offset.layout(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_impl_offset.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_impl_offset.dimension_1(); } + 
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_impl_offset.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_impl_offset.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_impl_offset.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_impl_offset.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_impl_offset.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_impl_offset.dimension_7(); } // Is a regular layout with uniform striding for each index. using is_regular = typename offset_type::is_regular ; - KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_impl_offset.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_impl_offset.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_impl_offset.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_impl_offset.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_impl_offset.stride_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { 
return m_impl_offset.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_impl_offset.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_impl_offset.stride_7(); } template< typename iType > - KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); } + KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_impl_offset.stride(s); } //---------------------------------------- // Range span /** \brief Span of the mapped range */ - KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); } + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_impl_offset.span(); } /** \brief Is the mapped range span contiguous */ - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_impl_offset.span_is_contiguous(); } typedef typename ViewDataHandle< Traits >::return_type reference_type ; typedef typename Traits::value_type * pointer_type ; @@ -2666,7 +2683,7 @@ public: /** \brief Query raw pointer to memory */ KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { - return m_handle; + return m_impl_handle; } //---------------------------------------- @@ -2674,7 +2691,7 @@ public: // calling these element reference methods. KOKKOS_FORCEINLINE_FUNCTION - reference_type reference() const { return m_handle[0]; } + reference_type reference() const { return m_impl_handle[0]; } template< typename I0 > KOKKOS_FORCEINLINE_FUNCTION @@ -2682,7 +2699,7 @@ public: std::enable_if< std::is_integral<I0>::value && ! 
std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value , reference_type >::type - reference( const I0 & i0 ) const { return m_handle[i0]; } + reference( const I0 & i0 ) const { return m_impl_handle[i0]; } template< typename I0 > KOKKOS_FORCEINLINE_FUNCTION @@ -2690,50 +2707,50 @@ public: std::enable_if< std::is_integral<I0>::value && std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value , reference_type >::type - reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; } + reference( const I0 & i0 ) const { return m_impl_handle[ m_impl_offset(i0) ]; } template< typename I0 , typename I1 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 ) const - { return m_handle[ m_offset(i0,i1) ]; } + { return m_impl_handle[ m_impl_offset(i0,i1) ]; } template< typename I0 , typename I1 , typename I2 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const - { return m_handle[ m_offset(i0,i1,i2) ]; } + { return m_impl_handle[ m_impl_offset(i0,i1,i2) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const - { return m_handle[ m_offset(i0,i1,i2,i3) ]; } + { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , const I4 & i4 ) const - { return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; } + { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , const I4 & i4 , const I5 & i5 ) const - { return m_handle[ 
m_offset(i0,i1,i2,i3,i4,i5) ]; } + { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const - { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; } + { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ]; } template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 , typename I7 > KOKKOS_FORCEINLINE_FUNCTION reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const - { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } + { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } //---------------------------------------- @@ -2747,22 +2764,22 @@ public: /** \brief Span, in bytes, of the referenced memory */ KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const { - return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask); + return ( m_impl_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask); } //---------------------------------------- KOKKOS_INLINE_FUNCTION ~ViewMapping() {} - KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {} + KOKKOS_INLINE_FUNCTION ViewMapping() : m_impl_handle(), m_impl_offset() {} KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs ) - : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {} + : m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ) {} KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs ) - { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; } + { m_impl_handle = rhs.m_impl_handle ; m_impl_offset = 
rhs.m_impl_offset ; return *this ; } KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs ) - : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {} + : m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ) {} KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs ) - { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; } + { m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; return *this ; } //---------------------------------------- @@ -2780,14 +2797,14 @@ public: ViewMapping( Kokkos::Impl::ViewCtorProp< P ... > const & arg_prop , typename Traits::array_layout const & arg_layout ) - : m_handle( ( (Kokkos::Impl::ViewCtorProp<void,pointer_type> const &) arg_prop ).value ) - , m_offset( std::integral_constant< unsigned , 0 >() , arg_layout ) + : m_impl_handle( ( (Kokkos::Impl::ViewCtorProp<void,pointer_type> const &) arg_prop ).value ) + , m_impl_offset( std::integral_constant< unsigned , 0 >() , arg_layout ) {} /**\brief Assign data */ KOKKOS_INLINE_FUNCTION void assign_data( pointer_type arg_ptr ) - { m_handle = handle_type( arg_ptr ); } + { m_impl_handle = handle_type( arg_ptr ); } //---------------------------------------- /* Allocate and construct mapped array. @@ -2815,10 +2832,10 @@ public: , alloc_prop::allow_padding ? 
sizeof(value_type) : 0 > padding ; - m_offset = offset_type( padding(), arg_layout ); + m_impl_offset = offset_type( padding(), arg_layout ); const size_t alloc_size = - ( m_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); + ( m_impl_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask); // Create shared memory tracking record with allocate memory from the memory space record_type * const record = @@ -2829,7 +2846,7 @@ public: #ifdef KOKKOS_ENABLE_DEPRECATED_CODE if ( alloc_size ) { #endif - m_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) ); + m_impl_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) ); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE } #endif @@ -2840,8 +2857,8 @@ public: // Assume destruction is only required when construction is requested. // The ViewValueFunctor has both value construction and destruction operators. record->m_destroy = functor_type( ( (Kokkos::Impl::ViewCtorProp<void,execution_space> const &) arg_prop).value - , (value_type *) m_handle - , m_offset.span() + , (value_type *) m_impl_handle + , m_impl_offset.span() ); // Construct values @@ -2859,16 +2876,17 @@ public: template< class DstTraits , class SrcTraits > class ViewMapping< DstTraits , SrcTraits , typename std::enable_if<( - /* default mappings */ + !(std::is_same<typename SrcTraits::array_layout, LayoutStride>::value) && //Added to have a new specialization for SrcType of LayoutStride + // default mappings std::is_same< typename DstTraits::specialize , void >::value && std::is_same< typename SrcTraits::specialize , void >::value && ( - /* same layout */ + // same layout std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value || - /* known layout */ + // known layout ( ( std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || @@ -2968,8 +2986,127 @@ public: if(!assignable) Kokkos::abort("View Assignment: trying to assign runtime 
dimension to non matching compile time dimension."); } - dst.m_offset = dst_offset_type( src.m_offset ); - dst.m_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track ); + dst.m_impl_offset = dst_offset_type( src.m_impl_offset ); + dst.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_impl_handle , src_track ); + } +}; + +//---------------------------------------------------------------------------- +//Create new specialization for SrcType of LayoutStride. Runtime check for compatible layout +template< class DstTraits , class SrcTraits > +class ViewMapping< DstTraits , SrcTraits , + typename std::enable_if<( + std::is_same< typename SrcTraits::array_layout, Kokkos::LayoutStride >::value + && + std::is_same< typename DstTraits::specialize , void >::value + && + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + // same layout + std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value + || + // known layout + ( + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value + ) + ) + )>::type > +{ +private: + + enum { is_assignable_space = + Kokkos::Impl::MemorySpaceAccess + < typename DstTraits::memory_space + , typename SrcTraits::memory_space >::assignable }; + + enum { is_assignable_value_type = + std::is_same< typename DstTraits::value_type + , typename SrcTraits::value_type >::value || + std::is_same< typename DstTraits::value_type + , typename SrcTraits::const_value_type >::value }; + + enum { is_assignable_dimension = + ViewDimensionAssignable< typename DstTraits::dimension + , typename SrcTraits::dimension >::value }; + +public: + + enum { is_assignable = is_assignable_space && + is_assignable_value_type && + is_assignable_dimension }; + + typedef 
Kokkos::Impl::SharedAllocationTracker TrackType ; + typedef ViewMapping< DstTraits , void > DstType ; + typedef ViewMapping< SrcTraits , void > SrcType ; + + KOKKOS_INLINE_FUNCTION + static bool assignable_layout_check(DstType & dst, const SrcType & src) //Runtime check + { + size_t strides[9]; + bool assignable = true; + src.stride(strides); + size_t exp_stride = 1; + if (std::is_same< typename DstTraits::array_layout, Kokkos::LayoutLeft >::value) { + for(int i=0; i<src.Rank; i++) { + if (i>0) exp_stride *= src.extent(i-1); + if (strides[i] != exp_stride){assignable=false;break;} + } + } + else if (std::is_same< typename DstTraits::array_layout, Kokkos::LayoutRight >::value) { + for(int i=src.Rank-1; i>=0; i--) { + if (i<src.Rank-1) exp_stride *= src.extent(i+1); + if (strides[i] != exp_stride){assignable=false;break;} + } + } + return assignable; + } + + KOKKOS_INLINE_FUNCTION + static void assign( DstType & dst , const SrcType & src , const TrackType & src_track ) + { + static_assert( is_assignable_space + , "View assignment must have compatible spaces" ); + + static_assert( is_assignable_value_type + , "View assignment must have same value type or const = non-const" ); + + static_assert( is_assignable_dimension + , "View assignment must have compatible dimensions" ); + + bool assignable_layout = assignable_layout_check(dst, src); //Runtime check + if(!assignable_layout) + Kokkos::abort("View assignment must have compatible layouts\n"); + + typedef typename DstType::offset_type dst_offset_type ; + + if ( size_t(DstTraits::dimension::rank_dynamic) < size_t(SrcTraits::dimension::rank_dynamic) ) { + typedef typename DstTraits::dimension dst_dim; + bool assignable = + ( ( 1 > DstTraits::dimension::rank_dynamic && 1 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN0 == src.dimension_0() : true ) && + ( ( 2 > DstTraits::dimension::rank_dynamic && 2 <= SrcTraits::dimension::rank_dynamic ) ? 
+ dst_dim::ArgN1 == src.dimension_1() : true ) && + ( ( 3 > DstTraits::dimension::rank_dynamic && 3 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN2 == src.dimension_2() : true ) && + ( ( 4 > DstTraits::dimension::rank_dynamic && 4 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN3 == src.dimension_3() : true ) && + ( ( 5 > DstTraits::dimension::rank_dynamic && 5 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN4 == src.dimension_4() : true ) && + ( ( 6 > DstTraits::dimension::rank_dynamic && 6 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN5 == src.dimension_5() : true ) && + ( ( 7 > DstTraits::dimension::rank_dynamic && 7 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN6 == src.dimension_6() : true ) && + ( ( 8 > DstTraits::dimension::rank_dynamic && 8 <= SrcTraits::dimension::rank_dynamic ) ? + dst_dim::ArgN7 == src.dimension_7() : true ) + ; + if(!assignable) + Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension."); + } + dst.m_impl_offset = dst_offset_type( src.m_impl_offset ); + dst.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_impl_handle , src_track ); } }; @@ -3106,12 +3243,12 @@ public: typedef typename DstType::offset_type dst_offset_type ; const SubviewExtents< SrcTraits::rank , rank > - extents( src.m_offset.m_dim , args... ); + extents( src.m_impl_offset.m_dim , args... ); - dst.m_offset = dst_offset_type( src.m_offset , extents ); + dst.m_impl_offset = dst_offset_type( src.m_impl_offset , extents ); - dst.m_handle = ViewDataHandle< DstTraits >::assign(src.m_handle, - src.m_offset( extents.domain_offset(0) + dst.m_impl_handle = ViewDataHandle< DstTraits >::assign(src.m_impl_handle, + src.m_impl_offset( extents.domain_offset(0) , extents.domain_offset(1) , extents.domain_offset(2) , extents.domain_offset(3) @@ -3152,6 +3289,7 @@ bool view_verify_operator_bounds && view_verify_operator_bounds<R+1>( map , args ... 
); } + template< unsigned , class MapType > inline void view_error_operator_bounds( char * , int , const MapType & ) @@ -3176,6 +3314,7 @@ void view_error_operator_bounds view_error_operator_bounds<R+1>(buf+n,len-n,map,args...); } + #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) /* Check #3: is the View managed as determined by the MemoryTraits? */ @@ -3275,6 +3414,8 @@ void view_verify_operator_bounds } } + + } /* namespace Impl */ } /* namespace Kokkos */ diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewTile.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewTile.hpp index 42bc8c230..716b9ceca 100644 --- a/packages/kokkos/core/src/impl/Kokkos_ViewTile.hpp +++ b/packages/kokkos/core/src/impl/Kokkos_ViewTile.hpp @@ -202,8 +202,8 @@ struct ViewMapping typedef typename src_map_type::offset_type src_offset_type ; dst = dst_map_type( - dst_handle_type( src.m_handle + - ( ( i_tile0 + src.m_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) , + dst_handle_type( src.m_impl_handle + + ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) , dst_offset_type() ); } }; diff --git a/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp b/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp index 7dc8a5356..423944962 100644 --- a/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp +++ b/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp @@ -336,11 +336,11 @@ Sentinel::Sentinel() const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 ); - if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) { + if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) { hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc(); - hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset ); + hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->cpuset ); bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology , s_process_no_core_zero , @@ -402,14 +402,14 @@ 
Sentinel::Sentinel() const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i ); - if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) { + if ( hwloc_bitmap_intersects( s_process_binding , root->cpuset ) ) { ++root_count ; // Remember which root (NUMA) object the master thread is running on. // This will be logical NUMA rank #0 for this process. - if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) { + if ( hwloc_bitmap_intersects( proc_cpuset_location, root->cpuset ) ) { root_base = i ; } @@ -417,7 +417,7 @@ Sentinel::Sentinel() const unsigned max_core = hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , - root->allowed_cpuset , + root->cpuset , HWLOC_OBJ_CORE ); unsigned core_count = 0 ; @@ -426,7 +426,7 @@ Sentinel::Sentinel() const hwloc_obj_t core = hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology , - root->allowed_cpuset , + root->cpuset , HWLOC_OBJ_CORE , j ); // If process' cpuset intersects core's cpuset then process can access this core. @@ -438,13 +438,13 @@ Sentinel::Sentinel() // This assumes that it would be performance-detrimental // to spawn more than one MPI process per core and use nested threading. 
- if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) { + if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) { ++core_count ; const unsigned pu_count = hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , - core->allowed_cpuset , + core->cpuset , HWLOC_OBJ_PU ); if ( pu_per_core == 0 ) pu_per_core = pu_count ; @@ -480,11 +480,11 @@ Sentinel::Sentinel() const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank ); - if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) { + if ( hwloc_bitmap_intersects( s_process_binding , root->cpuset ) ) { const unsigned max_core = hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology , - root->allowed_cpuset , + root->cpuset , HWLOC_OBJ_CORE ); unsigned core_count = 0 ; @@ -493,12 +493,12 @@ Sentinel::Sentinel() const hwloc_obj_t core = hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology , - root->allowed_cpuset , + root->cpuset , HWLOC_OBJ_CORE , j ); - if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) { + if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) { - s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ; + s_core[ core_count + core_per_root * i ] = core->cpuset ; ++core_count ; } diff --git a/packages/kokkos/core/src/kokkos.pc.in b/packages/kokkos/core/src/kokkos.pc.in new file mode 100644 index 000000000..f27b57c96 --- /dev/null +++ b/packages/kokkos/core/src/kokkos.pc.in @@ -0,0 +1,71 @@ +# +# Kokkos v. 2.0 +# Copyright (2014) Sandia Corporation +# +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +# + +# Add the directory where kokkos.pc got installed to your PKG_CONFIG_PATH + +# Use this on commandline with: +# c++ `pkg-config --cflags --libs kokkos` -o myapp myapp.cpp + +# Use this in a Makefile: +# myapp: myapp.cpp +# $(CC) `pkg-config --cflags --libs kokkos` -o $@ $< + +# Use this in autotools: +# configure.ac: +# PKG_CHECK_MODULES([KOKKOS], [kokkos]) +# Makefile.am: +# myapp_CFLAGS = $(KOKKOS_CFLAGS) +# myapp_LDADD = $(KOKKOS_LIBS) + +# Use this in CMake: +# CMakeLists.txt: +# find_package(PkgConfig) +# pkg_check_modules(KOKKOS IMPORTED_TARGET kokkos) +# target_link_libraries(<lib> PkgConfig::KOKKOS) + +prefix=@CMAKE_INSTALL_PREFIX@ +libdir=${prefix}/lib +includedir=${prefix}/include + +Name: kokkos +Description: Kokkos C++ Performance Portability Programming EcoSystem +URL: https://github.com/kokkos +Version: +Requires: +Libs: -L${libdir} -lkokkos @KOKKOS_EXTRA_LIBS_LIST@ @KOKKOS_LINK_FLAGS@ +Libs.private: -lm +Cflags: -I${includedir} @KOKKOS_CXXFLAGS@ diff --git a/packages/kokkos/core/unit_test/CMakeLists.txt b/packages/kokkos/core/unit_test/CMakeLists.txt index 651abf04d..fad4e1d45 100644 --- a/packages/kokkos/core/unit_test/CMakeLists.txt +++ b/packages/kokkos/core/unit_test/CMakeLists.txt @@ -284,6 +284,7 @@ IF(Kokkos_ENABLE_Cuda) SOURCES UnitTestMainInit.cpp cuda/TestCudaHostPinned_SharedAlloc.cpp + cuda/TestCudaHostPinned_ViewCopy.cpp cuda/TestCudaHostPinned_ViewAPI_a.cpp cuda/TestCudaHostPinned_ViewAPI_b.cpp cuda/TestCudaHostPinned_ViewAPI_c.cpp @@ -293,6 +294,7 @@ IF(Kokkos_ENABLE_Cuda) cuda/TestCudaHostPinned_ViewMapping_b.cpp cuda/TestCudaHostPinned_ViewMapping_subview.cpp cuda/TestCudaUVM_SharedAlloc.cpp + cuda/TestCudaUVM_ViewCopy.cpp cuda/TestCudaUVM_ViewAPI_a.cpp cuda/TestCudaUVM_ViewAPI_b.cpp cuda/TestCudaUVM_ViewAPI_c.cpp diff --git a/packages/kokkos/core/unit_test/Makefile b/packages/kokkos/core/unit_test/Makefile index b50222e37..72832271c 100644 --- a/packages/kokkos/core/unit_test/Makefile +++ 
b/packages/kokkos/core/unit_test/Makefile @@ -43,12 +43,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o OBJ_CUDA += TestCuda_RangePolicy.o OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o - OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o - OBJ_CUDA += TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o + OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewLayoutStrideAssignment.o + OBJ_CUDA += TestCudaUVM_ViewCopy.o TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o OBJ_CUDA += TestCudaUVM_ViewMapping_a.o TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o - OBJ_CUDA += TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o + OBJ_CUDA += TestCudaHostPinned_ViewCopy.o TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o - OBJ_CUDA += TestCuda_View_64bit.o + OBJ_CUDA += TestCuda_View_64bit.o OBJ_CUDA += TestCuda_ViewOfClass.o OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o @@ -57,13 +57,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o OBJ_CUDA += TestCuda_SubView_c13.o OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o + OBJ_CUDA += TestCuda_Reductions_DeviceView.o OBJ_CUDA += 
TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o OBJ_CUDA += TestCuda_Complex.o OBJ_CUDA += TestCuda_AtomicOperations_int.o TestCuda_AtomicOperations_unsignedint.o TestCuda_AtomicOperations_longint.o OBJ_CUDA += TestCuda_AtomicOperations_unsignedlongint.o TestCuda_AtomicOperations_longlongint.o TestCuda_AtomicOperations_double.o TestCuda_AtomicOperations_float.o OBJ_CUDA += TestCuda_AtomicViews.o TestCuda_Atomics.o OBJ_CUDA += TestCuda_Team.o TestCuda_TeamScratch.o - OBJ_CUDA += TestCuda_TeamReductionScan.o + OBJ_CUDA += TestCuda_TeamReductionScan.o TestCuda_TeamTeamSize.o OBJ_CUDA += TestCuda_Other.o OBJ_CUDA += TestCuda_MDRange_a.o TestCuda_MDRange_b.o TestCuda_MDRange_c.o TestCuda_MDRange_d.o TestCuda_MDRange_e.o OBJ_CUDA += TestCuda_Crs.o @@ -84,13 +85,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) OBJ_ROCM += TestROCm_AtomicOperations_int.o TestROCm_AtomicOperations_unsignedint.o TestROCm_AtomicOperations_longint.o OBJ_ROCM += TestROCm_AtomicOperations_unsignedlongint.o TestROCm_AtomicOperations_longlongint.o TestROCm_AtomicOperations_double.o TestROCm_AtomicOperations_float.o OBJ_ROCM += TestROCm_Atomics.o -# complex failing OBJ_ROCM += TestROCm_AtomicViews.o OBJ_ROCM += TestROCm_Other.o -# Compiles but runtime Segfaults: -# OBJ_ROCM += TestROCm_MDRange_a.o TestROCm_MDRange_b.o TestROCm_MDRange_c.o TestROCm_MDRange_d.o TestROCm_MDRange_e.o -# rocm.memory_pool - OBJ_ROCM += TestROCm_Reductions.o + OBJ_ROCM += TestROCm_MDRange_a.o TestROCm_MDRange_b.o TestROCm_MDRange_c.o TestROCm_MDRange_d.o TestROCm_MDRange_e.o + OBJ_ROCM += TestROCm_MDRangeReduce_a.o TestROCm_MDRangeReduce_b.o TestROCm_MDRangeReduce_c.o TestROCm_MDRangeReduce_d.o TestROCm_MDRangeReduce_e.o + OBJ_ROCM += TestROCm_Reductions.o OBJ_ROCM += TestROCm_Reducers_a.o TestROCm_Reducers_b.o TestROCm_Reducers_c.o TestROCm_Reducers_d.o OBJ_ROCM += TestROCm_Scan.o OBJ_ROCM += TestROCm_SharedAlloc.o @@ -108,22 +107,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) OBJ_ROCM += 
TestROCm_SubView_c10.o OBJ_ROCM += TestROCm_SubView_c11.o OBJ_ROCM += TestROCm_SubView_c12.o + OBJ_ROCM += TestROCm_SubView_c13.o OBJ_ROCM += TestROCm_Team.o -# compile fails / compiler segfaults - #OBJ_ROCM += TestROCm_TeamReductionScan.o -# compile fails - OBJ_ROCM += TestROCm_TeamScratch.o + OBJ_ROCM += TestROCm_TeamReductionScan.o + OBJ_ROCM += TestROCm_TeamScratch.o TestROCm_TeamTeamSize.o OBJ_ROCM += TestROCm_ViewAPI_a.o TestROCm_ViewAPI_b.o TestROCm_ViewAPI_c.o TestROCm_ViewAPI_d.o TestROCm_ViewAPI_e.o OBJ_ROCM += TestROCm_ViewMapping_a.o OBJ_ROCM += TestROCm_ViewMapping_b.o OBJ_ROCM += TestROCm_ViewMapping_subview.o - OBJ_ROCM += TestROCmHostPinned_ViewAPI_a.o TestROCmHostPinned_ViewAPI_b.o TestROCmHostPinned_ViewAPI_c.o TestROCmHostPinned_ViewAPI_d.o TestROCmHostPinned_ViewAPI_e.o + OBJ_ROCM += TestROCmHostPinned_ViewCopy.o TestROCmHostPinned_ViewAPI_a.o TestROCmHostPinned_ViewAPI_b.o TestROCmHostPinned_ViewAPI_c.o TestROCmHostPinned_ViewAPI_d.o TestROCmHostPinned_ViewAPI_e.o OBJ_ROCM += TestROCmHostPinned_View_64bit.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o OBJ_ROCM += TestROCm_ViewOfClass.o - OBJ_ROCM += TestROCm_Spaces.o + OBJ_ROCM += TestROCm_Spaces.o + OBJ_ROCM += TestROCm_Crs.o TARGETS += KokkosCore_UnitTest_ROCm TEST_TARGETS += test-rocm @@ -137,7 +136,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) OBJ_THREADS += TestThreads_RangePolicy.o OBJ_THREADS += TestThreads_View_64bit.o OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o - OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o + OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o 
TestThreads_ViewMapping_subview.o TestThreads_ViewLayoutStrideAssignment.o OBJ_THREADS += TestThreads_ViewOfClass.o OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o @@ -145,12 +144,13 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o + OBJ_THREADS += TestThreads_Reductions_DeviceView.o OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o OBJ_THREADS += TestThreads_Complex.o OBJ_THREADS += TestThreads_AtomicOperations_int.o TestThreads_AtomicOperations_unsignedint.o TestThreads_AtomicOperations_longint.o OBJ_THREADS += TestThreads_AtomicOperations_unsignedlongint.o TestThreads_AtomicOperations_longlongint.o TestThreads_AtomicOperations_double.o TestThreads_AtomicOperations_float.o OBJ_THREADS += TestThreads_AtomicViews.o TestThreads_Atomics.o - OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o + OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o TestThreads_TeamTeamSize.o OBJ_THREADS += TestThreads_TeamReductionScan.o OBJ_THREADS += TestThreads_Other.o OBJ_THREADS += TestThreads_MDRange_a.o TestThreads_MDRange_b.o TestThreads_MDRange_c.o TestThreads_MDRange_d.o TestThreads_MDRange_e.o @@ -167,7 +167,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP += TestOpenMP_RangePolicy.o OBJ_OPENMP += TestOpenMP_View_64bit.o OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o - OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o + OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o 
TestOpenMP_ViewMapping_subview.o TestOpenMP_ViewLayoutStrideAssignment.o OBJ_OPENMP += TestOpenMP_ViewOfClass.o OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o @@ -176,13 +176,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o OBJ_OPENMP += TestOpenMP_SubView_c13.o OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o + OBJ_OPENMP += TestOpenMP_Reductions_DeviceView.o OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o OBJ_OPENMP += TestOpenMP_Complex.o OBJ_OPENMP += TestOpenMP_AtomicOperations_int.o TestOpenMP_AtomicOperations_unsignedint.o TestOpenMP_AtomicOperations_longint.o OBJ_OPENMP += TestOpenMP_AtomicOperations_unsignedlongint.o TestOpenMP_AtomicOperations_longlongint.o TestOpenMP_AtomicOperations_double.o TestOpenMP_AtomicOperations_float.o OBJ_OPENMP += TestOpenMP_AtomicViews.o TestOpenMP_Atomics.o OBJ_OPENMP += TestOpenMP_Team.o TestOpenMP_TeamScratch.o - OBJ_OPENMP += TestOpenMP_TeamReductionScan.o + OBJ_OPENMP += TestOpenMP_TeamReductionScan.o TestOpenMP_TeamTeamSize.o OBJ_OPENMP += TestOpenMP_Other.o OBJ_OPENMP += TestOpenMP_MDRange_a.o TestOpenMP_MDRange_b.o TestOpenMP_MDRange_c.o TestOpenMP_MDRange_d.o TestOpenMP_MDRange_e.o OBJ_OPENMP += TestOpenMP_Crs.o @@ -256,7 +257,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL += TestSerial_RangePolicy.o OBJ_SERIAL += TestSerial_View_64bit.o OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o - OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o + OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o TestSerial_ViewLayoutStrideAssignment.o OBJ_SERIAL += TestSerial_ViewOfClass.o OBJ_SERIAL 
+= TestSerial_SubView_a.o TestSerial_SubView_b.o OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o @@ -265,13 +266,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o OBJ_SERIAL += TestSerial_SubView_c13.o OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o + OBJ_SERIAL += TestSerial_Reductions_DeviceView.o OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o OBJ_SERIAL += TestSerial_Complex.o OBJ_SERIAL += TestSerial_AtomicOperations_int.o TestSerial_AtomicOperations_unsignedint.o TestSerial_AtomicOperations_longint.o OBJ_SERIAL += TestSerial_AtomicOperations_unsignedlongint.o TestSerial_AtomicOperations_longlongint.o TestSerial_AtomicOperations_double.o TestSerial_AtomicOperations_float.o OBJ_SERIAL += TestSerial_AtomicViews.o TestSerial_Atomics.o OBJ_SERIAL += TestSerial_Team.o TestSerial_TeamScratch.o - OBJ_SERIAL += TestSerial_TeamReductionScan.o + OBJ_SERIAL += TestSerial_TeamReductionScan.o TestSerial_TeamTeamSize.o OBJ_SERIAL += TestSerial_Other.o #HCC_WORKAROUND ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) diff --git a/packages/kokkos/core/unit_test/TestAtomic.hpp b/packages/kokkos/core/unit_test/TestAtomic.hpp index cf4bae584..58b632511 100644 --- a/packages/kokkos/core/unit_test/TestAtomic.hpp +++ b/packages/kokkos/core/unit_test/TestAtomic.hpp @@ -224,7 +224,8 @@ T AddLoop( int loop ) { struct AddFunctorReduce< T, execution_space > f_add_red; f_add_red.data = data; - Kokkos::parallel_reduce( loop, f_add_red ); + int dummy_result; + Kokkos::parallel_reduce( loop, f_add_red , dummy_result ); execution_space::fence(); return val; @@ -309,7 +310,8 @@ T CASLoop( int loop ) { struct CASFunctorReduce< T, execution_space > f_cas_red; f_cas_red.data = data; - Kokkos::parallel_reduce( loop, f_cas_red ); + int dummy_result; + Kokkos::parallel_reduce( loop, f_cas_red , dummy_result ); 
execution_space::fence(); return val; @@ -401,7 +403,8 @@ T ExchLoop( int loop ) { struct ExchFunctorReduce< T, execution_space > f_exch_red; f_exch_red.data = data; f_exch_red.data2 = data2; - Kokkos::parallel_reduce( loop, f_exch_red ); + int dummy_result; + Kokkos::parallel_reduce( loop, f_exch_red , dummy_result ); execution_space::fence(); return val; @@ -529,7 +532,7 @@ TEST_F( TEST_CATEGORY, atomics ) ASSERT_TRUE( ( TestAtomic::Loop< float, TEST_EXECSPACE >( 100, 3 ) ) ); #ifndef KOKKOS_ENABLE_OPENMPTARGET -#ifndef KOKKOS_ENABLE_ROCM +#ifndef KOKKOS_ENABLE_ROCM // ROCM doesn't yet support atomics for >64bit types ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 1, 1 ) ) ); ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 1, 2 ) ) ); ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 1, 3 ) ) ); diff --git a/packages/kokkos/core/unit_test/TestCXX11.hpp b/packages/kokkos/core/unit_test/TestCXX11.hpp index b6c34d2d4..8a158e266 100644 --- a/packages/kokkos/core/unit_test/TestCXX11.hpp +++ b/packages/kokkos/core/unit_test/TestCXX11.hpp @@ -216,7 +216,7 @@ template< class DeviceType, bool PWRTest > double ReduceTestFunctor() { typedef Kokkos::TeamPolicy< DeviceType > policy_type; typedef Kokkos::View< double**, DeviceType > view_type; - typedef Kokkos::View< double, typename view_type::host_mirror_space, Kokkos::MemoryUnmanaged > unmanaged_result; + typedef Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > unmanaged_result; view_type a( "A", 100, 5 ); typename view_type::HostMirror h_a = Kokkos::create_mirror_view( a ); @@ -244,7 +244,7 @@ template< class DeviceType, bool PWRTest > double ReduceTestLambda() { typedef Kokkos::TeamPolicy< DeviceType > policy_type; typedef Kokkos::View< double**, DeviceType > view_type; - typedef Kokkos::View< double, typename view_type::host_mirror_space, Kokkos::MemoryUnmanaged > unmanaged_result; + typedef Kokkos::View< double, 
Kokkos::HostSpace, Kokkos::MemoryUnmanaged > unmanaged_result; view_type a( "A", 100, 5 ); typename view_type::HostMirror h_a = Kokkos::create_mirror_view( a ); @@ -327,12 +327,18 @@ bool Test( int test ) { }; bool passed = true; - if ( res_functor != res_lambda ) { + auto a = res_functor; + auto b = res_lambda; + // use a tolerant comparison because functors and lambdas vectorize differently + // https://github.com/trilinos/Trilinos/issues/3233 + auto rel_err = (std::abs(b - a) / std::max(std::abs(a), std::abs(b))); + auto tol = 1e-14; + if (rel_err > tol) { passed = false; std::cout << "CXX11 ( test = '" - << testnames[test] << "' FAILED : " - << res_functor << " != " << res_lambda + << testnames[test] << "' FAILED : relative error " + << rel_err << " > tolerance " << tol << std::endl; } diff --git a/packages/kokkos/core/unit_test/TestComplex.hpp b/packages/kokkos/core/unit_test/TestComplex.hpp index fcaebe3c6..f4343df0c 100644 --- a/packages/kokkos/core/unit_test/TestComplex.hpp +++ b/packages/kokkos/core/unit_test/TestComplex.hpp @@ -71,7 +71,7 @@ struct TestComplexConstruction { ASSERT_FLOAT_EQ(h_results(7).real(),7.5); ASSERT_FLOAT_EQ(h_results(7).imag(),0.0); ASSERT_FLOAT_EQ(h_results(8).real(),double(8)); ASSERT_FLOAT_EQ(h_results(8).imag(),0.0); -#ifndef KOKKOS_ENABLE_ROCM +#ifndef KOKKOS_ENABLE_ROCM // Copy construction conversion between Kokkos::complex and std::complex doesn't compile Kokkos::complex<double> a(1.5,2.5),b(3.25,5.25),r_kk; std::complex<double> sa(a),sb(3.25,5.25),r; r = a; r_kk = a; ASSERT_FLOAT_EQ(r.real(),r_kk.real()); ASSERT_FLOAT_EQ(r.imag(),r_kk.imag()); diff --git a/packages/kokkos/core/unit_test/TestMDRange.hpp b/packages/kokkos/core/unit_test/TestMDRange.hpp index 9298983aa..88b3a9b0c 100644 --- a/packages/kokkos/core/unit_test/TestMDRange.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange.hpp @@ -318,6 +318,24 @@ struct TestMDRange_2D { ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) ); } + // Test with reducers - scalar + label + { 
+ typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type; + int s0 = 1; + int s1 = 1; + range_type range( {{ s0, s1 }}, {{ N0, N1 }}, {{ 3, 3 }} ); + + TestMDRange_2D functor( N0, N1 ); + + parallel_for( "rank2-parfor-label", range, functor ); + + value_type sum = 0.0; + Kokkos::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( "rank2-reducer-label", range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) ); + } // Test with reducers - scalar view { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type; @@ -337,7 +355,31 @@ struct TestMDRange_2D { ASSERT_EQ( sum, 2 * N0 * N1 ); } + // Test Min reducer with lambda +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type; + range_type range( {{ 1, 1 }}, {{ N0, N1 }}, {{ 3, 3 }} ); + + Kokkos::View< double**, ExecSpace > v_in("v_in", N0, N1 ); + + parallel_for( "rank2-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j ) { + v_in( i , j ) = (i+1) * (j+1) ; + }); + double min; + Kokkos::Min< double > reducer_scalar( min ); + + parallel_reduce( "rank2-min-reducer", range, KOKKOS_LAMBDA ( const int i, const int j, double& min_val ) { + min_val = fmin( v_in(i,j), min_val ); + } + , reducer_scalar); + + ASSERT_EQ( min, 4.0 ); + } +#endif +#endif // Tagged operator test { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int>, InitTag > range_type; @@ -858,6 +900,22 @@ struct TestMDRange_3D { ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); } + // Test with reducers - scalar + label + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<3>, Kokkos::IndexType<int> > range_type; + range_type range( {{ 0, 0, 0 }}, {{ N0, N1, N2 }}, {{ 3, 3, 3 }} 
); + + TestMDRange_3D functor( N0, N1, N2 ); + + parallel_for( "rank3-parfor-label", range, functor ); + + value_type sum = 0.0; + Kokkos::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( "rank3-reducer-label", range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } // Test with reducers - scalar view { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<3>, Kokkos::IndexType<int> > range_type; @@ -877,6 +935,31 @@ struct TestMDRange_3D { ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); } + // Test Min reducer with lambda +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<3>, Kokkos::IndexType<int> > range_type; + + range_type range( {{ 1, 1, 1 }}, {{ N0, N1, N2 }}, {{ 3, 3, 3 }} ); + + Kokkos::View< double***, ExecSpace > v_in("v_in", N0, N1, N2 ); + + parallel_for( "rank3-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j, const int k ) { + v_in( i, j, k ) = (i+1) * (j+1) * (k+1) ; + }); + + double min; + + parallel_reduce("rank3-min-reducer", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, double& min_val ) { + min_val = (v_in(i,j,k) < min_val) ? 
v_in(i,j,k) : min_val; + } + , Kokkos::Min<double>(min) ); + + ASSERT_EQ( min, 8.0 ); + } +#endif +#endif // Tagged operator test { @@ -1382,6 +1465,23 @@ struct TestMDRange_4D { ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); } + // Test with reducers - scalar + label + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4>, Kokkos::IndexType<int> > range_type; + range_type range( {{ 0, 0, 0, 0 }}, {{ N0, N1, N2, N3 }}, {{ 3, 3, 3, 3 }} ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( "rank4-parfor-label", range, functor ); + + value_type sum = 0.0; + Kokkos::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( "rank4-reducer-label", range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + // Test with reducers - scalar view { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4>, Kokkos::IndexType<int> > range_type; @@ -1402,6 +1502,32 @@ struct TestMDRange_4D { ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); } + // Test Min reducer with lambda +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4>, Kokkos::IndexType<int> > range_type; + + range_type range( {{ 1, 1, 1, 1 }}, {{ N0, N1, N2, N3 }}, {{ 3, 3, 3, 3 }} ); + + Kokkos::View< double****, ExecSpace > v_in("v_in", N0, N1, N2, N3 ); + + parallel_for( "rank4-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l ) { + v_in( i, j, k, l ) = (i+1) * (j+1) * (k+1) * (l+1) ; + }); + + double min; + + parallel_reduce("rank4-min-reducer", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, double& min_val ) { + min_val = (v_in(i,j,k,l) < min_val) ? 
v_in(i,j,k,l) : min_val; + } + , Kokkos::Min<double>(min) ); + + ASSERT_EQ( min, 16.0 ); + } +#endif +#endif + // Tagged operator test { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int>, InitTag > range_type; @@ -1926,6 +2052,23 @@ struct TestMDRange_5D { ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); } + // Test with reducers - scalar + label + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5>, Kokkos::IndexType<int> > range_type; + range_type range( {{ 0, 0, 0, 0, 0 }}, {{ N0, N1, N2, N3, N4 }}, {{ 3, 3, 3, 3, 3 }} ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + parallel_for( "rank5-parfor-label", range, functor ); + + value_type sum = 0.0; + Kokkos::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( "rank5-reducer-label", range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); + } + // Test with reducers - scalar view { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5>, Kokkos::IndexType<int> > range_type; @@ -1946,6 +2089,32 @@ struct TestMDRange_5D { ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); } + // Test Min reducer with lambda +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5>, Kokkos::IndexType<int> > range_type; + + range_type range( {{ 1, 1, 1, 1, 1 }}, {{ N0, N1, N2, N3, N4 }}, {{ 3, 3, 3, 2, 2 }} ); + + Kokkos::View< double*****, ExecSpace > v_in("v_in", N0, N1, N2, N3, N4 ); + + parallel_for( "rank5-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, const int m ) { + v_in( i, j, k, l, m ) = (i+1) * (j+1) * (k+1) * (l+1) * (m+1) ; + }); + + double min; + + parallel_reduce("rank5-min-reducer", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, const int m, double& min_val ) { + min_val = 
(v_in(i,j,k,l,m) < min_val) ? v_in(i,j,k,l,m) : min_val; + } + , Kokkos::Min<double>(min) ); + + ASSERT_EQ( min, 32.0 ); + } +#endif +#endif + // Tagged operator test { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int>, InitTag > range_type; @@ -2401,6 +2570,23 @@ struct TestMDRange_6D { ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); } + // Test with reducers - scalar + label + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int> > range_type; + range_type range( {{ 0, 0, 0, 0, 0, 0 }}, {{ N0, N1, N2, N3, N4, N5 }}, {{ 3, 3, 3, 3, 3, 2 }} ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + parallel_for( "rank6-parfor-label", range, functor ); + + value_type sum = 0.0; + Kokkos::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( "rank6-reducer-label", range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); + } + // Test with reducers - scalar view { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int> > range_type; @@ -2421,6 +2607,32 @@ struct TestMDRange_6D { ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); } + // Test Min reducer with lambda +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + { + typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int> > range_type; + + range_type range( {{ 1, 1, 1, 1, 1, 1 }}, {{ N0, N1, N2, N3, N4, N5 }}, {{ 3, 3, 3, 2, 2, 1 }} ); + + Kokkos::View< double******, ExecSpace > v_in("v_in", N0, N1, N2, N3, N4, N5 ); + + parallel_for( "rank6-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, const int m, const int n ) { + v_in( i, j, k, l, m, n ) = (i+1) * (j+1) * (k+1) * (l+1) * (m+1) * (n+1) ; + }); + + double min; + + parallel_reduce("rank6-min-reducer", range, KOKKOS_LAMBDA ( const 
int i, const int j, const int k, const int l, const int m, const int n, double& min_val ) { + min_val = (v_in(i,j,k,l,m,n) < min_val) ? v_in(i,j,k,l,m,n) : min_val; + } + , Kokkos::Min<double>(min) ); + + ASSERT_EQ( min, 64.0 ); + } +#endif +#endif + // Tagged operator test { typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int>, InitTag > range_type; diff --git a/packages/kokkos/core/unit_test/TestMDRange_a.hpp b/packages/kokkos/core/unit_test/TestMDRange_a.hpp index 5de5225eb..308b3a302 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_a.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_a.hpp @@ -44,8 +44,12 @@ #include<TestMDRange.hpp> namespace Test { + TEST_F( TEST_CATEGORY , mdrange_5d ) { +#if !defined ( KOKKOS_ENABLE_ROCM ) // MDRange Reduce explicitly handled in its own cpp file TestMDRange_5D< TEST_EXECSPACE >::test_reduce5( 100, 10, 10, 10, 5 ); +#endif TestMDRange_5D< TEST_EXECSPACE >::test_for5( 100, 10, 10, 10, 5 ); } + } diff --git a/packages/kokkos/core/unit_test/TestMDRange_b.hpp b/packages/kokkos/core/unit_test/TestMDRange_b.hpp index 60ece56aa..e714f1839 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_b.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_b.hpp @@ -45,10 +45,11 @@ namespace Test { - TEST_F( TEST_CATEGORY , mdrange_6d ) { TestMDRange_6D< TEST_EXECSPACE >::test_for6( 10, 10, 10, 10, 5, 5 ); +#if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduce explicitly handled in its own cpp file TestMDRange_6D< TEST_EXECSPACE >::test_reduce6( 100, 10, 10, 10, 5, 5 ); +#endif } } diff --git a/packages/kokkos/core/unit_test/TestMDRange_c.hpp b/packages/kokkos/core/unit_test/TestMDRange_c.hpp index 029b1e2b1..810e1d82b 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_c.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_c.hpp @@ -46,8 +46,10 @@ namespace Test { TEST_F( TEST_CATEGORY , mdrange_2d) { - TestMDRange_2D< TEST_EXECSPACE >::test_for2( 100, 
100 ); +#if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduce explicitly handled in its own cpp file TestMDRange_2D< TEST_EXECSPACE >::test_reduce2( 100, 100 ); +#endif + TestMDRange_2D< TEST_EXECSPACE >::test_for2( 100, 100 ); } TEST_F( TEST_CATEGORY , mdrange_array_reduce ) { diff --git a/packages/kokkos/core/unit_test/TestMDRange_d.hpp b/packages/kokkos/core/unit_test/TestMDRange_d.hpp index 240df9aec..1a477a228 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_d.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_d.hpp @@ -44,9 +44,12 @@ #include<TestMDRange.hpp> namespace Test { + TEST_F( TEST_CATEGORY , mdrange_3d) { TestMDRange_3D< TEST_EXECSPACE >::test_for3( 100, 10, 100 ); +#if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduced explicitly handled in its own cpp file TestMDRange_3D< TEST_EXECSPACE >::test_reduce3( 100, 10, 100 ); +#endif } TEST_F( TEST_CATEGORY , mdrange_neg_idx ) { @@ -56,4 +59,5 @@ TEST_F( TEST_CATEGORY , mdrange_neg_idx ) { TestMDRange_5D_NegIdx< TEST_EXECSPACE >::test_5D_negidx( 128, 32, 8, 8, 4 ); TestMDRange_6D_NegIdx< TEST_EXECSPACE >::test_6D_negidx( 128, 32, 8, 8, 4, 2 ); } + } diff --git a/packages/kokkos/core/unit_test/TestMDRange_e.hpp b/packages/kokkos/core/unit_test/TestMDRange_e.hpp index 8162184c9..a62672535 100644 --- a/packages/kokkos/core/unit_test/TestMDRange_e.hpp +++ b/packages/kokkos/core/unit_test/TestMDRange_e.hpp @@ -44,8 +44,12 @@ #include<TestMDRange.hpp> namespace Test { + TEST_F( TEST_CATEGORY , mdrange_4d ) { +#if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduce explicitly handled in its own cpp file TestMDRange_4D< TEST_EXECSPACE >::test_reduce4( 100, 10, 10, 10 ); +#endif TestMDRange_4D< TEST_EXECSPACE >::test_for4( 100, 10, 10, 10 ); } + } diff --git a/packages/kokkos/core/unit_test/TestMemoryPool.hpp b/packages/kokkos/core/unit_test/TestMemoryPool.hpp index 9fb1d900f..00079e02e 100644 --- a/packages/kokkos/core/unit_test/TestMemoryPool.hpp +++ b/packages/kokkos/core/unit_test/TestMemoryPool.hpp @@ 
-626,7 +626,9 @@ TEST_F( TEST_CATEGORY, memory_pool ) TestMemoryPool::test_host_memory_pool_stats<>(); TestMemoryPool::test_memory_pool_v2< TEST_EXECSPACE >(false,false); TestMemoryPool::test_memory_pool_corners< TEST_EXECSPACE >(false,false); +#ifdef KOKKOS_ENABLE_LARGE_MEM_TESTS TestMemoryPool::test_memory_pool_huge< TEST_EXECSPACE >(); +#endif } } diff --git a/packages/kokkos/core/unit_test/TestReduce.hpp b/packages/kokkos/core/unit_test/TestReduce.hpp index 5748df1f1..924d8eb45 100644 --- a/packages/kokkos/core/unit_test/TestReduce.hpp +++ b/packages/kokkos/core/unit_test/TestReduce.hpp @@ -63,9 +63,11 @@ public: const size_type nwork; + KOKKOS_INLINE_FUNCTION ReduceFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} + KOKKOS_INLINE_FUNCTION ReduceFunctor( const ReduceFunctor & rhs ) : nwork( rhs.nwork ) {} @@ -102,6 +104,7 @@ class ReduceFunctorFinal : public ReduceFunctor< long, DeviceType > { public: typedef typename ReduceFunctor< long, DeviceType >::value_type value_type; + KOKKOS_INLINE_FUNCTION ReduceFunctorFinal( const size_t n ) : ReduceFunctor< long, DeviceType >( n ) {} diff --git a/packages/kokkos/core/unit_test/TestReduceDeviceView.hpp b/packages/kokkos/core/unit_test/TestReduceDeviceView.hpp new file mode 100644 index 000000000..4f65166e3 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestReduceDeviceView.hpp @@ -0,0 +1,131 @@ +#include<Kokkos_Core.hpp> + +namespace Test { +namespace { + +struct TestIsAsynchFunctor { + Kokkos::View<double,TEST_EXECSPACE> atomic_test; + TestIsAsynchFunctor(Kokkos::View<double,TEST_EXECSPACE> atomic_test_):atomic_test(atomic_test_){} + + KOKKOS_INLINE_FUNCTION + void operator()(const int) const { + Kokkos::atomic_add(&atomic_test(),1.0); + } +}; + +template<class PolicyType, class ReduceFunctor> +void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor) { + + using ExecSpace = TEST_EXECSPACE; + + Kokkos::View<int64_t,TEST_EXECSPACE> result("Result"); + 
Kokkos::View<double,TEST_EXECSPACE> atomic_test("Atomic"); + int64_t reducer_result, view_result, scalar_result; + + + Kokkos::Timer timer; + + // Establish whether execspace is asynchronous + Kokkos::parallel_for("Test::ReduceDeviceView::TestIsAsynch",Kokkos::RangePolicy<TEST_EXECSPACE>(0,1000000), + TestIsAsynchFunctor(atomic_test)); + double time0 = timer.seconds(); + timer.reset(); + ExecSpace::execution_space::fence(); + double time_fence0 = timer.seconds(); + Kokkos::deep_copy(result,0); + timer.reset(); + bool is_async = time0<time_fence0; + + // Test Reducer + + Kokkos::parallel_reduce("Test::ReduceDeviceView::TestReducer",policy, functor, Kokkos::Sum<int64_t,TEST_EXECSPACE>(result)); + double time1 = timer.seconds(); + // Check whether it was asyncronous + timer.reset(); + ExecSpace::execution_space::fence(); + double time_fence1 = timer.seconds(); + Kokkos::deep_copy(reducer_result,result); + Kokkos::deep_copy(result,0); + ASSERT_EQ(N,reducer_result); + timer.reset(); + + + // Test View + Kokkos::parallel_reduce("Test::ReduceDeviceView::TestView",policy, functor, result); + double time2 = timer.seconds(); + // Check whether it was asyncronous + timer.reset(); + ExecSpace::execution_space::fence(); + double time_fence2 = timer.seconds(); + Kokkos::deep_copy(view_result,result); + Kokkos::deep_copy(result,0); + ASSERT_EQ(N,view_result); + timer.reset(); + + + // Test Scalar + Kokkos::parallel_reduce("Test::ReduceDeviceView::TestScalar",policy, functor, scalar_result); + double time3 = timer.seconds(); + + // Check whether it was asyncronous + timer.reset(); + ExecSpace::execution_space::fence(); + double time_fence3 = timer.seconds(); + + ASSERT_EQ(N,scalar_result); + if(is_async) { + ASSERT_TRUE(time1<time_fence1); + } + if(is_async) { + ASSERT_TRUE(time2<time_fence2); + ASSERT_TRUE(time3>time_fence3); + } + } + +struct RangePolicyFunctor { + KOKKOS_INLINE_FUNCTION + void operator() (const int, int64_t& lsum) const { + lsum += 1; + } +}; + +struct 
MDRangePolicyFunctor { + KOKKOS_INLINE_FUNCTION + void operator() (const int, const int, const int, int64_t& lsum) const { + lsum += 1; + } +}; + +struct TeamPolicyFunctor { + int M; + TeamPolicyFunctor(int M_):M(M_){} + + KOKKOS_INLINE_FUNCTION + void operator() (const Kokkos::TeamPolicy<TEST_EXECSPACE>::member_type& team, int64_t& lsum) const { + for(int i=team.team_rank(); i<M; i+=team.team_size()) + lsum += 1; + } +}; + +} // namespace + +TEST_F( TEST_CATEGORY, reduce_device_view_range_policy ) +{ + int N=1000*1024*1024; + test_reduce_device_view(N,Kokkos::RangePolicy<TEST_EXECSPACE>(0,N),RangePolicyFunctor()); +} + +TEST_F( TEST_CATEGORY, reduce_device_view_mdrange_policy ) +{ + int N=1000*1024*1024; + test_reduce_device_view(N,Kokkos::MDRangePolicy<TEST_EXECSPACE,Kokkos::Rank<3>>({0,0,0},{1000,1024,1024}),MDRangePolicyFunctor()); +} + +TEST_F( TEST_CATEGORY, reduce_device_view_team_policy ) +{ + int N=1000*1024*1024; + test_reduce_device_view(N,Kokkos::TeamPolicy<TEST_EXECSPACE>(1000*1024,Kokkos::AUTO),TeamPolicyFunctor(1024)); +} + +} // namespace Test + diff --git a/packages/kokkos/core/unit_test/TestReducers.hpp b/packages/kokkos/core/unit_test/TestReducers.hpp index 519e3a80f..7270ea337 100644 --- a/packages/kokkos/core/unit_test/TestReducers.hpp +++ b/packages/kokkos/core/unit_test/TestReducers.hpp @@ -477,7 +477,7 @@ struct TestReducers { int reference_loc = -1; for ( int i = 0; i < N; i++ ) { - h_values( i ) = (Scalar) ( rand() % 100000 ); + h_values( i ) = (Scalar) ( rand() % 100000 + 2 ); if ( h_values( i ) < reference_min ) { reference_min = h_values( i ); @@ -485,7 +485,7 @@ struct TestReducers { } else if ( h_values( i ) == reference_min ) { // Make min unique. 
- h_values( i ) += std::numeric_limits< Scalar >::epsilon(); + h_values( i ) += Scalar(1); } } Kokkos::deep_copy( values, h_values ); @@ -537,7 +537,7 @@ struct TestReducers { int reference_loc = -1; for ( int i = 0; i < N; i++ ) { - h_values( i ) = (Scalar) ( rand() % 100000 ); + h_values( i ) = (Scalar) ( rand() % 100000 + 2 ); if ( h_values( i ) > reference_max ) { reference_max = h_values( i ); @@ -545,7 +545,7 @@ struct TestReducers { } else if ( h_values( i ) == reference_max ) { // Make max unique. - h_values( i ) -= std::numeric_limits< Scalar >::epsilon(); + h_values( i ) -= Scalar(1); } } Kokkos::deep_copy( values, h_values ); @@ -599,7 +599,7 @@ struct TestReducers { int reference_maxloc = -1; for ( int i = 0; i < N; i++ ) { - h_values( i ) = (Scalar) ( rand() % 100000 ); + h_values( i ) = (Scalar) ( rand() % 100000 + 2); } for ( int i = 0; i < N; i++ ) { @@ -609,7 +609,7 @@ struct TestReducers { } else if ( h_values( i ) == reference_max ) { // Make max unique. - h_values( i ) -= std::numeric_limits< Scalar >::epsilon(); + h_values( i ) -= Scalar(1); } } @@ -620,7 +620,7 @@ struct TestReducers { } else if ( h_values( i ) == reference_min ) { // Make min unique. - h_values( i ) += std::numeric_limits< Scalar >::epsilon(); + h_values( i ) += Scalar(1); } } diff --git a/packages/kokkos/core/unit_test/TestScan.hpp b/packages/kokkos/core/unit_test/TestScan.hpp index 5700c21dc..e021ed09f 100644 --- a/packages/kokkos/core/unit_test/TestScan.hpp +++ b/packages/kokkos/core/unit_test/TestScan.hpp @@ -69,9 +69,9 @@ struct TestScan { const value_type answer = n & 1 ? 
( n * ( ( n + 1 ) / 2 ) ) : ( ( n / 2 ) * ( n + 1 ) ); if ( answer != update ) { - errors()++; + int fail = errors()++; - if ( errors() < 20 ) { + if ( fail < 20 ) { printf( "TestScan(%d,%ld) != %ld\n", iwork, update, answer ); } } @@ -97,6 +97,7 @@ struct TestScan { long long int total = 0; Kokkos::parallel_scan( N, *this, total ); run_check( size_t( ( N+1 )*N/2 ), size_t( total ) ); + check_error(); } TestScan( const WorkSpec & Start , const WorkSpec & N ) @@ -108,6 +109,13 @@ struct TestScan { errors = errors_a; Kokkos::parallel_scan( exec_policy( Start , N ) , *this ); + check_error(); + } + + void check_error() { + int total_errors; + Kokkos::deep_copy(total_errors, errors); + ASSERT_EQ(total_errors,0); } static void test_range( const WorkSpec & begin, const WorkSpec & end ) diff --git a/packages/kokkos/core/unit_test/TestTeam.hpp b/packages/kokkos/core/unit_test/TestTeam.hpp index 2fe615a75..487a4d581 100644 --- a/packages/kokkos/core/unit_test/TestTeam.hpp +++ b/packages/kokkos/core/unit_test/TestTeam.hpp @@ -61,7 +61,7 @@ struct TestTeamPolicy { TestTeamPolicy( const size_t league_size ) : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), - Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this ), + Kokkos::TeamPolicy< ScheduleType, ExecSpace >(1,1).team_size_max( *this, Kokkos::ParallelReduceTag() ), league_size ) {} struct VerifyInitTag {}; @@ -113,11 +113,14 @@ struct TestTeamPolicy { static void test_for( const size_t league_size ) { TestTeamPolicy functor( league_size ); + typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace > policy_type; + typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace, VerifyInitTag > policy_type_init; - const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); + const int team_size = policy_type(league_size,1).team_size_max( functor, Kokkos::ParallelForTag() ); + const int team_size_init = policy_type_init(league_size,1).team_size_max( functor, 
Kokkos::ParallelForTag() ); - Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size, team_size ), functor ); - Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace, VerifyInitTag >( league_size, team_size ), functor ); + Kokkos::parallel_for( policy_type( league_size, team_size ), functor ); + Kokkos::parallel_for( policy_type_init( league_size, team_size_init ), functor ); test_small_league_size(); } @@ -142,15 +145,19 @@ struct TestTeamPolicy { { TestTeamPolicy functor( league_size ); - const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); + typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace > policy_type; + typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace, ReduceTag > policy_type_reduce; + + const int team_size = policy_type_reduce(league_size,1).team_size_max( functor, Kokkos::ParallelReduceTag() ); + const long N = team_size * league_size; long total = 0; - Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size, team_size ), functor, total ); + Kokkos::parallel_reduce( policy_type( league_size, team_size ), functor, total ); ASSERT_EQ( size_t( ( N - 1 ) * ( N ) ) / 2, size_t( total ) ); - Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace, ReduceTag >( league_size, team_size ), functor, total ); + Kokkos::parallel_reduce( policy_type_reduce( league_size, team_size ), functor, total ); ASSERT_EQ( ( size_t( N ) * size_t( N + 1 ) ) / 2, size_t( total ) ); } }; @@ -177,8 +184,10 @@ public: const size_type nwork; + KOKKOS_INLINE_FUNCTION ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} + KOKKOS_INLINE_FUNCTION ReduceTeamFunctor( const ReduceTeamFunctor & rhs ) : nwork( rhs.nwork ) {} KOKKOS_INLINE_FUNCTION @@ -244,10 +253,12 @@ public: const unsigned long nsum = nw % 2 ? 
nw * ( ( nw + 1 ) / 2 ) : ( nw / 2 ) * ( nw + 1 ); - const unsigned team_size = policy_type::team_size_recommended( functor_type( nwork ) ); + policy_type team_exec( nw, 1 ); + + const unsigned team_size = team_exec.team_size_recommended( functor_type( nwork ), Kokkos::ParallelReduceTag() ); const unsigned league_size = ( nwork + team_size - 1 ) / team_size; - policy_type team_exec( league_size, team_size ); + team_exec = policy_type( league_size, team_size ); for ( unsigned i = 0; i < Repeat; ++i ) { result_type tmp( & result[i] ); @@ -370,7 +381,8 @@ public: functor_type functor; - policy_type team_exec( nteam, policy_type::team_size_max( functor ) ); + policy_type team_exec( nteam, 1); + team_exec = policy_type(nteam, team_exec.team_size_max(functor, Kokkos::ParallelReduceTag())); for ( unsigned i = 0; i < Repeat; ++i ) { long int accum = 0; @@ -475,7 +487,8 @@ struct TestSharedTeam { typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor; typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; - const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); + const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >(8192, 1).team_size_max( Functor(), + Kokkos::ParallelReduceTag() ); Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size, team_size ); @@ -648,16 +661,20 @@ struct TestScratchTeam { typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace > p_type; - const size_t team_size = p_type::team_size_max( Functor() ); - - p_type team_exec( 8192 / team_size, team_size ); typename Functor::value_type error_count = 0; + int thread_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_THREAD_COUNT ); + + p_type team_exec = p_type(8192,1).set_scratch_size( 1, Kokkos::PerTeam( 
Functor::shared_int_array_type::shmem_size( Functor::SHARED_TEAM_COUNT ) ), + Kokkos::PerThread( thread_scratch_size + 3*sizeof(int))); + + const size_t team_size = team_exec.team_size_max( Functor(), Kokkos::ParallelReduceTag() ); + int team_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_TEAM_COUNT ) + Functor::shared_int_array_type::shmem_size( 3 * team_size ); - int thread_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_THREAD_COUNT ); + team_exec = p_type(8192 / team_size, team_size ); Kokkos::parallel_reduce( team_exec.set_scratch_size( 1, Kokkos::PerTeam( team_scratch_size ), Kokkos::PerThread( thread_scratch_size ) ), @@ -956,7 +973,7 @@ struct TestShmemSize { size_t size = view_type::shmem_size( d1, d2, d3 ); - ASSERT_EQ( size, d1 * d2 * d3 * sizeof( long ) ); + ASSERT_EQ( size, (d1 * d2 * d3 + 1)* sizeof( long ) ); test_layout_stride(); } @@ -973,3 +990,123 @@ struct TestShmemSize { }; } // namespace Test + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +namespace { + +template< class ExecSpace, class ScheduleType > +struct TestTeamBroadcast { + typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member; + + TestTeamBroadcast( const size_t league_size ) {} + + struct BroadcastTag {}; + + typedef long value_type; + + KOKKOS_INLINE_FUNCTION + void operator()( const team_member &teamMember, value_type &update ) const + { + int lid = teamMember.league_rank(); + int tid = teamMember.team_rank(); + int ts = teamMember.team_size(); + + value_type parUpdate = 0; + value_type value = tid * 3 + 1; + + teamMember.team_broadcast(value, lid%ts); + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( teamMember, ts ), [&] ( const int j, value_type &teamUpdate ) { + teamUpdate += value; + }, parUpdate ); + + if ( teamMember.team_rank() == 0 ) update += parUpdate; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const BroadcastTag &, 
const team_member &teamMember, value_type &update ) const + { + int lid = teamMember.league_rank(); + int tid = teamMember.team_rank(); + int ts = teamMember.team_size(); + + value_type parUpdate = 0; + value_type value = tid * 3 + 1; + + teamMember.team_broadcast([&] (value_type & var) { var*=2; }, value, lid%ts); + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( teamMember, ts ), [&] ( const int j, value_type &teamUpdate ) { + teamUpdate += value; + }, parUpdate ); + + if ( teamMember.team_rank() == 0 ) update += parUpdate; + } + + static void test_teambroadcast( const size_t league_size ) + { + TestTeamBroadcast functor( league_size ); + + typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace > policy_type; + typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace, BroadcastTag > policy_type_f; + + const int team_size = policy_type_f(league_size,1).team_size_max( functor, Kokkos::ParallelReduceTag() ); //printf("team_size=%d\n",team_size); + + //team_broadcast with value + long total = 0; + + Kokkos::parallel_reduce( policy_type( league_size, team_size ), functor, total ); + + value_type expected_result = 0; + for (unsigned int i=0; i<league_size; i++){ + value_type val = ((i%team_size)*3+1)*team_size; + expected_result+= val; + } + ASSERT_EQ( size_t( expected_result ), size_t( total ) ); //printf("team_broadcast with value -- expected_result=%d, total=%d\n",expected_result, total); + + //team_broadcast with funtion object + total = 0; + + Kokkos::parallel_reduce( policy_type_f( league_size, team_size ), functor, total ); + + expected_result = 0; + for (unsigned int i=0; i<league_size; i++){ + value_type val = ((i%team_size)*3+1)*2*team_size; + expected_result+= val; + } + ASSERT_EQ( size_t( expected_result ), size_t( total ) ); //printf("team_broadcast with funtion object -- expected_result=%d, total=%d\n",expected_result, total); + } +}; + +template<class ExecSpace> +struct TestScratchAlignment { + struct TestScalar { + double x,y,z; + }; + 
TestScratchAlignment() { + test(true); + test(false); + } + typedef Kokkos::View<TestScalar*,typename ExecSpace::scratch_memory_space> ScratchView; + typedef Kokkos::View<int*,typename ExecSpace::scratch_memory_space> ScratchViewInt; + void test(bool allocate_small) { + int shmem_size = ScratchView::shmem_size(11); + if(allocate_small) shmem_size += ScratchViewInt::shmem_size(1); + Kokkos::parallel_for(Kokkos::TeamPolicy<ExecSpace>(1,1).set_scratch_size(0,Kokkos::PerTeam(shmem_size)), + KOKKOS_LAMBDA (const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) { + if(allocate_small) ScratchViewInt p(team.team_scratch(0),1); + ScratchView a(team.team_scratch(0),11); + if(ptrdiff_t(a.data())%sizeof(TestScalar)!=0) + Kokkos::abort("Error: invalid scratch view alignment\n"); + }); + Kokkos::fence(); + } +}; + +} // namespace + +} // namespace Test + +/*--------------------------------------------------------------------------*/ diff --git a/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp new file mode 100644 index 000000000..f9d5add5d --- /dev/null +++ b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp @@ -0,0 +1,146 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cstdio> +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +namespace Test { + +namespace { + template<class T,int N> + class MyArray { + public: + T values[N]; + KOKKOS_INLINE_FUNCTION + void operator+= (const MyArray& src) { for(int i=0; i<N; i++) values[i] += src.values[i]; } + KOKKOS_INLINE_FUNCTION + void operator= (const MyArray& src) { for(int i=0; i<N; i++) values[i] = src.values[i]; } + KOKKOS_INLINE_FUNCTION + void operator+= (const volatile MyArray& src) volatile { for(int i=0; i<N; i++) values[i] += src.values[i]; } + KOKKOS_INLINE_FUNCTION + void operator= (const volatile MyArray& src) volatile { for(int i=0; i<N; i++) values[i] = src.values[i]; } + }; + + template<class T,int N, class PolicyType, int S> + struct FunctorFor { + double static_array[S]; + KOKKOS_INLINE_FUNCTION + void operator() (const typename PolicyType::member_type& team) const { + } + }; + template<class T,int N, class PolicyType, int S> + struct FunctorReduce { + double static_array[S]; + KOKKOS_INLINE_FUNCTION + void operator() (const typename PolicyType::member_type& team, MyArray<T,N>& lval) const { + for(int j=0; j<N; j++) + lval.values[j] += 1 + lval.values[0]; + } + }; +} + + +typedef Kokkos::TeamPolicy<TEST_EXECSPACE> policy_type; +typedef Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<128,8> > policy_type_128_8; +typedef Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<1024,2> > policy_type_1024_2; + +template<class T, int N, class PolicyType, int S> +void test_team_policy_max_recommended_static_size(int scratch_size) { + PolicyType p = PolicyType(10000, Kokkos::AUTO, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)); + int team_size_max_for = p.team_size_max(FunctorFor<T,N,PolicyType,S>(),Kokkos::ParallelForTag()); + int team_size_rec_for = 
p.team_size_recommended(FunctorFor<T,N,PolicyType,S>(),Kokkos::ParallelForTag()); + int team_size_max_reduce = p.team_size_max(FunctorReduce<T,N,PolicyType,S>(),Kokkos::ParallelReduceTag()); + int team_size_rec_reduce = p.team_size_recommended(FunctorReduce<T,N,PolicyType,S>(),Kokkos::ParallelReduceTag()); + + ASSERT_TRUE( team_size_max_for >= team_size_rec_for ); + ASSERT_TRUE( team_size_max_reduce >= team_size_rec_reduce ); + ASSERT_TRUE( team_size_max_for >= team_size_max_reduce ); + + Kokkos::parallel_for(PolicyType(10000, team_size_max_for, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)), + FunctorFor<T,N,PolicyType,S>()); + Kokkos::parallel_for(PolicyType(10000, team_size_rec_for, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)), + FunctorFor<T,N,PolicyType,S>()); + MyArray<T,N> val; + Kokkos::parallel_reduce(PolicyType(10000, team_size_max_reduce, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)), + FunctorReduce<T,N,PolicyType,S>(),val); + Kokkos::parallel_reduce(PolicyType(10000, team_size_rec_reduce, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)), + FunctorReduce<T,N,PolicyType,S>(),val); + Kokkos::fence(); +} + +template<class T, int N, class PolicyType> +void test_team_policy_max_recommended(int scratch_size) { + test_team_policy_max_recommended_static_size<T,N,PolicyType,1>(scratch_size); + test_team_policy_max_recommended_static_size<T,N,PolicyType,1000>(scratch_size); +} + +TEST_F( TEST_CATEGORY, team_policy_max_recommended ) +{ + int max_scratch_size = policy_type::scratch_size_max(0); + test_team_policy_max_recommended<double,2,policy_type>(0); + test_team_policy_max_recommended<double,2,policy_type>(max_scratch_size/3); + test_team_policy_max_recommended<double,2,policy_type>(max_scratch_size); + test_team_policy_max_recommended<double,2,policy_type_128_8>(0); + test_team_policy_max_recommended<double,2,policy_type_128_8>(max_scratch_size/3/8); + test_team_policy_max_recommended<double,2,policy_type_128_8>(max_scratch_size/8); + 
test_team_policy_max_recommended<double,2,policy_type_1024_2>(0); + test_team_policy_max_recommended<double,2,policy_type_1024_2>(max_scratch_size/3/2); + test_team_policy_max_recommended<double,2,policy_type_1024_2>(max_scratch_size/2); + + test_team_policy_max_recommended<double,16,policy_type>(0); + test_team_policy_max_recommended<double,16,policy_type>(max_scratch_size/3); + test_team_policy_max_recommended<double,16,policy_type>(max_scratch_size); + test_team_policy_max_recommended<double,16,policy_type_128_8>(0); + test_team_policy_max_recommended<double,16,policy_type_128_8>(max_scratch_size/3/8); + test_team_policy_max_recommended<double,16,policy_type_128_8>(max_scratch_size/8); + test_team_policy_max_recommended<double,16,policy_type_1024_2>(0); + test_team_policy_max_recommended<double,16,policy_type_1024_2>(max_scratch_size/3/2); + test_team_policy_max_recommended<double,16,policy_type_1024_2>(max_scratch_size/2); +} + + +} // namespace Test diff --git a/packages/kokkos/core/unit_test/TestTeamVector.hpp b/packages/kokkos/core/unit_test/TestTeamVector.hpp index 783fde600..294247a78 100644 --- a/packages/kokkos/core/unit_test/TestTeamVector.hpp +++ b/packages/kokkos/core/unit_test/TestTeamVector.hpp @@ -227,14 +227,13 @@ struct functor_team_for { functor_team_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { - typedef typename ExecutionSpace::scratch_memory_space shmem_space; - typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; - typedef typename 
shared_int::size_type size_type; - + typedef typename shmem_space::size_type size_type; const size_type shmemSize = team.team_size() * 13; shared_int values = shared_int( team.team_shmem(), shmemSize ); @@ -290,7 +289,9 @@ struct functor_team_reduce { functor_team_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { @@ -333,7 +334,9 @@ struct functor_team_reduce_reducer { functor_team_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { @@ -376,12 +379,12 @@ struct functor_team_vector_for { functor_team_vector_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type 
team ) const { - typedef typename ExecutionSpace::scratch_memory_space shmem_space; - typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; typedef typename shared_int::size_type size_type; const size_type shmemSize = team.team_size() * 13; @@ -442,7 +445,9 @@ struct functor_team_vector_reduce { Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; functor_team_vector_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { @@ -485,7 +490,9 @@ struct functor_team_vector_reduce_reducer { functor_team_vector_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { @@ -568,12 +575,12 @@ struct functor_vec_for { functor_vec_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int 
team_size ) const { return shared_int::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { - typedef typename ExecutionSpace::scratch_memory_space shmem_space; - typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; shared_int values = shared_int( team.team_shmem(), team.team_size() * 13 ); @@ -739,23 +746,16 @@ bool test_scalar( int nteams, int team_size, int test ) { functor_vec_red< Scalar, ExecutionSpace >( d_flag ) ); } else if ( test == 1 ) { - // WORKAROUND ROCM/CUDA + // WORKAROUND CUDA #if defined(KOKKOS_ENABLE_CUDA) #if defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) || defined(KOKKOS_ARCH_PASCAL) if(!std::is_same<ExecutionSpace,Kokkos::Cuda>::value) #endif #endif - #if defined(KOKKOS_ENABLE_ROCM) - if(!std::is_same<ExecutionSpace,Kokkos::Experimental::ROCm>::value) - #endif Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), functor_vec_red_reducer< Scalar, ExecutionSpace >( d_flag ) ); } else if ( test == 2 ) { - // WORKAROUND ROCM - #if defined(KOKKOS_ENABLE_ROCM) - if(!std::is_same<ExecutionSpace,Kokkos::Experimental::ROCm>::value) - #endif Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), functor_vec_scan< Scalar, ExecutionSpace >( d_flag ) ); } @@ -776,10 +776,6 @@ bool test_scalar( int nteams, int team_size, int test ) { functor_team_reduce< Scalar, ExecutionSpace >( d_flag ) ); } else if ( test == 7 ) { - // WORKAROUND ROCM - #if defined(KOKKOS_ENABLE_ROCM) - if(!std::is_same<ExecutionSpace,Kokkos::Experimental::ROCm>::value) - #endif Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ), functor_team_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) ); } @@ -792,10 +788,6 @@ bool test_scalar( int nteams, int team_size, int test ) { functor_team_vector_reduce< Scalar, ExecutionSpace >( d_flag ) ); } else if ( test == 10 ) { - // WORKAROUND ROCM - #if 
defined(KOKKOS_ENABLE_ROCM) - if(!std::is_same<ExecutionSpace,Kokkos::Experimental::ROCm>::value) - #endif Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), functor_team_vector_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) ); } @@ -955,28 +947,22 @@ TEST_F( TEST_CATEGORY, triple_nested_parallelism ) // With KOKKOS_DEBUG enabled, the functor uses too many registers to run // with a team size of 32 on GPUs, 16 is the max possible (at least on a K80 GPU) // See https://github.com/kokkos/kokkos/issues/1513 -#if defined(KOKKOS_DEBUG) && defined(KOKKOS_ENABLE_CUDA) +#if defined(KOKKOS_ENABLE_DEBUG) && defined(KOKKOS_ENABLE_CUDA) if (!std::is_same<TEST_EXECSPACE, Kokkos::Cuda>::value) { #endif #ifdef KOKKOS_ENABLE_ROCM // ROCm doesn't support TeamSize 32x32 - if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::ROCm>::value) { + if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::ROCm>::value) #endif TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 32, 32 ); -#ifdef KOKKOS_ENABLE_ROCM - } -#endif TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 32, 16 ); -#if defined(KOKKOS_DEBUG) && defined(KOKKOS_ENABLE_CUDA) +#if defined(KOKKOS_ENABLE_DEBUG) && defined(KOKKOS_ENABLE_CUDA) } #endif TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 16 ); #ifdef KOKKOS_ENABLE_ROCM // ROCm doesn't support team sizes not powers of two - if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::ROCm>::value) { + if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::ROCm>::value) #endif TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 7, 16 ); -#ifdef KOKKOS_ENABLE_ROCM - } -#endif } #endif diff --git a/packages/kokkos/core/unit_test/TestViewAPI_a.hpp b/packages/kokkos/core/unit_test/TestViewAPI_a.hpp index efc9ab27b..ba74331c5 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_a.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_a.hpp @@ -45,11 +45,9 @@ namespace Test { -#if 
!defined(KOKKOS_ENABLE_ROCM) TEST_F( TEST_CATEGORY, view_api_a ) { TestViewAPI< double, TEST_EXECSPACE >::run_test(); } -#endif } diff --git a/packages/kokkos/core/unit_test/TestViewAPI_b.hpp b/packages/kokkos/core/unit_test/TestViewAPI_b.hpp index e006dd9b6..03b41db51 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_b.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_b.hpp @@ -45,13 +45,11 @@ namespace Test { -#if !defined(KOKKOS_ENABLE_ROCM) TEST_F( TEST_CATEGORY, view_api_b ) { TestViewAPI< double, TEST_EXECSPACE >::run_test_view_operator_a(); TestViewAPI< double, TEST_EXECSPACE >::run_test_mirror(); TestViewAPI< double, TEST_EXECSPACE >::run_test_scalar(); } -#endif } diff --git a/packages/kokkos/core/unit_test/TestViewAPI_c.hpp b/packages/kokkos/core/unit_test/TestViewAPI_c.hpp index a0f03ff18..d34ae6340 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_c.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_c.hpp @@ -45,12 +45,10 @@ namespace Test { -#if !defined(KOKKOS_ENABLE_ROCM) TEST_F( TEST_CATEGORY, view_api_c ) { TestViewAPI< double, TEST_EXECSPACE >::run_test_deep_copy_empty(); TestViewAPI< double, TEST_EXECSPACE >::run_test_view_operator_b(); } -#endif } diff --git a/packages/kokkos/core/unit_test/TestViewAPI_d.hpp b/packages/kokkos/core/unit_test/TestViewAPI_d.hpp index b984df98b..38e10381f 100644 --- a/packages/kokkos/core/unit_test/TestViewAPI_d.hpp +++ b/packages/kokkos/core/unit_test/TestViewAPI_d.hpp @@ -45,7 +45,6 @@ namespace Test { -#if !defined(KOKKOS_ENABLE_ROCM) TEST_F( TEST_CATEGORY, view_api_d ) { TestViewAPI< double, TEST_EXECSPACE >::run_test_const(); @@ -54,6 +53,5 @@ TEST_F( TEST_CATEGORY, view_api_d ) TestViewAPI< double, TEST_EXECSPACE >::run_test_vector(); TestViewAPI< double, TEST_EXECSPACE >::run_test_view_operator_c(); } -#endif } diff --git a/packages/kokkos/core/unit_test/TestViewCopy.hpp b/packages/kokkos/core/unit_test/TestViewCopy.hpp new file mode 100644 index 000000000..7eab9daa1 --- /dev/null +++ 
b/packages/kokkos/core/unit_test/TestViewCopy.hpp @@ -0,0 +1,155 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cstdio> + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +namespace Test { + +namespace { + +template < typename ExecSpace > +struct TestViewCopy { // deep_copy round trips between a strided subview in ExecSpace and rank-2 LayoutLeft views in other spaces + + using InExecSpace = ExecSpace; + + static void test_view_copy() + { +#if defined( KOKKOS_ENABLE_CUDA ) || defined( KOKKOS_ENABLE_ROCM ) + // Intended ExecSpace: CudaUVM, CudaHostPinned + // There are no assertions: this test will fail at runtime with an illegal memory access if something goes wrong + // Test 1: deep_copy from host_mirror_space to ExecSpace and ExecSpace back to host_mirror_space + { + const int dim0 = 4; + const int dim1 = 2; + const int dim2 = 3; + + typedef Kokkos::View<double****,InExecSpace> Rank4ViewType; + Rank4ViewType view_4; + view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2); + + typedef typename Kokkos::Impl::is_space<InExecSpace>::host_mirror_space::execution_space host_space_type; + Kokkos::View<double**,Kokkos::LayoutLeft,host_space_type> srcView("srcView", dim2, dim2); + + // Strided dst view + auto dstView = Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL()); + + // host_mirror_space to ExecSpace + Kokkos::deep_copy( dstView, srcView ); + Kokkos::fence(); + + // ExecSpace to host_mirror_space + Kokkos::deep_copy( srcView, dstView ); + Kokkos::fence(); + } + + // Test 2: deep_copy from the device space (Cuda, or ROCm; see space_type) to ExecSpace and ExecSpace back to the device space + { + const int dim0 = 4; + const int dim1 = 2; + const int dim2 = 3; + + typedef Kokkos::View<double****,InExecSpace> Rank4ViewType; + Rank4ViewType view_4; + view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2); + +#if defined( KOKKOS_ENABLE_CUDA ) + typedef Kokkos::Cuda space_type; +#endif +#if defined( KOKKOS_ENABLE_ROCM ) + typedef Kokkos::Experimental::ROCm space_type; +#endif + Kokkos::View<double**,Kokkos::LayoutLeft,space_type> srcView("srcView", dim2, dim2); + + // Strided dst view + auto dstView =
Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL()); + + // device space (space_type) to ExecSpace + Kokkos::deep_copy( dstView, srcView ); + Kokkos::fence(); + + // ExecSpace to device space (space_type) + Kokkos::deep_copy( srcView, dstView ); + Kokkos::fence(); + } + + // Test 3: deep_copy from host_space to ExecSpace and ExecSpace back to host_space + { + const int dim0 = 4; + const int dim1 = 2; + const int dim2 = 3; + + typedef Kokkos::View<double****,InExecSpace> Rank4ViewType; + Rank4ViewType view_4; + view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2); + + typedef Kokkos::HostSpace host_space_type; + Kokkos::View<double**,Kokkos::LayoutLeft,host_space_type> srcView("srcView", dim2, dim2); + + // Strided dst view + auto dstView = Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL()); + + // host_space to ExecSpace + Kokkos::deep_copy( dstView, srcView ); + Kokkos::fence(); + + // ExecSpace to host_space + Kokkos::deep_copy( srcView, dstView ); + Kokkos::fence(); + } +#endif + } // end test_view_copy + +}; // end struct + +} // namespace + +TEST_F( TEST_CATEGORY , view_copy_tests ) { + // NOTE: this file is only meant to be included in the CudaUVM and CudaHostPinned test builds + TestViewCopy< TEST_EXECSPACE >::test_view_copy(); +} + +} // namespace Test diff --git a/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp new file mode 100644 index 000000000..3185fa547 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp @@ -0,0 +1,740 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> +#include <time.h> + +#include <Kokkos_Core.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY , view_layoutstride_left_to_layoutleft_assignment ) +{ + typedef TEST_EXECSPACE exec_space ; + + auto t = time(0); + srand(t);// Use current time as seed for random generator + printf("view_layoutstride_left_to_layoutleft_assignment: srand(%zu)\n",size_t(t)); // seed is printed so a failing run can be reproduced; %zu is the size_t conversion (%lu mismatches where size_t != unsigned long) + + { // Assignment of rank-1 LayoutLeft = LayoutStride + int ndims = 1; + int dims [] = {10}; + int order [] = {0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double*, Kokkos::LayoutLeft, exec_space > dst = src; + + Kokkos::View< double*, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-2 LayoutLeft = LayoutStride + int ndims = 2; + int dims [] = {10,9}; + int order [] = {0,1}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double**, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double**, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++)
h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double**, Kokkos::LayoutLeft, exec_space > dst = src; + + Kokkos::View< double**, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-3 LayoutLeft = LayoutStride + int ndims = 3; + int dims [] = {10,9,8}; + int order [] = {0,1,2}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double***, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double***, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double***, Kokkos::LayoutLeft, exec_space > dst = src; + + Kokkos::View< double***, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-4 LayoutLeft = LayoutStride + int ndims = 4; + int dims [] = {10,9,8,7}; + int order [] = {0,1,2,3}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double****, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX 
* (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double****, Kokkos::LayoutLeft, exec_space > dst = src; + + Kokkos::View< double****, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-5 LayoutLeft = LayoutStride + int ndims = 5; + int dims [] = {10,9,8,7,6}; + int order [] = {0,1,2,3,4}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*****, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double*****, Kokkos::LayoutLeft, exec_space > dst = src; + + Kokkos::View< double*****, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-6 LayoutLeft = LayoutStride + int ndims = 6; + int dims [] = {10,9,8,7,6,5}; + int order [] = {0,1,2,3,4,5}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double******, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + 
+ Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double******, Kokkos::LayoutLeft, exec_space > dst = src; + + Kokkos::View< double******, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-7 LayoutLeft = LayoutStride + int ndims = 7; + int dims [] = {10,9,8,7,6,5,4}; + int order [] = {0,1,2,3,4,5,6}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*******, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double*******, Kokkos::LayoutLeft, exec_space > dst = src; + + Kokkos::View< double*******, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-8 LayoutLeft = LayoutStride + int ndims = 8; + int dims [] = {10,9,8,7,6,5,4,3}; + int order [] = {0,1,2,3,4,5,6,7}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double********, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double********, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / 
RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double********, Kokkos::LayoutLeft, exec_space > dst = src; + + Kokkos::View< double********, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + +} + +TEST_F( TEST_CATEGORY , view_layoutstride_right_to_layoutright_assignment ) +{ + typedef TEST_EXECSPACE exec_space ; + + auto t = time(0); + srand(t);// Use current time as seed for random generator + printf("view_layoutstride_right_to_layoutright_assignment: srand(%zu)\n",size_t(t)); // seed is printed so a failing run can be reproduced; %zu is the size_t conversion (%lu mismatches where size_t != unsigned long) + + { // Assignment of rank-1 LayoutRight = LayoutStride + int ndims = 1; + int dims [] = {10}; + int order [] = {0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double*, Kokkos::LayoutRight, exec_space > dst = src; + + Kokkos::View< double*, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-2 LayoutRight = LayoutStride + int ndims = 2; + int dims [] = {10,9}; + int order [] = {1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double**,
Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double**, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double**, Kokkos::LayoutRight, exec_space > dst = src; + + Kokkos::View< double**, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-3 LayoutRight = LayoutStride + int ndims = 3; + int dims [] = {10,9,8}; + int order [] = {2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double***, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double***, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double***, Kokkos::LayoutRight, exec_space > dst = src; + + Kokkos::View< double***, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-4 LayoutRight = LayoutStride + int ndims = 4; + int dims [] = {10,9,8,7}; + int order [] = {3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double****, Kokkos::LayoutStride, exec_space > 
src("LayoutStride", layout); + + Kokkos::View< double****, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double****, Kokkos::LayoutRight, exec_space > dst = src; + + Kokkos::View< double****, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-5 LayoutRight = LayoutStride + int ndims = 5; + int dims [] = {10,9,8,7,6}; + int order [] = {4,3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*****, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double*****, Kokkos::LayoutRight, exec_space > dst = src; + + Kokkos::View< double*****, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-6 LayoutRight = LayoutStride + int ndims = 6; + int dims [] = {10,9,8,7,6,5}; + int order [] = {5,4,3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double******, Kokkos::LayoutStride, exec_space > 
src("LayoutStride", layout); + + Kokkos::View< double******, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double******, Kokkos::LayoutRight, exec_space > dst = src; + + Kokkos::View< double******, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-7 LayoutRight = LayoutStride + int ndims = 7; + int dims [] = {10,9,8,7,6,5,4}; + int order [] = {6,5,4,3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*******, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double*******, Kokkos::LayoutRight, exec_space > dst = src; + + Kokkos::View< double*******, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-8 LayoutRight = LayoutStride + int ndims = 8; + int dims [] = {10,9,8,7,6,5,4,3}; + int order [] = {7,6,5,4,3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double********, 
Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double********, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double********, Kokkos::LayoutRight, exec_space > dst = src; + + Kokkos::View< double********, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + +} + +TEST_F( TEST_CATEGORY , view_layoutstride_right_to_layoutleft_assignment ) +{ + typedef TEST_EXECSPACE exec_space ; + + auto t = time(0); + srand(t);// Use current time as seed for random generator + printf("view_layoutstride_right_to_layoutleft_assignment: srand(%zu)\n",size_t(t)); // seed is printed so a failing run can be reproduced; %zu is the size_t conversion (%lu mismatches where size_t != unsigned long) + + { // Assignment of rank-1 LayoutLeft = LayoutStride (LayoutRight compatible) + int ndims = 1; + int dims [] = {10}; + int order [] = {0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double*, Kokkos::LayoutLeft, exec_space > dst; + + dst = src; + + Kokkos::View< double*, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(),
src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-2 LayoutLeft = LayoutStride (LayoutRight compatible) + int ndims = 2; + int dims [] = {10,9}; + int order [] = {1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double**, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double**, Kokkos::LayoutLeft, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-3 LayoutLeft = LayoutStride (LayoutRight compatible) + int ndims = 3; + int dims [] = {10,9,8}; + int order [] = {2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double***, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double***, Kokkos::LayoutLeft, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-4 LayoutLeft = LayoutStride (LayoutRight compatible) + int ndims = 4; + int dims [] = {10,9,8,7}; + int order [] = {3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double****, Kokkos::LayoutLeft, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-5 LayoutLeft = LayoutStride (LayoutRight compatible) + int ndims = 5; + int dims [] = {10,9,8,7,6}; + int order [] = {4,3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); 
+ + Kokkos::View< double*****, Kokkos::LayoutLeft, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-6 LayoutLeft = LayoutStride (LayoutRight compatible) + int ndims = 6; + int dims [] = {10,9,8,7,6,5}; + int order [] = {5,4,3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double******, Kokkos::LayoutLeft, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-7 LayoutLeft = LayoutStride (LayoutRight compatible) + int ndims = 7; + int dims [] = {10,9,8,7,6,5,4}; + int order [] = {6,5,4,3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*******, Kokkos::LayoutLeft, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-8 LayoutLeft = LayoutStride (LayoutRight compatible) + int ndims = 8; + int dims [] = {10,9,8,7,6,5,4,3}; + int order [] = {7,6,5,4,3,2,1,0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double********, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double********, Kokkos::LayoutLeft, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + +} + +TEST_F( TEST_CATEGORY , view_layoutstride_left_to_layoutright_assignment ) +{ + typedef 
TEST_EXECSPACE exec_space ; + + auto t = time(0); + srand(t);// Use current time as seed for random generator + printf("view_layoutstride_left_to_layoutright_assignment: srand(%lu)\n",size_t(t)); + + { // Assignment of rank-1 LayoutRight = LayoutStride (LayoutLeft compatible) + int ndims = 1; + int dims [] = {10}; + int order [] = {0}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src ); + + for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100); + + Kokkos::deep_copy( src, h_src ); + + Kokkos::View< double*, Kokkos::LayoutRight, exec_space > dst; + + dst = src; + + Kokkos::View< double*, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst ); + + Kokkos::deep_copy( h_dst, dst ); + + bool test = true; + for(size_t i=0; i<src.span();i++){ + if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;} + } + ASSERT_EQ( dst.span(), src.span() ); + ASSERT_EQ( test, true ); + } + { // Assignment of rank-2 LayoutRight = LayoutStride (LayoutLeft compatible) + int ndims = 2; + int dims [] = {10,9}; + int order [] = {0,1}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double**, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double**, Kokkos::LayoutRight, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-3 LayoutRight = LayoutStride (LayoutLeft compatible) + int ndims = 3; + int dims [] = {10,9,8}; + int order [] = {0,1,2}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double***, 
Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double***, Kokkos::LayoutRight, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-4 LayoutRight = LayoutStride (LayoutLeft compatible) + int ndims = 4; + int dims [] = {10,9,8,7}; + int order [] = {0,1,2,3}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double****, Kokkos::LayoutRight, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-5 LayoutRight = LayoutStride (LayoutLeft compatible) + int ndims = 5; + int dims [] = {10,9,8,7,6}; + int order [] = {0,1,2,3,4}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*****, Kokkos::LayoutRight, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-6 LayoutRight = LayoutStride (LayoutLeft compatible) + int ndims = 6; + int dims [] = {10,9,8,7,6,5}; + int order [] = {0,1,2,3,4,5}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double******, Kokkos::LayoutRight, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-7 LayoutRight = LayoutStride (LayoutLeft 
compatible) + int ndims = 7; + int dims [] = {10,9,8,7,6,5,4}; + int order [] = {0,1,2,3,4,5,6}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double*******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double*******, Kokkos::LayoutRight, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + { // Assignment of rank-8 LayoutRight = LayoutStride (LayoutLeft compatible) + int ndims = 8; + int dims [] = {10,9,8,7,6,5,4,3}; + int order [] = {0,1,2,3,4,5,6,7}; + Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims); + Kokkos::View< double********, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout); + + Kokkos::View< double********, Kokkos::LayoutRight, exec_space > dst; + + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts"); + } + +} + +} + diff --git a/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp b/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp new file mode 100644 index 000000000..e765e8b06 --- /dev/null +++ b/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp @@ -0,0 +1,1215 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cstdio> + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_ViewLayoutTiled.hpp> + +#include <type_traits> +#include <typeinfo> + +namespace Test { + +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE +namespace { + +template <typename ExecSpace > +struct TestViewLayoutTiled { + + typedef double Scalar; + + static constexpr int T0 = 2; + static constexpr int T1 = 4; + static constexpr int T2 = 4; + static constexpr int T3 = 2; + static constexpr int T4 = 2; + static constexpr int T5 = 2; + static constexpr int T6 = 2; + static constexpr int T7 = 2; + + // Rank 2 + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1> LayoutLL_2D_2x4; + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1> LayoutRL_2D_2x4; + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1> LayoutLR_2D_2x4; + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1> LayoutRR_2D_2x4; + + // Rank 3 + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1, T2> LayoutLL_3D_2x4x4; + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1, T2> LayoutRL_3D_2x4x4; + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1, T2> LayoutLR_3D_2x4x4; + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2> LayoutRR_3D_2x4x4; + + // Rank 4 + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1, T2, T3> LayoutLL_4D_2x4x4x2; + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1, T2, T3> LayoutRL_4D_2x4x4x2; + typedef 
Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1, T2, T3> LayoutLR_4D_2x4x4x2; + typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2, T3> LayoutRR_4D_2x4x4x2; + + + static void test_view_layout_tiled_2d( const int N0, const int N1 ) + { +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + const int FT = T0*T1; + + const int NT0 = int( std::ceil( N0 / T0 ) ); + const int NT1 = int( std::ceil( N1 / T1 ) ); + // Test create_mirror_view, deep_copy + // Create LL View + { + typedef typename Kokkos::View< Scalar**, LayoutLL_2D_2x4, ExecSpace > ViewType; + ViewType v("v", N0, N1); + + typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); + + // Initialize host-view + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + hv(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i + j*T0 ); + } } + } } + + // copy to device + Kokkos::deep_copy(v, hv); + + Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 2 LL", mdrangepolicy, + KOKKOS_LAMBDA (const int ti, const int tj) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; } + } } + }); + + Kokkos::deep_copy(hv, v); + + long counter_subview = 0; + long counter_inc = 0; + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( hv, ti, tj ); + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; } + if ( tile_subview(i,j) != (( ti + tj*NT0 )*FT + ( i + j*T0 ) + 1 )) { ++counter_inc; } + } } 
+ } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } + + // Create RL View + { + typedef typename Kokkos::View< Scalar**, LayoutRL_2D_2x4, ExecSpace > ViewType; + Kokkos::View< Scalar**, LayoutRL_2D_2x4, ExecSpace > v("v", N0, N1); + + typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); + + // Initialize host-view + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + hv(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i + j*T0 ); + } } + } } + + // copy to device + Kokkos::deep_copy(v, hv); + + Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 2 RL", mdrangepolicy, + KOKKOS_LAMBDA (const int ti, const int tj) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; } + } } + }); + + Kokkos::deep_copy(hv, v); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + auto tile_subview = Kokkos::tile_subview( hv, ti, tj ); + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; } + if ( tile_subview(i,j) != (( ti*NT1 + tj )*FT + ( i + j*T0 ) + 1 )) { ++counter_inc; } + } } + } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope + + // Create LR View + { + typedef typename Kokkos::View< Scalar**, LayoutLR_2D_2x4, ExecSpace > ViewType; + Kokkos::View< Scalar**, LayoutLR_2D_2x4, ExecSpace > v("v", N0, N1); + + typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); + + // Initialize host-view + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int i 
= 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + hv(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i*T1 + j ); + } } + } } + + // copy to device + Kokkos::deep_copy(v, hv); + + Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 2 LR", mdrangepolicy, + KOKKOS_LAMBDA (const int ti, const int tj) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; } + } } + }); + + Kokkos::deep_copy(hv, v); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( hv, ti, tj ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; } + if ( tile_subview(i,j) != ( ( ti + tj*NT0 )*FT + ( i*T1 + j ) + 1 ) ) { ++counter_inc; } + } } + } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope + + // Create RR View + { + typedef typename Kokkos::View< Scalar**, LayoutRR_2D_2x4, ExecSpace > ViewType; + Kokkos::View< Scalar**, LayoutRR_2D_2x4, ExecSpace > v("v", N0, N1); + + typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); + + // Initialize host-view + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + hv(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i*T1 + j ); + } } + } } + + // copy to device + Kokkos::deep_copy(v, hv); + + Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 2 LR", mdrangepolicy, + KOKKOS_LAMBDA (const int ti, const 
int tj) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; } + } } + }); + + Kokkos::deep_copy(hv, v); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + auto tile_subview = Kokkos::tile_subview( hv, ti, tj ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; } + if ( tile_subview(i,j) != ( ( ti*NT1 + tj )*FT + ( i*T1 + j ) + 1 ) ) { ++counter_inc; } + } } + } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope +#endif +#endif + } // end test_view_layout_tiled_2d + + + static void test_view_layout_tiled_3d( const int N0, const int N1, const int N2 ) + { +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + + const int FT = T0*T1*T2; + + const int NT0 = int( std::ceil( N0 / T0 ) ); + const int NT1 = int( std::ceil( N1 / T1 ) ); + const int NT2 = int( std::ceil( N2 / T2 ) ); + + // Create LL View + { + typedef Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, ExecSpace > ViewType; + Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, ExecSpace > dv("dv", N0, N1, N2); + + typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); + + // Initialize on host + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 ); + } } } + } } } + + // copy to device + Kokkos::deep_copy(dv, v); + + Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} ); + + // iterate by tile + 
Kokkos::parallel_for( "ViewTile rank 3 LL", mdrangepolicy, + KOKKOS_LAMBDA (const int i, const int j, const int k) { + dv(i,j,k) += 1; + }); + + Kokkos::deep_copy(v, dv); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk ); + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; } + if ( tile_subview(i,j,k) != ( ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 ) + 1 ) ) { ++counter_inc; } + } } } + } } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope + + // Create RL View + { + typedef Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, ExecSpace > ViewType; + Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, ExecSpace > dv("dv", N0, N1, N2); + + typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); + + // Initialize on host + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ); + } } } + } } } + + // copy to device + Kokkos::deep_copy(dv, v); + + Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 3 RL", mdrangepolicy, + KOKKOS_LAMBDA (const int i, const int j, const int k) { + dv(i,j,k) += 1; + }); + + Kokkos::deep_copy(v, dv); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + 
auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk ); + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; } + if ( tile_subview(i,j,k) != ( ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ) + 1 ) ) { ++counter_inc; } + } } } + } } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope + + // Create LR View + { + typedef Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, ExecSpace > ViewType; + Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, ExecSpace > dv("dv", N0, N1, N2); + + typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); + + // Initialize on host + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ); + } } } + } } } + + // copy to device + Kokkos::deep_copy(dv, v); + + Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 3 LR", mdrangepolicy, + KOKKOS_LAMBDA (const int i, const int j, const int k) { + dv(i,j,k) += 1; + }); + + Kokkos::deep_copy(v, dv); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; } + if ( tile_subview(i,j,k) != ( ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ) + 1 ) 
) { ++counter_inc; } + } } } + } } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope + + // Create RR View + { + typedef Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, ExecSpace > ViewType; + Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, ExecSpace > dv("dv", N0, N1, N2); + + typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); + + // Initialize on host + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ); + } } } + } } } + + // copy to device + Kokkos::deep_copy(dv, v); + + Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 3 RR", mdrangepolicy, + KOKKOS_LAMBDA (const int i, const int j, const int k) { + dv(i,j,k) += 1; + }); + + Kokkos::deep_copy(v, dv); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; } + if ( tile_subview(i,j,k) != ( ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ) + 1 ) ) { ++counter_inc; } + } } } + } } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope +#endif +#endif + } // end test_view_layout_tiled_3d + + + static void test_view_layout_tiled_4d( const int N0, const int N1, const int N2, const int N3 ) + { +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) 
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + const int FT = T0*T1*T2*T3; + + const int NT0 = int( std::ceil( N0 / T0 ) ); + const int NT1 = int( std::ceil( N1 / T1 ) ); + const int NT2 = int( std::ceil( N2 / T2 ) ); + const int NT3 = int( std::ceil( N3 / T3 ) ); + + // Create LL View + { + typedef Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, ExecSpace > ViewType; + Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, ExecSpace > dv("dv", N0, N1, N2, N3); + + typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); + + // Initialize on host + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int l = 0; l < T3; ++l ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ); + } } } } + } } } } + + // copy to device + Kokkos::deep_copy(dv, v); + + Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 4 LL", mdrangepolicy, + KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) { + dv(i,j,k,l) += 1; + }); + + Kokkos::deep_copy(v, dv); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl ); + for ( int l = 0; l < T3; ++l ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; } + if ( tile_subview(i,j,k,l) != ( ( ti + tj*NT0 
+ tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) + 1 ) ) { ++counter_inc; } + } } } } + } } } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope + + // Create RL View + { + typedef Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, ExecSpace > ViewType; + Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, ExecSpace > dv("dv", N0, N1, N2, N3); + + typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); + + // Initialize on host + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int l = 0; l < T3; ++l ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ); + } } } } + } } } } + + // copy to device + Kokkos::deep_copy(dv, v); + + Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 4 RL", mdrangepolicy, + KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) { + dv(i,j,k,l) += 1; + }); + + Kokkos::deep_copy(v, dv); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tl = 0; tl < NT3; ++tl ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl ); + for ( int l = 0; l < T3; ++l ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; } + if ( tile_subview(i,j,k,l) != ( ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) + 
1 ) ) { ++counter_inc; } + } } } } + } } } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope + + // Create LR View + { + typedef Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, ExecSpace > ViewType; + Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, ExecSpace > dv("dv", N0, N1, N2, N3); + + typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); + + // Initialize on host + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + for ( int l = 0; l < T3; ++l ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ); + } } } } + } } } } + + // copy to device + Kokkos::deep_copy(dv, v); + + Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 4 LR", mdrangepolicy, + KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) { + dv(i,j,k,l) += 1; + }); + + Kokkos::deep_copy(v, dv); + + long counter_subview = 0; + long counter_inc = 0; + + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + for ( int l = 0; l < T3; ++l ) { + if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; } + if ( tile_subview(i,j,k,l) != ( ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) + 1 ) ) { ++counter_inc; } + } } } } + } } } } + 
ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, long(0)); + } // end scope + + // Create RR View + { + typedef Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, ExecSpace > ViewType; + Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, ExecSpace > dv("dv", N0, N1, N2, N3); + + typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); + + // Initialize on host + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + for ( int l = 0; l < T3; ++l ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ); + } } } } + } } } } + + // copy to device + Kokkos::deep_copy(dv, v); + + Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} ); + + // iterate by tile + Kokkos::parallel_for( "ViewTile rank 4 RR", mdrangepolicy, + KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) { + dv(i,j,k,l) += 1; + }); + + Kokkos::deep_copy(v, dv); + + long counter_subview = 0; + long counter_inc = 0; + + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tl = 0; tl < NT3; ++tl ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + for ( int l = 0; l < T3; ++l ) { + if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; } + if ( tile_subview(i,j,k,l) != ( ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) + 1 ) ) { ++counter_inc; } + } } } } + } } } } + ASSERT_EQ(counter_subview, long(0)); + ASSERT_EQ(counter_inc, 
long(0)); + } // end scope +#endif +#endif + } // end test_view_layout_tiled_4d + + + static void test_view_layout_tiled_subtile_2d( const int N0, const int N1 ) + { + const int FT = T0*T1; + + const int NT0 = int( std::ceil( N0 / T0 ) ); + const int NT1 = int( std::ceil( N1 / T1 ) ); + + // Counter to check for errors at the end + long counter[4] = {0}; + + // Create LL View + { + Kokkos::View< Scalar**, LayoutLL_2D_2x4, Kokkos::HostSpace > v("v", N0, N1); + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i + j*T0 ); + } } + } } + + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj ); + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[0]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl; + std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti + tj*NT0 )*FT + ( i + j*T0 ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl; +#endif + } } + } } + } // end scope + + // Create RL View + { + Kokkos::View< Scalar**, LayoutRL_2D_2x4, Kokkos::HostSpace > v("v", N0, N1); + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i + j*T0 ); + } } + } } + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj ); + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[1]; } +#ifdef 
KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl; + std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti*NT1 + tj )*FT + ( i + j*T0 ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl; +#endif + } } + } } + } // end scope + + // Create LR View + { + Kokkos::View< Scalar**, LayoutLR_2D_2x4, Kokkos::HostSpace > v("v", N0, N1); + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + v(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i*T1 + j ); + } } + } } + + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[2]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl; + std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti + tj*NT0 )*FT + ( i*T1 + j ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl; +#endif + } } + } } + } // end scope + + // Create RR View + { + Kokkos::View< Scalar**, LayoutRR_2D_2x4, Kokkos::HostSpace > v("v", N0, N1); + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + v(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i*T1 + j ); + } } + } } + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { 
++counter[3]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl; + std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << " v = " << v(ti*T0 + i, tj*T1+j) << " flat idx = " << ( ti*NT1 + tj )*FT + ( i*T1 + j ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl; + std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; +#endif + } } + } } + } // end scope + +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "subview_tile vs view errors:\n" + << " LL: " << counter[0] + << " RL: " << counter[1] + << " LR: " << counter[2] + << " RR: " << counter[3] + << std::endl; +#endif + + ASSERT_EQ(counter[0], long(0)); + ASSERT_EQ(counter[1], long(0)); + ASSERT_EQ(counter[2], long(0)); + ASSERT_EQ(counter[3], long(0)); + } // end test_view_layout_tiled_subtile_2d + + + static void test_view_layout_tiled_subtile_3d( const int N0, const int N1, const int N2 ) + { + + const int FT = T0*T1*T2; + + const int NT0 = int( std::ceil( N0 / T0 ) ); + const int NT1 = int( std::ceil( N1 / T1 ) ); + const int NT2 = int( std::ceil( N2 / T2 ) ); + + // Counter to check for errors at the end + long counter[4] = {0}; + // Create LL View + { + Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2); + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 ); + } } } + } } } + + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk ); + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( 
tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[0]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl; + std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl; + std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; +#endif + } } } + } } } + } // end scope + + // Create RL View + { + Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2); + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ); + } } } + } } } + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk ); + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[1]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl; + std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl; +#endif + } } } + } } } + } // end scope + + // Create LR View + { + 
Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2); + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ); + } } } + } } } + + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[2]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl; + std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl; + std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; +#endif + } } } + } } } + } // end scope + + // Create RR View + { + Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2); + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ); + } } } + } } } + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk ); + for ( 
int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[3]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl; + std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << " flat idx = " << ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl; + std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; +#endif + } } } + } } } + } // end scope + +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "subview_tile vs view errors:\n" + << " LL: " << counter[0] + << " RL: " << counter[1] + << " LR: " << counter[2] + << " RR: " << counter[3] + << std::endl; +#endif + + ASSERT_EQ(counter[0], long(0)); + ASSERT_EQ(counter[1], long(0)); + ASSERT_EQ(counter[2], long(0)); + ASSERT_EQ(counter[3], long(0)); + + } // end test_view_layout_tiled_subtile_3d + + + static void test_view_layout_tiled_subtile_4d( const int N0, const int N1, const int N2, const int N3 ) + { + const int FT = T0*T1*T2*T3; + + const int NT0 = int( std::ceil( N0 / T0 ) ); + const int NT1 = int( std::ceil( N1 / T1 ) ); + const int NT2 = int( std::ceil( N2 / T2 ) ); + const int NT3 = int( std::ceil( N3 / T3 ) ); + + // Counter to check for errors at the end + long counter[4] = {0}; + // Create LL View + { + Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3); + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int l = 0; l < T3; ++l ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + 
i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ); + } } } } + } } } } + + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl ); + for ( int l = 0; l < T3; ++l ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[0]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl; + std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << "," + << " i,j,k,l: " << i << "," << j << "," << k << "," << l + << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) + << " flat idx = " << ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl; + std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; +#endif + } } } } + } } } } + } // end scope + + // Create RL View + { + Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3); + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int l = 0; l < T3; ++l ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ); + } } } } + } } } } + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tl = 0; tl 
< NT3; ++tl ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl ); + for ( int l = 0; l < T3; ++l ) { + for ( int k = 0; k < T2; ++k ) { + for ( int j = 0; j < T1; ++j ) { + for ( int i = 0; i < T0; ++i ) { + if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[1]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl; + std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << "," + << " i,j,k,l: " << i << "," << j << "," << k << "," << l + << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) + << " flat idx = " << ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl; + std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; +#endif + } } } } + } } } } + } // end scope + + // Create LR View + { + Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3); + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + for ( int l = 0; l < T3; ++l ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ); + } } } } + } } } } + + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int ti = 0; ti < NT0; ++ti ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + for ( int l = 0; l < T3; ++l ) { + if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, 
tk*T2+k, tl*T3 + l) ) { ++counter[2]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl; + std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << "," + << " i,j,k,l: " << i << "," << j << "," << k << "," << l + << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) + << " flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl; + std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; +#endif + } } } } + } } } } + } // end scope + + // Create RR View + { + Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3); + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tl = 0; tl < NT3; ++tl ) { + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + for ( int l = 0; l < T3; ++l ) { + v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ); + } } } } + } } } } + + for ( int ti = 0; ti < NT0; ++ti ) { + for ( int tj = 0; tj < NT1; ++tj ) { + for ( int tk = 0; tk < NT2; ++tk ) { + for ( int tl = 0; tl < NT3; ++tl ) { + auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl ); + for ( int i = 0; i < T0; ++i ) { + for ( int j = 0; j < T1; ++j ) { + for ( int k = 0; k < T2; ++k ) { + for ( int l = 0; l < T3; ++l ) { + if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[3]; } +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl; + std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << 
"," << tl << "," + << " i,j,k,l: " << i << "," << j << "," << k << "," << l + << " v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) + << " flat idx = " << ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) << std::endl; + std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl; + std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl; +#endif + } } } } + } } } } + } // end scope + +#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT + std::cout << "subview_tile vs view errors:\n" + << " LL: " << counter[0] + << " RL: " << counter[1] + << " LR: " << counter[2] + << " RR: " << counter[3] + << std::endl; +#endif + + ASSERT_EQ(counter[0], long(0)); + ASSERT_EQ(counter[1], long(0)); + ASSERT_EQ(counter[2], long(0)); + ASSERT_EQ(counter[3], long(0)); + + } // end test_view_layout_tiled_subtile_4d + +}; // end TestViewLayoutTiled struct + +} // namespace + +TEST_F( TEST_CATEGORY , view_layouttiled) { + // These two examples are iterating by tile, then within a tile - not by extents + // If N# is not a power of two, but want to iterate by tile then within a tile, need to check that mapped index is within extent + TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_2d( 4, 12 ); + TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_3d( 4, 12, 16 ); + TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_4d( 4, 12, 16, 12 ); +} +TEST_F( TEST_CATEGORY , view_layouttiled_subtile) { + // These two examples are iterating by tile, then within a tile - not by extents + // If N# is not a power of two, but want to iterate by tile then within a tile, need to check that mapped index is within extent + TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_subtile_2d( 4, 12 ); + TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_subtile_3d( 4, 12, 16 ); + TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_subtile_4d( 4, 12, 16, 12 ); +} +#endif +} // 
namespace Test diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy.cpp new file mode 100644 index 000000000..374859235 --- /dev/null +++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestViewCopy.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy.cpp new file mode 100644 index 000000000..b0ea67a1b --- /dev/null +++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewCopy.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Other.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Other.cpp index f63409da2..788e458ee 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_Other.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Other.cpp @@ -50,3 +50,4 @@ #include<TestTile.hpp> #include<TestViewCtorPropEmbeddedDim.hpp> +#include<TestViewLayoutTiled.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Reductions_DeviceView.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Reductions_DeviceView.cpp new file mode 100644 index 000000000..5b3409014 --- /dev/null +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Reductions_DeviceView.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestReduceDeviceView.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Team.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Team.cpp index c63358509..374068345 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_Team.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Team.cpp @@ -68,6 +68,22 @@ TEST_F( TEST_CATEGORY, team_reduce ) TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } + +TEST_F( TEST_CATEGORY, team_broadcast ) +{ + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 0 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 0 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 2 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 2 ); + + 
TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 16 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 16 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 1000 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 1000 ); +} + } #include <TestTeamVector.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp index 879633b0c..dcb6896b8 100644 --- a/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp @@ -65,6 +65,10 @@ TEST_F( TEST_CATEGORY, team_lambda_shared_request ) TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); } + +TEST_F( TEST_CATEGORY, scratch_align) { + TestScratchAlignment< TEST_EXECSPACE >(); +} #endif #endif diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamTeamSize.cpp new file mode 100644 index 000000000..7e61e0810 --- /dev/null +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamTeamSize.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestTeamTeamSize.hpp> diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_ViewLayoutStrideAssignment.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_ViewLayoutStrideAssignment.cpp new file mode 100644 index 000000000..2732cd4ba --- /dev/null +++ b/packages/kokkos/core/unit_test/cuda/TestCuda_ViewLayoutStrideAssignment.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestViewLayoutStrideAssignment.hpp> + diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp index 566891bb3..0ddd67acf 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp @@ -50,6 +50,7 @@ #include<TestTile.hpp> #include<TestViewCtorPropEmbeddedDim.hpp> +#include<TestViewLayoutTiled.hpp> #include <mutex> diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Reductions_DeviceView.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Reductions_DeviceView.cpp new file mode 100644 index 000000000..99fe5842c --- /dev/null +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Reductions_DeviceView.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmp/TestOpenMP_Category.hpp> +#include <TestReduceDeviceView.hpp> diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp index 790ea9e6d..e5b900ac5 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp @@ -68,6 +68,21 @@ TEST_F( TEST_CATEGORY, team_reduce ) TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } + +TEST_F( TEST_CATEGORY, team_broadcast ) +{ + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 0 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 0 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 2 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 2 
); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 16 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 16 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 1000 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 1000 ); +} } #include <TestTeamVector.hpp> diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp index f57da139a..64d757533 100644 --- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp @@ -65,6 +65,9 @@ TEST_F( TEST_CATEGORY, team_lambda_shared_request ) TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); } +TEST_F( TEST_CATEGORY, scratch_align) { + TestScratchAlignment< TEST_EXECSPACE >(); +} #endif #endif diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamTeamSize.cpp new file mode 100644 index 000000000..a98728f02 --- /dev/null +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamTeamSize.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmp/TestOpenMP_Category.hpp> +#include <TestTeamTeamSize.hpp> + diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewLayoutStrideAssignment.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewLayoutStrideAssignment.cpp new file mode 100644 index 000000000..81b296d15 --- /dev/null +++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewLayoutStrideAssignment.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewLayoutStrideAssignment.hpp> + diff --git a/packages/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewCopy.cpp b/packages/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewCopy.cpp new file mode 100644 index 000000000..4636691d9 --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewCopy.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <rocm/TestROCmHostPinned_Category.hpp> +#include <TestViewCopy.hpp> diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_Crs.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_Crs.cpp new file mode 100644 index 000000000..05a90da83 --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_Crs.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<rocm/TestROCm_Category.hpp> +#include<TestCrs.hpp> + diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_a.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_a.cpp new file mode 100644 index 000000000..23edcbcc3 --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_a.cpp @@ -0,0 +1,54 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<rocm/TestROCm_Category.hpp> +#include<TestMDRange.hpp> +namespace Test { + +TEST_F( TEST_CATEGORY , mdrange_5d_reduce ) { + TestMDRange_5D< TEST_EXECSPACE >::test_reduce5( 100, 10, 10, 10, 5 ); +} + +} + diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_b.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_b.cpp new file mode 100644 index 000000000..5f02e893a --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_b.cpp @@ -0,0 +1,54 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<rocm/TestROCm_Category.hpp> +#include<TestMDRange.hpp> +namespace Test { + +TEST_F( TEST_CATEGORY , mdrange_6d_reduce ) { + TestMDRange_6D< TEST_EXECSPACE >::test_reduce6( 10, 10, 10, 10, 10, 5 ); +} + +} + diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_c.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_c.cpp new file mode 100644 index 000000000..6a84962d1 --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_c.cpp @@ -0,0 +1,54 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<rocm/TestROCm_Category.hpp> +#include<TestMDRange.hpp> +namespace Test { + +TEST_F( TEST_CATEGORY , mdrange_2d_reduce ) { + TestMDRange_2D< TEST_EXECSPACE >::test_reduce2( 100, 100 ); +} + +} + diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_d.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_d.cpp new file mode 100644 index 000000000..c6c6ba291 --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_d.cpp @@ -0,0 +1,54 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<rocm/TestROCm_Category.hpp> +#include<TestMDRange.hpp> +namespace Test { + +TEST_F( TEST_CATEGORY , mdrange_3d_reduce ) { + TestMDRange_3D< TEST_EXECSPACE >::test_reduce3( 100, 100, 5 ); +} + +} + diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_e.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_e.cpp new file mode 100644 index 000000000..6afd8b8ec --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_e.cpp @@ -0,0 +1,54 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<rocm/TestROCm_Category.hpp> +#include<TestMDRange.hpp> +namespace Test { + +TEST_F( TEST_CATEGORY , mdrange_4d_reduce ) { + TestMDRange_4D< TEST_EXECSPACE >::test_reduce4( 100, 100, 10, 5 ); +} + +} + diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_SubView_c13.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_SubView_c13.cpp new file mode 100644 index 000000000..03b39972a --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_SubView_c13.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <rocm/TestROCmHostPinned_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_test_unmanaged_subview_reset ) +{ + TestViewSubview::test_unmanaged_subview_reset< TEST_EXECSPACE >(); +} + +} // namespace Test diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp index e1025f1ba..57887450e 100644 --- a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp @@ -46,7 +46,6 @@ namespace Test { -#if !defined(KOKKOS_ROCM_CLANG_WORKAROUND) TEST_F( TEST_CATEGORY, team_scan ) { TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); @@ -56,7 +55,6 @@ TEST_F( TEST_CATEGORY, team_scan ) TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10000 ); TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); } -#endif TEST_F( TEST_CATEGORY, team_long_reduce ) { diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp index 1968ab31e..c7255919d 100644 --- a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp @@ -65,6 +65,10 @@ TEST_F( TEST_CATEGORY, team_lambda_shared_request ) TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); } + +TEST_F( TEST_CATEGORY, scratch_align) { + TestScratchAlignment< TEST_EXECSPACE >(); +} #endif #endif diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamTeamSize.cpp new file mode 100644 index 
000000000..583e01fcb --- /dev/null +++ b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamTeamSize.cpp @@ -0,0 +1,49 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Doesn't work right now due to bug with static sized array member + +//#include <rocm/TestROCm_Category.hpp> +//#ifndef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND +//#include <TestTeamTeamSize.hpp> +//#endif diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Other.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Other.cpp index 0c3bae377..26a218c5c 100644 --- a/packages/kokkos/core/unit_test/serial/TestSerial_Other.cpp +++ b/packages/kokkos/core/unit_test/serial/TestSerial_Other.cpp @@ -50,3 +50,4 @@ #include<TestTile.hpp> #include<TestViewCtorPropEmbeddedDim.hpp> +#include<TestViewLayoutTiled.hpp> diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Reductions_DeviceView.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Reductions_DeviceView.cpp new file mode 100644 index 000000000..1c20670c2 --- /dev/null +++ b/packages/kokkos/core/unit_test/serial/TestSerial_Reductions_DeviceView.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestReduceDeviceView.hpp> diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Team.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Team.cpp index 619cb727a..47d02f700 100644 --- a/packages/kokkos/core/unit_test/serial/TestSerial_Team.cpp +++ b/packages/kokkos/core/unit_test/serial/TestSerial_Team.cpp @@ -68,6 +68,21 @@ TEST_F( TEST_CATEGORY, team_reduce ) TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } + +TEST_F( TEST_CATEGORY, team_broadcast ) +{ + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 0 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 0 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 2 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 2 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 16 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 16 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 1000 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 1000 ); +} } #include <TestTeamVector.hpp> diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp index 963908c92..029999ab7 100644 --- a/packages/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp +++ b/packages/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp @@ -65,6 +65,10 @@ TEST_F( TEST_CATEGORY, 
team_lambda_shared_request ) TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); } + +TEST_F( TEST_CATEGORY, scratch_align) { + TestScratchAlignment< TEST_EXECSPACE >(); +} #endif #endif diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_TeamTeamSize.cpp new file mode 100644 index 000000000..53451b30c --- /dev/null +++ b/packages/kokkos/core/unit_test/serial/TestSerial_TeamTeamSize.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestTeamTeamSize.hpp> diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_ViewLayoutStrideAssignment.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_ViewLayoutStrideAssignment.cpp new file mode 100644 index 000000000..64c5b642d --- /dev/null +++ b/packages/kokkos/core/unit_test/serial/TestSerial_ViewLayoutStrideAssignment.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestViewLayoutStrideAssignment.hpp> + diff --git a/packages/kokkos/core/unit_test/standalone/Makefile b/packages/kokkos/core/unit_test/standalone/Makefile new file mode 100644 index 000000000..f8a75616c --- /dev/null +++ b/packages/kokkos/core/unit_test/standalone/Makefile @@ -0,0 +1,55 @@ +KOKKOS_DEVICES=Cuda +KOKKOS_CUDA_OPTIONS=enable_lambda +KOKKOS_ARCH = "SNB,Kepler35" + +MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) + +ifndef KOKKOS_PATH + KOKKOS_PATH = $(MAKEFILE_PATH)../../../ +endif + +SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) +HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) +HEADERS += $(wildcard $(MAKEFILE_PATH)/../*.hpp) + +vpath %.cpp $(sort $(dir $(SRC))) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper +EXE = test.cuda +else +CXX = g++ +EXE = test.host +endif + +CXXFLAGS ?= -O3 -g +override CXXFLAGS += -I$(MAKEFILE_PATH) -I$(KOKKOS_PATH)/core/unit_test -I$(KOKKOS_PATH)/tpls/gtest -DTESTFILE=$(TESTFILE) +#SRC += $(KOKKOS_PATH)/tpls/gtest/gtest/gtest-all.cc + +DEPFLAGS = -M +LINK = ${CXX} +LINKFLAGS = + +OBJ = $(notdir $(SRC:.cpp=.o)) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) gtest-all.o + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) gtest-all.o -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) + +gtest-all.o:$(KOKKOS_PATH)/tpls/gtest/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(KOKKOS_PATH)/tpls/gtest/gtest/gtest-all.cc diff --git
a/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp b/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp new file mode 100644 index 000000000..2db51658c --- /dev/null +++ b/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp @@ -0,0 +1,71 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> +#include <cstdlib> + +#include <Kokkos_Core.hpp> + +#ifdef KOKKOS_ENABLE_ROCM +#include <rocm/TestROCm_Category.hpp> +#endif +#ifdef KOKKOS_ENABLE_CUDA +#include <cuda/TestCuda_Category.hpp> +#endif +#ifdef KOKKOS_ENABLE_OPENMP +#include <openmp/TestOpenMP_Category.hpp> +#endif +#ifdef KOKKOS_ENABLE_THREADS +#include <threads/TestThreads_Category.hpp> +#endif + +#include <TestMemoryPool.hpp> + +int main( int argc, char *argv[] ) { + Kokkos::initialize(argc,argv); + ::testing::InitGoogleTest( &argc, argv ); + + int result = RUN_ALL_TESTS(); + Kokkos::finalize(); + return result; +} diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_Other.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_Other.cpp index a0c8b4159..13786aa4a 100644 --- a/packages/kokkos/core/unit_test/threads/TestThreads_Other.cpp +++ b/packages/kokkos/core/unit_test/threads/TestThreads_Other.cpp @@ -50,3 +50,4 @@ #include<TestTile.hpp> #include<TestViewCtorPropEmbeddedDim.hpp> +#include<TestViewLayoutTiled.hpp> diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_Reductions_DeviceView.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_Reductions_DeviceView.cpp new file mode 100644 index 000000000..28f4b7d57 --- /dev/null +++ 
b/packages/kokkos/core/unit_test/threads/TestThreads_Reductions_DeviceView.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestReduceDeviceView.hpp> diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_Team.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_Team.cpp index b87c1f77d..63d250935 100644 --- a/packages/kokkos/core/unit_test/threads/TestThreads_Team.cpp +++ b/packages/kokkos/core/unit_test/threads/TestThreads_Team.cpp @@ -68,6 +68,21 @@ TEST_F( TEST_CATEGORY, team_reduce ) TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } + +TEST_F( TEST_CATEGORY, team_broadcast ) +{ + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 0 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 0 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 2 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 2 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 16 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 16 ); + + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 1000 ); + TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 1000 ); +} } #include <TestTeamVector.hpp> diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp index c07fae77c..d17119579 100644 --- a/packages/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp +++ b/packages/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp @@ -65,6 +65,10 @@ TEST_F( TEST_CATEGORY, 
team_lambda_shared_request ) TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); } + +TEST_F( TEST_CATEGORY, scratch_align) { + TestScratchAlignment< TEST_EXECSPACE >(); +} #endif #endif diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_TeamTeamSize.cpp new file mode 100644 index 000000000..b1cf4ec87 --- /dev/null +++ b/packages/kokkos/core/unit_test/threads/TestThreads_TeamTeamSize.cpp @@ -0,0 +1,47 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestTeamTeamSize.hpp> + + diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_ViewLayoutStrideAssignment.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_ViewLayoutStrideAssignment.cpp new file mode 100644 index 000000000..5ddd07108 --- /dev/null +++ b/packages/kokkos/core/unit_test/threads/TestThreads_ViewLayoutStrideAssignment.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestViewLayoutStrideAssignment.hpp> + diff --git a/packages/kokkos/doc/kokkos-promotion.txt b/packages/kokkos/doc/kokkos-promotion.txt index 5a1306ecc..0aede5f32 100644 --- a/packages/kokkos/doc/kokkos-promotion.txt +++ b/packages/kokkos/doc/kokkos-promotion.txt @@ -149,7 +149,9 @@ Step 5: This step can be done on any SEMS machine (e.g. kokkos-dev). Actually, t git clone -b kokkos-develop git@github.com:trilinos/Trilinos.git TRILINOS_PATH=$PWD/Trilinos - 5.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees. + 5.2. 
Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees. + + * Use the master branch of Kokkos for this. module load sems-python/2.7.9 python $KOKKOS_PATH/scripts/snapshot.py $KOKKOS_PATH $TRILINOS_PATH/packages @@ -173,20 +175,22 @@ Step 5: This step can be done on any SEMS machine (e.g. kokkos-dev). Actually, t ## KokkosKernels Changelog - 5.4. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3) - - cd $TRILINOS_PATH - mkdir CHECKIN - cd CHECKIN - nohup ../cmake/std/sems/checkin-test-sems.sh --do-all --push & - - Although Trilinos has experimental Pull Request testing, it is not good enough to replace the checkin script yet. + 5.4. Wait for Trilinos Autotester results 5.5. If there are failures, fix and backtrack. Otherwise, go to next step // -------------------------------------------------------------------------------- // -Step 6: Push Kokkos master to GitHub (requires Owner permission). +Step 6: Push Kokkos master and develop to GitHub (requires Owner permission). + 6.1. Master branch: cd KOKKOS_PATH + git checkout master git push --follow-tags origin master + + 6.2. 
Develop branch: First merge (--no-ff) master back into develop + cd KOKKOS_PATH + git checkout develop + git merge --no-ff master + git push origin develop + diff --git a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp index ff3002e64..e8c1550fc 100644 --- a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp +++ b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp @@ -81,13 +81,22 @@ int main(int narg, char* args[]) { Kokkos::initialize(narg,args); // Launch 12 teams of the maximum number of threads per team - const team_policy policy( 12 , team_policy::team_size_max( hello_world() ) ); - + const int team_size_max = team_policy(1,1).team_size_max(hello_world(), Kokkos::ParallelReduceTag()); + const team_policy policy_a( 12 , team_size_max ); + int sum = 0; - Kokkos::parallel_reduce( policy , hello_world() , sum ); + Kokkos::parallel_reduce( policy_a , hello_world() , sum ); + + // The result will be 12*team_size_max + printf("Result A: %i == %i\n",sum, team_size_max*12); + + // In practice it is often better to let Kokkos decide on the team_size + const team_policy policy_b( 12 , Kokkos::AUTO ); - // The result will be 12*team_policy::team_size_max( hello_world()) - printf("Result %i\n",sum); + Kokkos::parallel_reduce( policy_b , hello_world() , sum ); + // The result will be 12*policy_b.team_size_recommended( hello_world(), Kokkos::ParallelReduceTag()) + const int team_size_recommended = policy_b.team_size_recommended( hello_world(), Kokkos::ParallelReduceTag()); + printf("Result B: %i %i\n",sum, team_size_recommended*12); Kokkos::finalize(); } diff --git a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp
b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp index 721aab2d3..bbb1000e9 100644 --- a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp +++ b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp @@ -79,7 +79,8 @@ int main(int narg, char* args[]) { Kokkos::initialize(narg,args); // Launch 3 teams of the maximum number of threads per team - const team_policy policy( 3 , team_policy::team_size_max( hello_world() ) ); + const int team_size_max = team_policy(3,1).team_size_max( hello_world(), Kokkos::ParallelReduceTag()); + const team_policy policy( 3 , team_size_max ); int sum = 0; Kokkos::parallel_reduce( policy , hello_world() , sum ); diff --git a/packages/kokkos/example/virtual_functions/Makefile b/packages/kokkos/example/virtual_functions/Makefile new file mode 100644 index 000000000..06186786c --- /dev/null +++ b/packages/kokkos/example/virtual_functions/Makefile @@ -0,0 +1,55 @@ +KOKKOS_DEVICES=Cuda +KOKKOS_CUDA_OPTIONS=enable_lambda +KOKKOS_ARCH = "SNB,Kepler35" + +#KOKKOS_DEVICES=OpenMP +#KOKKOS_CUDA_OPTIONS=enable_lambda +#KOKKOS_ARCH = "SNB" + +MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) + +ifndef KOKKOS_PATH + KOKKOS_PATH = $(MAKEFILE_PATH)../.. 
+endif + +SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) +HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) + +vpath %.cpp $(sort $(dir $(SRC))) + +default: build + echo "Start Build" + +LINKFLAGS = +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper +EXE = virtual.cuda +override LINKFLAGS += --remove-duplicate-link-files +else +CXX = g++ +EXE = virtual.host +endif + +CXXFLAGS ?= -O3 -g +override CXXFLAGS += -I$(MAKEFILE_PATH) + +DEPFLAGS = -M +LINK = ${CXX} + +OBJ = $(notdir $(SRC:.cpp=.o)) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/packages/kokkos/example/virtual_functions/classes.cpp b/packages/kokkos/example/virtual_functions/classes.cpp new file mode 100644 index 000000000..1b9d7cb38 --- /dev/null +++ b/packages/kokkos/example/virtual_functions/classes.cpp @@ -0,0 +1,26 @@ +#include<classes.hpp> + +KOKKOS_FUNCTION +Foo::Foo() { + val = 0; +} + +KOKKOS_FUNCTION +Foo_1::Foo_1() { + val = 1; +} + +KOKKOS_FUNCTION +int Foo_1::value() { + return val; +} + +KOKKOS_FUNCTION +Foo_2::Foo_2() { + val = 2; +} + +KOKKOS_FUNCTION +int Foo_2::value() { + return val; +} diff --git a/packages/kokkos/example/virtual_functions/classes.hpp b/packages/kokkos/example/virtual_functions/classes.hpp new file mode 100644 index 000000000..362c473ce --- /dev/null +++ b/packages/kokkos/example/virtual_functions/classes.hpp @@ -0,0 +1,39 @@ +#ifndef KOKKOS_EXAMPLE_VIRTUAL_FUNCTIONS_CLASSES_HPP +#define KOKKOS_EXAMPLE_VIRTUAL_FUNCTIONS_CLASSES_HPP + +#include<Kokkos_Core.hpp> + +class Foo { + protected: + int val; + public: + KOKKOS_FUNCTION + Foo(); + + KOKKOS_FUNCTION + virtual int value() { 
return 0; }; + + KOKKOS_FUNCTION + virtual ~Foo() {} +}; + +class Foo_1: public Foo { + public: + KOKKOS_FUNCTION + Foo_1(); + + KOKKOS_FUNCTION + int value(); +}; + +class Foo_2: public Foo { + public: + KOKKOS_FUNCTION + Foo_2(); + + KOKKOS_FUNCTION + int value(); +}; + +#endif //KOKKOS_EXAMPLE_VIRTUAL_FUNCTIONS_CLASSES_HPP + diff --git a/packages/kokkos/example/virtual_functions/main.cpp b/packages/kokkos/example/virtual_functions/main.cpp new file mode 100644 index 000000000..a16e0cf73 --- /dev/null +++ b/packages/kokkos/example/virtual_functions/main.cpp @@ -0,0 +1,36 @@ +#include<classes.hpp> + +int main(int argc, char* argv[]) { + Kokkos::initialize(argc,argv); + + { + Foo* f_1 = (Foo*) Kokkos::kokkos_malloc(sizeof(Foo_1)); + Foo* f_2 = (Foo*) Kokkos::kokkos_malloc(sizeof(Foo_2)); + + Kokkos::parallel_for("CreateObjects",1, KOKKOS_LAMBDA (const int&) { + new ((Foo_1*)f_1) Foo_1(); + new ((Foo_2*)f_2) Foo_2(); + }); + + int value_1,value_2; + Kokkos::parallel_reduce("CheckValues",1, KOKKOS_LAMBDA (const int&, int& lsum) { + lsum = f_1->value(); + },value_1); + + Kokkos::parallel_reduce("CheckValues",1, KOKKOS_LAMBDA (const int&, int& lsum) { + lsum = f_2->value(); + },value_2); + + printf("Values: %i %i\n",value_1,value_2); + + Kokkos::parallel_for("DestroyObjects",1, KOKKOS_LAMBDA (const int&) { + f_1->~Foo(); + f_2->~Foo(); + }); + + Kokkos::kokkos_free(f_1); + Kokkos::kokkos_free(f_2); + } + + Kokkos::finalize(); +} diff --git a/packages/kokkos/generate_makefile.bash b/packages/kokkos/generate_makefile.bash index 4225e5b2d..34be03f98 100755 --- a/packages/kokkos/generate_makefile.bash +++ b/packages/kokkos/generate_makefile.bash @@ -97,12 +97,21 @@ do echo "Invalid compiler by --compiler command: '${COMPILER}'" exit fi + # ... 
valid compiler, ensure absolute path set + WCOMPATH=`which $COMPILER` + COMPDIR=`dirname $WCOMPATH` + COMPNAME=`basename $WCOMPATH` + COMPILER=${COMPDIR}/${COMPNAME} ;; --with-options*) KOKKOS_OPT="${key#*=}" ;; + --gcc-toolchain*) + KOKKOS_GCC_TOOLCHAIN="${key#*=}" + ;; --help) echo "Kokkos configure options:" + echo "" echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory." echo "--qthreads-path=/Path/To/Qthreads: Path to Qthreads install directory." echo " Overrides path given by --with-qthreads." @@ -171,6 +180,7 @@ do echo " " echo "--with-cuda-options=[OPT]: Additional options to CUDA:" echo " force_uvm, use_ldg, enable_lambda, rdc" + echo "--gcc-toolchain=/Path/To/GccRoot: Set the gcc toolchain to use with clang (e.g. /usr)" echo "--make-j=[NUM]: DEPRECATED: call make with appropriate" echo " -j flag" exit 0 @@ -195,7 +205,7 @@ else fi if [ "${KOKKOS_PATH}" = "${PWD}" ] || [ "${KOKKOS_PATH}" = "${PWD}/" ]; then - echo "Running generate_makefile.sh in the Kokkos root directory is not allowed" + echo "Running generate_makefile.bash in the Kokkos root directory is not allowed" exit fi @@ -204,8 +214,13 @@ KOKKOS_SRC_PATH=${KOKKOS_PATH} KOKKOS_SETTINGS="KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}" #KOKKOS_SETTINGS="KOKKOS_PATH=${KOKKOS_PATH}" +# The double [[ ]] in the elif branch is not a typo if [ ${#COMPILER} -gt 0 ]; then KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" +elif + [ ${#COMPILER} -eq 0 ] && [[ ${KOKKOS_DEVICES} =~ .*Cuda.* ]]; then + COMPILER="${KOKKOS_PATH}/bin/nvcc_wrapper" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" fi if [ ${#KOKKOS_DEVICES} -gt 0 ]; then @@ -265,6 +280,10 @@ if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" fi +if [ ${#KOKKOS_GCC_TOOLCHAIN} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_INTERNAL_GCC_TOOLCHAIN=${KOKKOS_GCC_TOOLCHAIN}" +fi + KOKKOS_SETTINGS_NO_KOKKOS_PATH="${KOKKOS_SETTINGS}" 
KOKKOS_TEST_INSTALL_PATH="${PWD}/install" @@ -276,7 +295,7 @@ fi mkdir -p install gen_makefile=Makefile.kokkos -echo "#Makefile to satisfy existens of target kokkos-clean before installing the library" > install/${gen_makefile} +echo "#Makefile to satisfy existence of target kokkos-clean before installing the library" > install/${gen_makefile} echo "kokkos-clean:" >> install/${gen_makefile} echo "" >> install/${gen_makefile} mkdir -p core diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index fe90cc296..08453309d 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -14,3 +14,4 @@ tag: 2.04.11 date: 10:28:2017 master: 54a1330a develop: ed36c017 tag: 2.5.00 date: 12:15:2017 master: dfe685f4 develop: ec7ad6d8 tag: 2.6.00 date: 03:07:2018 master: 62e760fa develop: d1ba7d71 tag: 2.7.00 date: 05:24:2018 master: e01945d0 develop: 2d13f608 +tag: 2.7.24 date: 11:04:2018 master: d3a94192 develop: 7a06fc81 diff --git a/packages/kokkos/scripts/eti/generate_view_copy_cpp_files_write b/packages/kokkos/scripts/eti/generate_view_copy_cpp_files_write index ffe2a593b..45208e76b 100755 --- a/packages/kokkos/scripts/eti/generate_view_copy_cpp_files_write +++ b/packages/kokkos/scripts/eti/generate_view_copy_cpp_files_write @@ -19,8 +19,8 @@ echo "KOKKOS_IMPL_VIEWCOPY_ETI_DECL(${SCALAR_TYPE}${RANK_STARS},${LAYOUT_TYPE},L echo "KOKKOS_IMPL_VIEWFILL_ETI_DECL(${SCALAR_TYPE}${RANK_STARS},${LAYOUT_TYPE},KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE,${INDEX_TYPE})" >> common/Kokkos_ViewFillCopyETIDecl_Macros.hpp -FileName=${EXECUTION_SPACE_DIR}/Kokkos_${EXECUTION_SPACE}_ViewCopyETIInst_${INDEX_TYPE}_${SCALAR_TYPE}_${LAYOUT_TYPE}_Rank${RANK}.cpp -ObjectName=Kokkos_${EXECUTION_SPACE}_ViewCopyETIInst_${INDEX_TYPE}_${SCALAR_TYPE}_${LAYOUT_TYPE}_Rank${RANK}.o +FileName=${EXECUTION_SPACE_DIR}/Kokkos_${EXECUTION_SPACE_DIR}_ViewCopyETIInst_${INDEX_TYPE}_${SCALAR_TYPE}_${LAYOUT_TYPE}_Rank${RANK}.cpp 
+ObjectName=Kokkos_${EXECUTION_SPACE_DIR}_ViewCopyETIInst_${INDEX_TYPE}_${SCALAR_TYPE}_${LAYOUT_TYPE}_Rank${RANK}.o cp ${SCRIPT_PATH}/../../LICENSE ${FileName} @@ -37,5 +37,5 @@ echo "" >> ${FileName} echo "}" >> ${FileName} echo "}" >> ${FileName} -echo "${ObjectName}: \$(KOKKOS_CPP_DEPENDS) \$(KOKKOS_ETI_PATH)/${FileName}" >> ${EXECUTION_SPACE_DIR}/Makefile.eti_${EXECUTION_SPACE} -echo -e "\t\$(CXX) \$(KOKKOS_CPPFLAGS) \$(KOKKOS_CXXFLAGS) \$(CXXFLAGS) -c \$(KOKKOS_ETI_PATH)/${FileName}" >> ${EXECUTION_SPACE_DIR}/Makefile.eti_${EXECUTION_SPACE} +echo "${ObjectName}: \$(KOKKOS_CPP_DEPENDS) \$(KOKKOS_ETI_PATH)/${FileName}" >> ${EXECUTION_SPACE_DIR}/Makefile.eti_${EXECUTION_SPACE_DIR} +echo -e "\t\$(CXX) \$(KOKKOS_CPPFLAGS) \$(KOKKOS_CXXFLAGS) \$(CXXFLAGS) -c \$(KOKKOS_ETI_PATH)/${FileName}" >> ${EXECUTION_SPACE_DIR}/Makefile.eti_${EXECUTION_SPACE_DIR} diff --git a/packages/kokkos/scripts/testing_scripts/test_all_sandia b/packages/kokkos/scripts/testing_scripts/test_all_sandia new file mode 100755 index 000000000..d1424ade8 --- /dev/null +++ b/packages/kokkos/scripts/testing_scripts/test_all_sandia @@ -0,0 +1,790 @@ +#!/bin/bash -e + +# +# Global config +# + +set -o pipefail + +# Determine current machine. + +MACHINE="" +HOSTNAME=$(hostname) +PROCESSOR=`uname -p` + +if [[ "$HOSTNAME" =~ (white|ride).* ]]; then + MACHINE=white + module load git +fi + +if [[ "$HOSTNAME" =~ .*bowman.* ]]; then + MACHINE=bowman + module load git +fi + +if [[ "$HOSTNAME" == *blake* ]]; then # Warning: very generic name + MACHINE=blake + module load git +fi + +if [[ "$HOSTNAME" == apollo\.* ]]; then + MACHINE=apollo + module load git +fi + +if [[ "$HOSTNAME" == mayer\.* ]]; then + MACHINE=mayer +# module load git +fi +if [[ "$HOSTNAME" == cn* ]]; then # Warning: very generic name + MACHINE=mayer +fi + +if [ ! 
-z "$SEMS_MODULEFILES_ROOT" ]; then + if [[ "$MACHINE" = "" ]]; then + MACHINE=sems + module load sems-git + fi +fi + +if [[ "$MACHINE" = "" ]]; then + echo "Unrecognized machine" >&2 + exit 1 +fi + +echo "Running on machine: $MACHINE" + +GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" +IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" +ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" +INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" +CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial" +CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" +CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial" + +GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" +IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +PGI_WARNING_FLAGS="" + +# Default. Machine specific can override. +DEBUG=False +ARGS="" +CUSTOM_BUILD_LIST="" +QTHREADS_PATH="" +DRYRUN=False +BUILD_ONLY=False +declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1 +TEST_SCRIPT=False +SKIP_HWLOC=False +SPOT_CHECK=False + +PRINT_HELP=False +OPT_FLAG="" +CXX_FLAGS_EXTRA="" +LD_FLAGS_EXTRA="" +KOKKOS_OPTIONS="" + +# +# Handle arguments. 
+# + +while [[ $# > 0 ]] +do + key="$1" + + case $key in + --kokkos-path*) + KOKKOS_PATH="${key#*=}" + ;; + --qthreads-path*) + QTHREADS_PATH="${key#*=}" + ;; + --build-list*) + CUSTOM_BUILD_LIST="${key#*=}" + ;; + --debug*) + DEBUG=True + ;; + --build-only*) + BUILD_ONLY=True + ;; + --test-script*) + TEST_SCRIPT=True + ;; + --skip-hwloc*) + SKIP_HWLOC=True + ;; + --num*) + NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" + ;; + --dry-run*) + DRYRUN=True + ;; + --spot-check*) + SPOT_CHECK=True + ;; + --arch*) + ARCH_FLAG="--arch=${key#*=}" + ;; + --opt-flag*) + OPT_FLAG="${key#*=}" + ;; + --with-cuda-options*) + KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" + ;; + --with-options*) + KOKKOS_OPTIONS="--with-options=${key#*=}" + ;; + --cxxflags-extra*) + CXX_FLAGS_EXTRA="${key#*=}" + ;; + --ldflags-extra*) + LD_FLAGS_EXTRA="${key#*=}" + ;; + --help*) + PRINT_HELP=True + ;; + *) + # args, just append + ARGS="$ARGS $1" + ;; + esac + + shift +done + +SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd ../.. && pwd ) + +# Set kokkos path. +if [ -z "$KOKKOS_PATH" ]; then + KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT +else + # Ensure KOKKOS_PATH is abs path. + KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) +fi + +UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null` +if ! [ -z "$UNCOMMITTED" ]; then + echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :" + echo "$UNCOMMITTED" + echo "" +fi + +GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline` +echo "Repository Status: " ${GITSTATUS} +echo "" +echo "" + +# +# Machine specific config. 
+# + +if [ "$MACHINE" = "sems" ]; then + source /projects/sems/modulefiles/utils/sems-modules-init.sh + + BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-<COMPILER_NAME>/<COMPILER_VERSION>" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="" + fi + + if [ "$SPOT_CHECK" = "True" ]; then + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS" + "gcc/7.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" + "intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" + "clang/4.0.1 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" + "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + else + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ 
$CLANG_WARNING_FLAGS" + "clang/4.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + fi +elif [ "$MACHINE" = "white" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" + IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>" + CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.2.0,ibm/xl/16.1.0" + + # Don't do pthread on white. + GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=Power8,Kepler37" + fi + +elif [ "$MACHINE" = "bowman" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.4.258 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.2.174 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/18.2.199 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=KNL" + fi + +elif [ "$MACHINE" = "mayer" ]; then + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=96 + + BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" + 
ARM_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/7.2.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "arm/18.4.0 $ARM_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS") + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=ARMv8-TX2" + fi + +elif [ "$MACHINE" = "blake" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" + BASE_MODULE_LIST_INTEL="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + + if [ "$SPOT_CHECK" = "True" ]; then + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/18.1.163 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS" + ) + else + COMPILERS=("intel/18.1.163 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.5.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.4.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/8.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS" + ) + + fi + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=SKX" + fi + +elif [ "$MACHINE" = "apollo" ]; then + source /projects/sems/modulefiles/utils/sems-modules-init.sh + module use /home/projects/modulefiles/local/x86-64 + module load kokkos-env + + module load sems-git + module load sems-tex + module load sems-cmake/3.5.2 + module load sems-gdb + + SKIP_HWLOC=True + + 
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + + CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.0.69" + CLANG7_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.1" + NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0" + + BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" + BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" + BUILD_LIST_CLANG="Serial,Pthread,OpenMP" + + if [ "$SPOT_CHECK" = "True" ]; then + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + "gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" + "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread,OpenMP" clang++ $CUDA_WARNING_FLAGS" + "cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + else + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" + "clang/7.0 $CLANG7_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" + "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" + "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ 
$GCC_WARNING_FLAGS" + "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + ) + fi + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=SNB,Volta70" + fi + +else + echo "Unhandled machine $MACHINE" >&2 + exit 1 +fi + +export OMP_NUM_THREADS=8 +export OMP_PROC_BIND=spread +export OMP_PLACES=cores + +declare -i NUM_RESULTS_TO_KEEP=7 + +RESULT_ROOT_PREFIX=TestAll + +if [ "$PRINT_HELP" = "True" ]; then + echo "test_all_sandia <ARGS> <OPTIONS>:" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" + echo " Defaults to root repo containing this script" + echo "--debug: Run tests in debug. Defaults to False" + echo "--test-script: Test this script, not Kokkos" + echo "--skip-hwloc: Do not do hwloc tests" + echo "--num=N: Number of jobs to run in parallel" + echo "--spot-check: Minimal test set to issue pull request" + echo "--dry-run: Just print what would be executed" + echo "--build-only: Just do builds, don't run anything" + echo "--opt-flag=FLAG: Optimization flag (default: -O3)" + echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS" + echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS" + echo "--arch=ARCHITECTURE: overwrite architecture flags" + echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" + echo "--build-list=BUILD,BUILD,BUILD..." 
+ echo " Provide a comma-separated list of builds instead of running all builds" + echo " Valid items:" + echo " OpenMP, Pthread, Qthreads, Serial, OpenMP_Serial, Pthread_Serial" + echo " Qthreads_Serial, Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" + echo "" + + echo "ARGS: list of expressions matching compilers to test" + echo " supported compilers sems" + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + echo " $COMPILER" + done + echo "" + + echo "Examples:" + echo " Run all tests" + echo " % test_all_sandia" + echo "" + echo " Run all gcc tests" + echo " % test_all_sandia gcc" + echo "" + echo " Run all gcc/4.8.4 and all intel tests" + echo " % test_all_sandia gcc/4.8.4 intel" + echo "" + echo " Run all tests in debug" + echo " % test_all_sandia --debug" + echo "" + echo " Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds" + echo " % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial" + echo "" + echo "If you want to kill the tests, do:" + echo " hit ctrl-z" + echo " % kill -9 %1" + echo + exit 0 +fi + +# Set build type. +if [ "$DEBUG" = "True" ]; then + BUILD_TYPE=debug +else + BUILD_TYPE=release +fi + +# If no args provided, do all compilers. +if [ -z "$ARGS" ]; then + ARGS='?' +fi + +# Process args to figure out which compilers to test. +COMPILERS_TO_TEST="" + +for ARG in $ARGS; do + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + + if [[ "$COMPILER" = $ARG* ]]; then + if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then + COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER" + else + echo "Tried to add $COMPILER twice" + fi + fi + done +done + +# Check if Qthreads build requested. 
+HAVE_QTHREADS_BUILD="False" +if [ -n "$CUSTOM_BUILD_LIST" ]; then + if [[ "$CUSTOM_BUILD_LIST" = *Qthreads* ]]; then + HAVE_QTHREADS_BUILD="True" + fi +else + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + BUILD_LIST=${ARR[2]} + if [[ "$BUILD_LIST" = *Qthreads* ]]; then + HAVE_QTHREADS_BUILD="True" + fi + done +fi + +# Ensure Qthreads path is set if Qthreads build is requested. +if [ "$HAVE_QTHREADS_BUILD" = "True" ]; then + if [ -z "$QTHREADS_PATH" ]; then + echo "Need to supply Qthreads path (--qthreads-path) when testing Qthreads backend." >&2 + exit 1 + else + # Strip trailing slashes from path. + QTHREADS_PATH=$(echo $QTHREADS_PATH | sed 's/\/*$//') + fi +fi + +# +# Functions. +# + +# get_compiler_name <COMPILER> +get_compiler_name() { + echo $1 | cut -d/ -f1 +} + +# get_compiler_version <COMPILER> +get_compiler_version() { + echo $1 | cut -d/ -f2 +} + +# Do not call directly. +get_compiler_data() { + local compiler=$1 + local item=$2 + local compiler_name=$(get_compiler_name $compiler) + local compiler_vers=$(get_compiler_version $compiler) + + local compiler_data + for compiler_data in "${COMPILERS[@]}" ; do + local arr=($compiler_data) + + if [ "$compiler" = "${arr[0]}" ]; then + echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g" + return 0 + fi + done + + # Not found. + echo "Unreconized compiler $compiler" >&2 + exit 1 +} + +# +# For all getters, usage: <GETTER> <COMPILER> +# + +get_compiler_modules() { + get_compiler_data $1 1 +} + +get_compiler_build_list() { + get_compiler_data $1 2 +} + +get_compiler_exe_name() { + get_compiler_data $1 3 +} + +get_compiler_warning_flags() { + get_compiler_data $1 4 +} + +run_cmd() { + echo "RUNNING: $*" + if [ "$DRYRUN" != "True" ]; then + eval "$* 2>&1" + fi +} + +# report_and_log_test_results <SUCCESS> <DESC> <COMMENT> +report_and_log_test_result() { + # Use sane var names. 
+ local success=$1; local desc=$2; local comment=$3; + + if [ "$success" = "0" ]; then + echo " PASSED $desc" + echo $comment > $PASSED_DIR/$desc + else + # For failures, comment should be the name of the phase that failed. + echo " FAILED $desc" >&2 + echo $comment > $FAILED_DIR/$desc + cat ${desc}.${comment}.log + fi +} + +setup_env() { + local compiler=$1 + local compiler_modules=$(get_compiler_modules $compiler) + + module purge + + local mod + for mod in $compiler_modules; do + echo "Loading module $mod" + module load $mod 2>&1 + # It is ridiculously hard to check for the success of a loaded + # module. Module does not return error codes and piping to grep + # causes module to run in a subshell. + module list 2>&1 | grep "$mod" >& /dev/null || return 1 + done + + return 0 +} + +# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE> +single_build_and_test() { + # Use sane var names. + local compiler=$1; local build=$2; local build_type=$3; + + # Set up env. + mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type" + cd $ROOT_DIR/$compiler/"${build}-$build_type" + local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g') + setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + + # Set up flags. 
+ local compiler_warning_flags=$(get_compiler_warning_flags $compiler) + local compiler_exe=$(get_compiler_exe_name $compiler) + + if [[ "$build_type" = hwloc* ]]; then + local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) + fi + + if [[ "$build" = *Qthreads* ]]; then + if [[ "$build_type" = hwloc* ]]; then + local extra_args="$extra_args --qthreads-path=${QTHREADS_PATH}_hwloc" + else + local extra_args="$extra_args --qthreads-path=$QTHREADS_PATH" + fi + fi + + if [[ "$OPT_FLAG" = "" ]]; then + OPT_FLAG="-O3" + fi + + if [[ "$build_type" = *debug* ]]; then + local extra_args="$extra_args --debug" + local cxxflags="-g $compiler_warning_flags" + local ldflags="-g" + else + local cxxflags="$OPT_FLAG $compiler_warning_flags" + local ldflags="${OPT_FLAG}" + fi + + local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}" + local ldflags="${ldflags} ${LD_FLAGS_EXTRA}" + + if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then + local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" + fi + if [[ "$KOKKOS_OPTIONS" != "" ]]; then + local extra_args="$extra_args $KOKKOS_OPTIONS" + else + local extra_args="$extra_args --with-options=enable_large_mem_tests" + fi + + echo " Starting job $desc" + + local comment="no_comment" + + if [ "$TEST_SCRIPT" = "True" ]; then + local rand=$[ 1 + $[ RANDOM % 10 ]] + sleep $rand + + if [ $rand -gt 5 ]; then + run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } + fi + else + run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + local -i build_start_time=$(date +%s) + run_cmd make -j 48 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + local -i build_end_time=$(date +%s) + 
comment="build_time=$(($build_end_time-$build_start_time))" + + if [[ "$BUILD_ONLY" == False ]]; then + run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } + local -i run_end_time=$(date +%s) + comment="$comment run_time=$(($run_end_time-$build_end_time))" + fi + fi + + report_and_log_test_result 0 $desc "$comment" + + return 0 +} + +# wait_for_jobs <NUM-JOBS> +wait_for_jobs() { + local -i max_jobs=$1 + local -i num_active_jobs=$(jobs | wc -l) + while [ $num_active_jobs -ge $max_jobs ] + do + sleep 1 + num_active_jobs=$(jobs | wc -l) + jobs >& /dev/null + done +} + +# run_in_background <COMPILER> <BUILD> <BUILD_TYPE> +run_in_background() { + local compiler=$1 + + local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL + # Don't override command line input. + # if [[ "$BUILD_ONLY" == True ]]; then + # num_jobs=8 + # else + if [[ "$compiler" == cuda* ]]; then + num_jobs=1 + fi + if [[ "$compiler" == clang ]]; then + num_jobs=1 + fi + # fi + wait_for_jobs $num_jobs + + single_build_and_test $* & +} + +# build_and_test_all <COMPILER> +build_and_test_all() { + # Get compiler data. + local compiler=$1 + if [ -z "$CUSTOM_BUILD_LIST" ]; then + local compiler_build_list=$(get_compiler_build_list $compiler) + else + local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ') + fi + + # Do builds. + local build + for build in $compiler_build_list + do + run_in_background $compiler $build $BUILD_TYPE + + # If not cuda, do a hwloc test too. + if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then + run_in_background $compiler $build "hwloc-$BUILD_TYPE" + fi + done + + return 0 +} + +get_test_root_dir() { + local existing_results=$(find . 
-maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort) + local -i num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l) + local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP} + + if [ $num_to_delete -gt 0 ]; then + /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete) + fi + + echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S") +} + +wait_summarize_and_exit() { + wait_for_jobs 1 + + echo "#######################################################" + echo "PASSED TESTS" + echo "#######################################################" + + local passed_test + for passed_test in $(\ls -1 $PASSED_DIR | sort) + do + echo $passed_test $(cat $PASSED_DIR/$passed_test) + done + + local -i rv=0 + if [ "$(ls -A $FAILED_DIR)" ]; then + echo "#######################################################" + echo "FAILED TESTS" + echo "#######################################################" + + local failed_test + for failed_test in $(\ls -1 $FAILED_DIR | sort) + do + echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" + rv=$rv+1 + done + fi + + exit $rv +} + +# +# Main. 
+# + +ROOT_DIR=$(get_test_root_dir) +mkdir -p $ROOT_DIR +cd $ROOT_DIR + +PASSED_DIR=$ROOT_DIR/results/passed +FAILED_DIR=$ROOT_DIR/results/failed +mkdir -p $PASSED_DIR +mkdir -p $FAILED_DIR + +echo "Going to test compilers: " $COMPILERS_TO_TEST +for COMPILER in $COMPILERS_TO_TEST; do + echo "Testing compiler $COMPILER" + build_and_test_all $COMPILER +done + +wait_summarize_and_exit diff --git a/packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_pthread_intel b/packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel similarity index 82% rename from packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_pthread_intel rename to packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel index 3b2c72551..df370509a 100755 --- a/packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_pthread_intel +++ b/packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel @@ -1,6 +1,8 @@ #!/bin/bash -el ulimit -c 0 -module load devpack/openmpi/2.1.1/intel/17.4.196/cuda/none +module load devpack/20171203/openmpi/2.1.2/intel/18.1.163 +# Trilinos now requires cmake version >= 3.10.0 +module swap cmake/3.9.0 cmake/3.10.2 KOKKOS_BRANCH=$1 TRILINOS_UPDATE_BRANCH=$2 @@ -28,8 +30,9 @@ export JENKINS_DO_PTHREAD=ON export JENKINS_DO_SERIAL=OFF export JENKINS_DO_COMPLEX=OFF -export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX2 -mkl" -export JENKINS_ARCH_C_FLAG="-xCORE-AVX2 -mkl" +export JENKINS_ARCH=SKX +export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX512 -mkl" +export JENKINS_ARCH_C_FLAG="-xCORE-AVX512 -mkl" export BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a" export LAPACK_LIBRARIES=${BLAS_LIBRARIES} @@ -37,7 +40,7 @@ export JENKINS_DO_TESTS=ON export JENKINS_DO_EXAMPLES=ON export JENKINS_DO_SHARED=ON -export QUEUE=haswell +export QUEUE=blake module load python diff --git 
a/packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_serial_intel b/packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel similarity index 82% rename from packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_serial_intel rename to packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel index 9ce936ae2..04f1378ce 100755 --- a/packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_serial_intel +++ b/packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel @@ -1,6 +1,8 @@ #!/bin/bash -el ulimit -c 0 -module load devpack/openmpi/2.1.1/intel/17.4.196/cuda/none +module load devpack/20171203/openmpi/2.1.2/intel/18.1.163 +# Trilinos now requires cmake version >= 3.10.0 +module swap cmake/3.9.0 cmake/3.10.2 KOKKOS_BRANCH=$1 TRILINOS_UPDATE_BRANCH=$2 @@ -28,8 +30,9 @@ export JENKINS_DO_PTHREAD=OFF export JENKINS_DO_SERIAL=ON export JENKINS_DO_COMPLEX=ON -export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX2 -mkl" -export JENKINS_ARCH_C_FLAG="-xCORE-AVX2 -mkl" +export JENKINS_ARCH=SKX +export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX512 -mkl" +export JENKINS_ARCH_C_FLAG="-xCORE-AVX512 -mkl" export BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a" export LAPACK_LIBRARIES=${BLAS_LIBRARIES} @@ -37,7 +40,7 @@ export JENKINS_DO_TESTS=ON export JENKINS_DO_EXAMPLES=ON export JENKINS_DO_SHARED=ON -export QUEUE=haswell +export QUEUE=blake module load python diff --git a/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda b/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda index 2716767fe..98900c3c9 100755 --- a/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda +++ b/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda @@ -20,7 +20,10 @@ then TRILINOS_PRISTINE_BRANCH=develop fi 
-module load devpack/openmpi/1.10.4/gcc/5.4.0/cuda/8.0.44 +module load devpack/20180521/openmpi/2.1.2/gcc/7.2.0/cuda/9.2.88 +module swap openblas/0.2.20/gcc/7.2.0 netlib/3.8.0/gcc/7.2.0 +# Trilinos now requires cmake version >= 3.10.0 +module swap cmake/3.9.6 cmake/3.12.3 export OMP_NUM_THREADS=8 export JENKINS_DO_CUDA=ON export JENKINS_DO_OPENMP=OFF @@ -28,6 +31,7 @@ export JENKINS_DO_PTHREAD=OFF export JENKINS_DO_SERIAL=ON export JENKINS_DO_COMPLEX=OFF +export JENKINS_ARCH="Power8,Kepler37" export JENKINS_ARCH_CXX_FLAG="-mcpu=power8 -arch=sm_37" export JENKINS_ARCH_C_FLAG="-mcpu=power8" export BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a;gfortran;gomp" diff --git a/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp b/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp index ff1086507..9c5244cd3 100755 --- a/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp +++ b/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp @@ -20,7 +20,10 @@ then TRILINOS_PRISTINE_BRANCH=develop fi -module load devpack/openmpi/1.10.4/gcc/5.4.0/cuda/8.0.44 +module load devpack/20180521/openmpi/2.1.2/gcc/7.2.0/cuda/9.2.88 +module swap openblas/0.2.20/gcc/7.2.0 netlib/3.8.0/gcc/7.2.0 +# Trilinos now requires cmake version >= 3.10.0 +module swap cmake/3.9.6 cmake/3.12.3 export OMP_NUM_THREADS=8 export JENKINS_DO_CUDA=OFF export JENKINS_DO_OPENMP=ON @@ -28,6 +31,7 @@ export JENKINS_DO_PTHREAD=OFF export JENKINS_DO_SERIAL=OFF export JENKINS_DO_COMPLEX=OFF +export JENKINS_ARCH="Power8" export JENKINS_ARCH_CXX_FLAG="-mcpu=power8" export JENKINS_ARCH_C_FLAG="-mcpu=power8" export BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a;gfortran;gomp" -- GitLab