From b9ec061afdb0aefa3489d9beaecf99cd608f3d15 Mon Sep 17 00:00:00 2001
From: Stephane Del Pino <stephane.delpino44@gmail.com>
Date: Mon, 12 Nov 2018 22:44:00 +0100
Subject: [PATCH] git subrepo pull packages/kokkos

subrepo:
  subdir:   "packages/kokkos"
  merged:   "9614f72c7"
upstream:
  origin:   "git@github.com:kokkos/kokkos.git"
  branch:   "master"
  commit:   "9614f72c7"
git-subrepo:
  version:  "0.4.0"
  origin:   "git@github.com:ingydotnet/git-subrepo.git"
  commit:   "5d6aba9"
---
 packages/kokkos/.gitrepo                      |    7 +-
 packages/kokkos/.travis.yml                   |   36 +-
 packages/kokkos/CHANGELOG.md                  |   63 +
 packages/kokkos/CMakeLists.txt                |    7 +-
 packages/kokkos/Makefile.kokkos               |   85 +-
 packages/kokkos/README                        |   33 +-
 .../kokkos/algorithms/src/Kokkos_Random.hpp   |    6 +
 .../kokkos/algorithms/src/Kokkos_Sort.hpp     |   10 +-
 packages/kokkos/benchmarks/gups/Makefile      |   41 +
 .../kokkos/benchmarks/gups/gups-kokkos.cc     |  199 ++
 packages/kokkos/benchmarks/stream/Makefile    |   41 +
 .../kokkos/benchmarks/stream/stream-kokkos.cc |  265 +++
 packages/kokkos/bin/hpcbind                   |   66 +-
 packages/kokkos/bin/nvcc_wrapper              |  111 +-
 packages/kokkos/cmake/kokkos_build.cmake      |    4 +
 packages/kokkos/cmake/kokkos_functions.cmake  |    2 +-
 packages/kokkos/cmake/kokkos_options.cmake    |   13 +-
 packages/kokkos/cmake/kokkos_settings.cmake   |   11 +-
 packages/kokkos/config/test_all_sandia        |   11 +-
 .../kokkos/containers/src/Kokkos_DualView.hpp |  335 ++-
 .../containers/src/Kokkos_DynRankView.hpp     |  100 +-
 .../containers/src/Kokkos_OffsetView.hpp      | 1895 +++++++++++++++++
 .../containers/src/Kokkos_StaticCrsGraph.hpp  |    4 +-
 .../kokkos/containers/src/Kokkos_Vector.hpp   |   16 +-
 .../containers/unit_tests/CMakeLists.txt      |    4 +
 .../kokkos/containers/unit_tests/Makefile     |    5 +
 .../containers/unit_tests/TestDynViewAPI.hpp  |   64 +
 .../containers/unit_tests/TestOffsetView.hpp  |  426 ++++
 .../containers/unit_tests/TestScatterView.hpp |   27 +-
 .../unit_tests/TestStaticCrsGraph.hpp         |    1 +
 .../unit_tests/cuda/TestCuda_OffsetView.cpp   |   47 +
 .../openmp/TestOpenMP_OffsetView.cpp          |   47 +
 .../unit_tests/rocm/TestROCm_Category.hpp     |    2 +-
 .../serial/TestSerial_OffsetView.cpp          |   46 +
 .../threads/TestThreads_OffsetView.cpp        |   47 +
 packages/kokkos/core/src/CMakeLists.txt       |    4 +
 .../kokkos/core/src/Cuda/Kokkos_CudaExec.hpp  |    8 +-
 .../kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp |    7 +
 .../kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp |    6 +-
 .../core/src/Cuda/Kokkos_Cuda_Internal.hpp    |  221 +-
 .../core/src/Cuda/Kokkos_Cuda_Locks.hpp       |    3 +
 .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp    |  240 ++-
 .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp  |  319 ++-
 .../kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp |  124 +-
 .../Kokkos_Cuda_Version_9_8_Compatibility.hpp |   13 +-
 .../kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp |    2 +
 .../core/src/KokkosExp_MDRangePolicy.hpp      |    6 +-
 packages/kokkos/core/src/Kokkos_Array.hpp     |    8 +-
 packages/kokkos/core/src/Kokkos_Concepts.hpp  |    5 +-
 packages/kokkos/core/src/Kokkos_CopyViews.hpp |  252 ++-
 .../kokkos/core/src/Kokkos_ExecPolicy.hpp     |    8 +
 packages/kokkos/core/src/Kokkos_Layout.hpp    |  146 ++
 packages/kokkos/core/src/Kokkos_Macros.hpp    |    7 +-
 .../core/src/Kokkos_Parallel_Reduce.hpp       |   17 +-
 .../kokkos/core/src/Kokkos_ScratchSpace.hpp   |   49 +
 packages/kokkos/core/src/Kokkos_Serial.hpp    |   22 +-
 .../kokkos/core/src/Kokkos_TaskScheduler.hpp  |    2 -
 packages/kokkos/core/src/Kokkos_View.hpp      | 1065 ++++-----
 packages/kokkos/core/src/Makefile             |   36 +-
 .../core/src/Makefile.generate_build_files    |   34 +-
 .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp    |    4 -
 .../core/src/OpenMP/Kokkos_OpenMP_Team.hpp    |   55 +-
 .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp |    3 +-
 .../core/src/ROCm/Kokkos_ROCm_Config.hpp      |    4 +-
 .../kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp |   25 +-
 .../kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp |   21 +-
 .../core/src/ROCm/Kokkos_ROCm_Parallel.hpp    |  393 +++-
 .../core/src/ROCm/Kokkos_ROCm_Reduce.hpp      |    9 +-
 .../core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp  |  224 +-
 .../kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp |  120 +-
 .../core/src/ROCm/Kokkos_ROCm_Space.cpp       |    4 +
 .../kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp |    5 +-
 .../core/src/Threads/Kokkos_ThreadsTeam.hpp   |  101 +-
 ...IInst_int64_t_double_LayoutLeft_Rank1.cpp} |    0
 ...IInst_int64_t_double_LayoutLeft_Rank2.cpp} |    0
 ...IInst_int64_t_double_LayoutLeft_Rank3.cpp} |    0
 ...IInst_int64_t_double_LayoutLeft_Rank4.cpp} |    0
 ...IInst_int64_t_double_LayoutLeft_Rank5.cpp} |    0
 ...IInst_int64_t_double_LayoutLeft_Rank8.cpp} |    0
 ...Inst_int64_t_double_LayoutRight_Rank1.cpp} |    0
 ...Inst_int64_t_double_LayoutRight_Rank2.cpp} |    0
 ...Inst_int64_t_double_LayoutRight_Rank3.cpp} |    0
 ...Inst_int64_t_double_LayoutRight_Rank4.cpp} |    0
 ...Inst_int64_t_double_LayoutRight_Rank5.cpp} |    0
 ...Inst_int64_t_double_LayoutRight_Rank8.cpp} |    0
 ...nst_int64_t_double_LayoutStride_Rank1.cpp} |    0
 ...nst_int64_t_double_LayoutStride_Rank2.cpp} |    0
 ...nst_int64_t_double_LayoutStride_Rank3.cpp} |    0
 ...nst_int64_t_double_LayoutStride_Rank4.cpp} |    0
 ...nst_int64_t_double_LayoutStride_Rank5.cpp} |    0
 ...nst_int64_t_double_LayoutStride_Rank8.cpp} |    0
 ...TIInst_int64_t_float_LayoutLeft_Rank1.cpp} |    0
 ...TIInst_int64_t_float_LayoutLeft_Rank2.cpp} |    0
 ...TIInst_int64_t_float_LayoutLeft_Rank3.cpp} |    0
 ...TIInst_int64_t_float_LayoutLeft_Rank4.cpp} |    0
 ...TIInst_int64_t_float_LayoutLeft_Rank5.cpp} |    0
 ...TIInst_int64_t_float_LayoutLeft_Rank8.cpp} |    0
 ...IInst_int64_t_float_LayoutRight_Rank1.cpp} |    0
 ...IInst_int64_t_float_LayoutRight_Rank2.cpp} |    0
 ...IInst_int64_t_float_LayoutRight_Rank3.cpp} |    0
 ...IInst_int64_t_float_LayoutRight_Rank4.cpp} |    0
 ...IInst_int64_t_float_LayoutRight_Rank5.cpp} |    0
 ...IInst_int64_t_float_LayoutRight_Rank8.cpp} |    0
 ...Inst_int64_t_float_LayoutStride_Rank1.cpp} |    0
 ...Inst_int64_t_float_LayoutStride_Rank2.cpp} |    0
 ...Inst_int64_t_float_LayoutStride_Rank3.cpp} |    0
 ...Inst_int64_t_float_LayoutStride_Rank4.cpp} |    0
 ...Inst_int64_t_float_LayoutStride_Rank5.cpp} |    0
 ...Inst_int64_t_float_LayoutStride_Rank8.cpp} |    0
 ...Inst_int64_t_int64_t_LayoutLeft_Rank1.cpp} |    0
 ...Inst_int64_t_int64_t_LayoutLeft_Rank2.cpp} |    0
 ...Inst_int64_t_int64_t_LayoutLeft_Rank3.cpp} |    0
 ...Inst_int64_t_int64_t_LayoutLeft_Rank4.cpp} |    0
 ...Inst_int64_t_int64_t_LayoutLeft_Rank5.cpp} |    0
 ...Inst_int64_t_int64_t_LayoutLeft_Rank8.cpp} |    0
 ...nst_int64_t_int64_t_LayoutRight_Rank1.cpp} |    0
 ...nst_int64_t_int64_t_LayoutRight_Rank2.cpp} |    0
 ...nst_int64_t_int64_t_LayoutRight_Rank3.cpp} |    0
 ...nst_int64_t_int64_t_LayoutRight_Rank4.cpp} |    0
 ...nst_int64_t_int64_t_LayoutRight_Rank5.cpp} |    0
 ...nst_int64_t_int64_t_LayoutRight_Rank8.cpp} |    0
 ...st_int64_t_int64_t_LayoutStride_Rank1.cpp} |    0
 ...st_int64_t_int64_t_LayoutStride_Rank2.cpp} |    0
 ...st_int64_t_int64_t_LayoutStride_Rank3.cpp} |    0
 ...st_int64_t_int64_t_LayoutStride_Rank4.cpp} |    0
 ...st_int64_t_int64_t_LayoutStride_Rank5.cpp} |    0
 ...st_int64_t_int64_t_LayoutStride_Rank8.cpp} |    0
 ...yETIInst_int64_t_int_LayoutLeft_Rank1.cpp} |    0
 ...yETIInst_int64_t_int_LayoutLeft_Rank2.cpp} |    0
 ...yETIInst_int64_t_int_LayoutLeft_Rank3.cpp} |    0
 ...yETIInst_int64_t_int_LayoutLeft_Rank4.cpp} |    0
 ...yETIInst_int64_t_int_LayoutLeft_Rank5.cpp} |    0
 ...yETIInst_int64_t_int_LayoutLeft_Rank8.cpp} |    0
 ...ETIInst_int64_t_int_LayoutRight_Rank1.cpp} |    0
 ...ETIInst_int64_t_int_LayoutRight_Rank2.cpp} |    0
 ...ETIInst_int64_t_int_LayoutRight_Rank3.cpp} |    0
 ...ETIInst_int64_t_int_LayoutRight_Rank4.cpp} |    0
 ...ETIInst_int64_t_int_LayoutRight_Rank5.cpp} |    0
 ...ETIInst_int64_t_int_LayoutRight_Rank8.cpp} |    0
 ...TIInst_int64_t_int_LayoutStride_Rank1.cpp} |    0
 ...TIInst_int64_t_int_LayoutStride_Rank2.cpp} |    0
 ...TIInst_int64_t_int_LayoutStride_Rank3.cpp} |    0
 ...TIInst_int64_t_int_LayoutStride_Rank4.cpp} |    0
 ...TIInst_int64_t_int_LayoutStride_Rank5.cpp} |    0
 ...TIInst_int64_t_int_LayoutStride_Rank8.cpp} |    0
 ...pyETIInst_int_double_LayoutLeft_Rank1.cpp} |    0
 ...pyETIInst_int_double_LayoutLeft_Rank2.cpp} |    0
 ...pyETIInst_int_double_LayoutLeft_Rank3.cpp} |    0
 ...pyETIInst_int_double_LayoutLeft_Rank4.cpp} |    0
 ...pyETIInst_int_double_LayoutLeft_Rank5.cpp} |    0
 ...pyETIInst_int_double_LayoutLeft_Rank8.cpp} |    0
 ...yETIInst_int_double_LayoutRight_Rank1.cpp} |    0
 ...yETIInst_int_double_LayoutRight_Rank2.cpp} |    0
 ...yETIInst_int_double_LayoutRight_Rank3.cpp} |    0
 ...yETIInst_int_double_LayoutRight_Rank4.cpp} |    0
 ...yETIInst_int_double_LayoutRight_Rank5.cpp} |    0
 ...yETIInst_int_double_LayoutRight_Rank8.cpp} |    0
 ...ETIInst_int_double_LayoutStride_Rank1.cpp} |    0
 ...ETIInst_int_double_LayoutStride_Rank2.cpp} |    0
 ...ETIInst_int_double_LayoutStride_Rank3.cpp} |    0
 ...ETIInst_int_double_LayoutStride_Rank4.cpp} |    0
 ...ETIInst_int_double_LayoutStride_Rank5.cpp} |    0
 ...ETIInst_int_double_LayoutStride_Rank8.cpp} |    0
 ...opyETIInst_int_float_LayoutLeft_Rank1.cpp} |    0
 ...opyETIInst_int_float_LayoutLeft_Rank2.cpp} |    0
 ...opyETIInst_int_float_LayoutLeft_Rank3.cpp} |    0
 ...opyETIInst_int_float_LayoutLeft_Rank4.cpp} |    0
 ...opyETIInst_int_float_LayoutLeft_Rank5.cpp} |    0
 ...opyETIInst_int_float_LayoutLeft_Rank8.cpp} |    0
 ...pyETIInst_int_float_LayoutRight_Rank1.cpp} |    0
 ...pyETIInst_int_float_LayoutRight_Rank2.cpp} |    0
 ...pyETIInst_int_float_LayoutRight_Rank3.cpp} |    0
 ...pyETIInst_int_float_LayoutRight_Rank4.cpp} |    0
 ...pyETIInst_int_float_LayoutRight_Rank5.cpp} |    0
 ...pyETIInst_int_float_LayoutRight_Rank8.cpp} |    0
 ...yETIInst_int_float_LayoutStride_Rank1.cpp} |    0
 ...yETIInst_int_float_LayoutStride_Rank2.cpp} |    0
 ...yETIInst_int_float_LayoutStride_Rank3.cpp} |    0
 ...yETIInst_int_float_LayoutStride_Rank4.cpp} |    0
 ...yETIInst_int_float_LayoutStride_Rank5.cpp} |    0
 ...yETIInst_int_float_LayoutStride_Rank8.cpp} |    0
 ...yETIInst_int_int64_t_LayoutLeft_Rank1.cpp} |    0
 ...yETIInst_int_int64_t_LayoutLeft_Rank2.cpp} |    0
 ...yETIInst_int_int64_t_LayoutLeft_Rank3.cpp} |    0
 ...yETIInst_int_int64_t_LayoutLeft_Rank4.cpp} |    0
 ...yETIInst_int_int64_t_LayoutLeft_Rank5.cpp} |    0
 ...yETIInst_int_int64_t_LayoutLeft_Rank8.cpp} |    0
 ...ETIInst_int_int64_t_LayoutRight_Rank1.cpp} |    0
 ...ETIInst_int_int64_t_LayoutRight_Rank2.cpp} |    0
 ...ETIInst_int_int64_t_LayoutRight_Rank3.cpp} |    0
 ...ETIInst_int_int64_t_LayoutRight_Rank4.cpp} |    0
 ...ETIInst_int_int64_t_LayoutRight_Rank5.cpp} |    0
 ...ETIInst_int_int64_t_LayoutRight_Rank8.cpp} |    0
 ...TIInst_int_int64_t_LayoutStride_Rank1.cpp} |    0
 ...TIInst_int_int64_t_LayoutStride_Rank2.cpp} |    0
 ...TIInst_int_int64_t_LayoutStride_Rank3.cpp} |    0
 ...TIInst_int_int64_t_LayoutStride_Rank4.cpp} |    0
 ...TIInst_int_int64_t_LayoutStride_Rank5.cpp} |    0
 ...TIInst_int_int64_t_LayoutStride_Rank8.cpp} |    0
 ...wCopyETIInst_int_int_LayoutLeft_Rank1.cpp} |    0
 ...wCopyETIInst_int_int_LayoutLeft_Rank2.cpp} |    0
 ...wCopyETIInst_int_int_LayoutLeft_Rank3.cpp} |    0
 ...wCopyETIInst_int_int_LayoutLeft_Rank4.cpp} |    0
 ...wCopyETIInst_int_int_LayoutLeft_Rank5.cpp} |    0
 ...wCopyETIInst_int_int_LayoutLeft_Rank8.cpp} |    0
 ...CopyETIInst_int_int_LayoutRight_Rank1.cpp} |    0
 ...CopyETIInst_int_int_LayoutRight_Rank2.cpp} |    0
 ...CopyETIInst_int_int_LayoutRight_Rank3.cpp} |    0
 ...CopyETIInst_int_int_LayoutRight_Rank4.cpp} |    0
 ...CopyETIInst_int_int_LayoutRight_Rank5.cpp} |    0
 ...CopyETIInst_int_int_LayoutRight_Rank8.cpp} |    0
 ...opyETIInst_int_int_LayoutStride_Rank1.cpp} |    0
 ...opyETIInst_int_int_LayoutStride_Rank2.cpp} |    0
 ...opyETIInst_int_int_LayoutStride_Rank3.cpp} |    0
 ...opyETIInst_int_int_LayoutStride_Rank4.cpp} |    0
 ...opyETIInst_int_int_LayoutStride_Rank5.cpp} |    0
 ...opyETIInst_int_int_LayoutStride_Rank8.cpp} |    0
 .../eti/ROCm/Makefile.eti_Experimental::ROCm  |  288 ---
 .../core/src/eti/ROCm/Makefile.eti_ROCm       |  288 +++
 .../Kokkos_Atomic_Compare_Exchange_Strong.hpp |   13 +-
 .../core/src/impl/Kokkos_Atomic_Exchange.hpp  |   13 +-
 .../core/src/impl/Kokkos_Atomic_Fetch_Add.hpp |   14 +-
 .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp |   13 +-
 .../core/src/impl/Kokkos_Atomic_Generic.hpp   |   26 +-
 .../core/src/impl/Kokkos_Atomic_Windows.hpp   |   12 +-
 packages/kokkos/core/src/impl/Kokkos_Core.cpp |   16 +-
 .../kokkos/core/src/impl/Kokkos_HBWSpace.cpp  |    2 +
 .../kokkos/core/src/impl/Kokkos_HostSpace.cpp |    2 +
 .../core/src/impl/Kokkos_HostThreadTeam.hpp   |   33 +-
 .../kokkos/core/src/impl/Kokkos_OldMacros.hpp |   15 +-
 .../core/src/impl/Kokkos_Serial_Task.cpp      |   20 +-
 .../kokkos/core/src/impl/Kokkos_ViewArray.hpp |  114 +-
 .../core/src/impl/Kokkos_ViewLayoutTiled.hpp  |  945 ++++++++
 .../core/src/impl/Kokkos_ViewMapping.hpp      |  303 ++-
 .../kokkos/core/src/impl/Kokkos_ViewTile.hpp  |    4 +-
 .../kokkos/core/src/impl/Kokkos_hwloc.cpp     |   26 +-
 packages/kokkos/core/src/kokkos.pc.in         |   71 +
 packages/kokkos/core/unit_test/CMakeLists.txt |    2 +
 packages/kokkos/core/unit_test/Makefile       |   52 +-
 packages/kokkos/core/unit_test/TestAtomic.hpp |   11 +-
 packages/kokkos/core/unit_test/TestCXX11.hpp  |   16 +-
 .../kokkos/core/unit_test/TestComplex.hpp     |    2 +-
 .../kokkos/core/unit_test/TestMDRange.hpp     |  212 ++
 .../kokkos/core/unit_test/TestMDRange_a.hpp   |    4 +
 .../kokkos/core/unit_test/TestMDRange_b.hpp   |    3 +-
 .../kokkos/core/unit_test/TestMDRange_c.hpp   |    4 +-
 .../kokkos/core/unit_test/TestMDRange_d.hpp   |    4 +
 .../kokkos/core/unit_test/TestMDRange_e.hpp   |    4 +
 .../kokkos/core/unit_test/TestMemoryPool.hpp  |    2 +
 packages/kokkos/core/unit_test/TestReduce.hpp |    3 +
 .../core/unit_test/TestReduceDeviceView.hpp   |  131 ++
 .../kokkos/core/unit_test/TestReducers.hpp    |   14 +-
 packages/kokkos/core/unit_test/TestScan.hpp   |   12 +-
 packages/kokkos/core/unit_test/TestTeam.hpp   |  169 +-
 .../core/unit_test/TestTeamTeamSize.hpp       |  146 ++
 .../kokkos/core/unit_test/TestTeamVector.hpp  |   68 +-
 .../kokkos/core/unit_test/TestViewAPI_a.hpp   |    2 -
 .../kokkos/core/unit_test/TestViewAPI_b.hpp   |    2 -
 .../kokkos/core/unit_test/TestViewAPI_c.hpp   |    2 -
 .../kokkos/core/unit_test/TestViewAPI_d.hpp   |    2 -
 .../kokkos/core/unit_test/TestViewCopy.hpp    |  155 ++
 .../TestViewLayoutStrideAssignment.hpp        |  740 +++++++
 .../core/unit_test/TestViewLayoutTiled.hpp    | 1215 +++++++++++
 .../cuda/TestCudaHostPinned_ViewCopy.cpp      |   45 +
 .../unit_test/cuda/TestCudaUVM_ViewCopy.cpp   |   45 +
 .../core/unit_test/cuda/TestCuda_Other.cpp    |    1 +
 .../cuda/TestCuda_Reductions_DeviceView.cpp   |   45 +
 .../core/unit_test/cuda/TestCuda_Team.cpp     |   16 +
 .../unit_test/cuda/TestCuda_TeamScratch.cpp   |    4 +
 .../unit_test/cuda/TestCuda_TeamTeamSize.cpp  |   45 +
 .../TestCuda_ViewLayoutStrideAssignment.cpp   |   46 +
 .../unit_test/openmp/TestOpenMP_Other.cpp     |    1 +
 .../TestOpenMP_Reductions_DeviceView.cpp      |   45 +
 .../core/unit_test/openmp/TestOpenMP_Team.cpp |   15 +
 .../openmp/TestOpenMP_TeamScratch.cpp         |    3 +
 .../openmp/TestOpenMP_TeamTeamSize.cpp        |   46 +
 .../TestOpenMP_ViewLayoutStrideAssignment.cpp |   46 +
 .../rocm/TestROCmHostPinned_ViewCopy.cpp      |   45 +
 .../core/unit_test/rocm/TestROCm_Crs.cpp      |   47 +
 .../rocm/TestROCm_MDRangeReduce_a.cpp         |   54 +
 .../rocm/TestROCm_MDRangeReduce_b.cpp         |   54 +
 .../rocm/TestROCm_MDRangeReduce_c.cpp         |   54 +
 .../rocm/TestROCm_MDRangeReduce_d.cpp         |   54 +
 .../rocm/TestROCm_MDRangeReduce_e.cpp         |   54 +
 .../unit_test/rocm/TestROCm_SubView_c13.cpp   |   54 +
 .../rocm/TestROCm_TeamReductionScan.cpp       |    2 -
 .../unit_test/rocm/TestROCm_TeamScratch.cpp   |    4 +
 .../unit_test/rocm/TestROCm_TeamTeamSize.cpp  |   49 +
 .../unit_test/serial/TestSerial_Other.cpp     |    1 +
 .../TestSerial_Reductions_DeviceView.cpp      |   45 +
 .../core/unit_test/serial/TestSerial_Team.cpp |   15 +
 .../serial/TestSerial_TeamScratch.cpp         |    4 +
 .../serial/TestSerial_TeamTeamSize.cpp        |   45 +
 .../TestSerial_ViewLayoutStrideAssignment.cpp |   46 +
 .../kokkos/core/unit_test/standalone/Makefile |   55 +
 .../unit_test/standalone/UnitTestMainInit.cpp |   71 +
 .../unit_test/threads/TestThreads_Other.cpp   |    1 +
 .../TestThreads_Reductions_DeviceView.cpp     |   45 +
 .../unit_test/threads/TestThreads_Team.cpp    |   15 +
 .../threads/TestThreads_TeamScratch.cpp       |    4 +
 .../threads/TestThreads_TeamTeamSize.cpp      |   47 +
 ...TestThreads_ViewLayoutStrideAssignment.cpp |   46 +
 packages/kokkos/doc/kokkos-promotion.txt      |   24 +-
 .../01_thread_teams/thread_teams.cpp          |   19 +-
 .../nested_parallel_for.cpp                   |    3 +-
 .../kokkos/example/virtual_functions/Makefile |   55 +
 .../example/virtual_functions/classes.cpp     |   26 +
 .../example/virtual_functions/classes.hpp     |   39 +
 .../kokkos/example/virtual_functions/main.cpp |   36 +
 packages/kokkos/generate_makefile.bash        |   23 +-
 packages/kokkos/master_history.txt            |    1 +
 .../eti/generate_view_copy_cpp_files_write    |    8 +-
 .../scripts/testing_scripts/test_all_sandia   |  790 +++++++
 ...=> blake_jenkins_run_script_pthread_intel} |   11 +-
 ... => blake_jenkins_run_script_serial_intel} |   11 +-
 .../white_run_jenkins_script_cuda             |    6 +-
 .../white_run_jenkins_script_omp              |    6 +-
 317 files changed, 13238 insertions(+), 1978 deletions(-)
 create mode 100644 packages/kokkos/benchmarks/gups/Makefile
 create mode 100644 packages/kokkos/benchmarks/gups/gups-kokkos.cc
 create mode 100644 packages/kokkos/benchmarks/stream/Makefile
 create mode 100644 packages/kokkos/benchmarks/stream/stream-kokkos.cc
 create mode 100644 packages/kokkos/containers/src/Kokkos_OffsetView.hpp
 create mode 100644 packages/kokkos/containers/unit_tests/TestOffsetView.hpp
 create mode 100644 packages/kokkos/containers/unit_tests/cuda/TestCuda_OffsetView.cpp
 create mode 100644 packages/kokkos/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp
 create mode 100644 packages/kokkos/containers/unit_tests/serial/TestSerial_OffsetView.cpp
 create mode 100644 packages/kokkos/containers/unit_tests/threads/TestThreads_OffsetView.cpp
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp} (100%)
 rename packages/kokkos/core/src/eti/ROCm/{Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp => Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp} (100%)
 delete mode 100644 packages/kokkos/core/src/eti/ROCm/Makefile.eti_Experimental::ROCm
 create mode 100644 packages/kokkos/core/src/eti/ROCm/Makefile.eti_ROCm
 create mode 100644 packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp
 create mode 100644 packages/kokkos/core/src/kokkos.pc.in
 create mode 100644 packages/kokkos/core/unit_test/TestReduceDeviceView.hpp
 create mode 100644 packages/kokkos/core/unit_test/TestTeamTeamSize.hpp
 create mode 100644 packages/kokkos/core/unit_test/TestViewCopy.hpp
 create mode 100644 packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp
 create mode 100644 packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp
 create mode 100644 packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy.cpp
 create mode 100644 packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy.cpp
 create mode 100644 packages/kokkos/core/unit_test/cuda/TestCuda_Reductions_DeviceView.cpp
 create mode 100644 packages/kokkos/core/unit_test/cuda/TestCuda_TeamTeamSize.cpp
 create mode 100644 packages/kokkos/core/unit_test/cuda/TestCuda_ViewLayoutStrideAssignment.cpp
 create mode 100644 packages/kokkos/core/unit_test/openmp/TestOpenMP_Reductions_DeviceView.cpp
 create mode 100644 packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamTeamSize.cpp
 create mode 100644 packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewLayoutStrideAssignment.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewCopy.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_Crs.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_a.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_b.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_c.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_d.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_e.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_SubView_c13.cpp
 create mode 100644 packages/kokkos/core/unit_test/rocm/TestROCm_TeamTeamSize.cpp
 create mode 100644 packages/kokkos/core/unit_test/serial/TestSerial_Reductions_DeviceView.cpp
 create mode 100644 packages/kokkos/core/unit_test/serial/TestSerial_TeamTeamSize.cpp
 create mode 100644 packages/kokkos/core/unit_test/serial/TestSerial_ViewLayoutStrideAssignment.cpp
 create mode 100644 packages/kokkos/core/unit_test/standalone/Makefile
 create mode 100644 packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp
 create mode 100644 packages/kokkos/core/unit_test/threads/TestThreads_Reductions_DeviceView.cpp
 create mode 100644 packages/kokkos/core/unit_test/threads/TestThreads_TeamTeamSize.cpp
 create mode 100644 packages/kokkos/core/unit_test/threads/TestThreads_ViewLayoutStrideAssignment.cpp
 create mode 100644 packages/kokkos/example/virtual_functions/Makefile
 create mode 100644 packages/kokkos/example/virtual_functions/classes.cpp
 create mode 100644 packages/kokkos/example/virtual_functions/classes.hpp
 create mode 100644 packages/kokkos/example/virtual_functions/main.cpp
 create mode 100755 packages/kokkos/scripts/testing_scripts/test_all_sandia
 rename packages/kokkos/scripts/trilinos-integration/{shepard_jenkins_run_script_pthread_intel => blake_jenkins_run_script_pthread_intel} (82%)
 rename packages/kokkos/scripts/trilinos-integration/{shepard_jenkins_run_script_serial_intel => blake_jenkins_run_script_serial_intel} (82%)

diff --git a/packages/kokkos/.gitrepo b/packages/kokkos/.gitrepo
index e20dc392b..ef0c50a60 100644
--- a/packages/kokkos/.gitrepo
+++ b/packages/kokkos/.gitrepo
@@ -6,6 +6,7 @@
 [subrepo]
 	remote = git@github.com:kokkos/kokkos.git
 	branch = master
-	commit = d3a941925cbfb71785d8ea68259123ed52d3f9da
-	parent = e02f01f376b1594c9768e06f70b637965d594da9
-	cmdver = 0.3.1
+	commit = 9614f72c75aa2131d56900511e5eebae54a7bd8b
+	parent = 7fc65e3330cc86e88570067a4f99f6d794992ac1
+	cmdver = 0.4.0
+	method = merge
diff --git a/packages/kokkos/.travis.yml b/packages/kokkos/.travis.yml
index 2734954ad..bdeaf4762 100644
--- a/packages/kokkos/.travis.yml
+++ b/packages/kokkos/.travis.yml
@@ -6,18 +6,13 @@ os:
   - linux
   - osx
 
-addons:
-  apt:
-    sources:
-      - ubuntu-toolchain-r-test
-    packages:
-      - cmake
-      - clang
-
 compiler:
   - gcc
   - clang
 
+cache:
+  - ccache
+
 env:
   - THREADING="serial"
   - THREADING="openmp"
@@ -25,22 +20,39 @@ env:
 
 # Apple GCC does not support OpenMP.  GCC with OpenMP requires Homebrew.
 # Apple Clang does not support OpenMP.  Clang with OpenMP requires Homebrew.
-# Clang OpenMP support is not always available.
 matrix:
   exclude:
-    - compiler: clang
-      env: THREADING="openmp"
     - os: osx
       env: THREADING="openmp"
     - os: osx
       compiler: gcc
 
+before_script:
+  - if [[ ${TRAVIS_OS_NAME} == "osx" ]]; then
+      brew update;
+      export HOMEBREW_NO_AUTO_UPDATE=1;
+      brew ls --versions ccache   > /dev/null || brew install ccache;
+      export PATH=/usr/local/opt/ccache/libexec:$PATH;
+    fi
+  - ccache -z
+
 script:
   - export OMP_NUM_THREADS=2
   - export OMP_PLACES=threads
   - export OMP_PROC_BIND=spread
+  # LD_LIBRARY_PATH workaround to find clang's libomp: https://github.com/travis-ci/travis-ci/issues/8613 
+  - if [[ ${CC} = clang ]]; then export LD_LIBRARY_PATH=/usr/local/clang/lib${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH; fi
+  # enable ccache for clang on linux and add CCACHE_CPP2 to avoid 'Argument unused during compilation -I...' warning
+  - if [[ ${TRAVIS_OS_NAME} = linux && ${CC} = clang ]]; then
+      ln -s /usr/bin/ccache $HOME/bin/clang++;
+      export CCACHE_CPP2=yes;
+      GENERATE_OPTS="--gcc-toolchain=/usr";
+    fi
   - mkdir build
   - cd build
-  - ../generate_makefile.bash --compiler=$CXX --with-$THREADING --with-options=compiler_warnings
+  - ../generate_makefile.bash --compiler=$CXX --with-$THREADING --with-options=compiler_warnings ${GENERATE_OPTS}
   - make
   - make test
+
+after_success:
+  - ccache -s
diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md
index 145cc6270..5564096ea 100644
--- a/packages/kokkos/CHANGELOG.md
+++ b/packages/kokkos/CHANGELOG.md
@@ -1,5 +1,68 @@
 # Change Log
 
+## [2.7.24](https://github.com/kokkos/kokkos/tree/2.7.24) (2018-11-04)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.00...2.7.24)
+
+**Implemented enhancements:**
+
+- DualView: Add non-templated functions for sync, need\_sync, view, modify [\#1858](https://github.com/kokkos/kokkos/issues/1858)
+- DualView: Avoid needlessly allocates and initializes modify\_host and modify\_device flag views [\#1831](https://github.com/kokkos/kokkos/issues/1831)
+- DualView: Incorrect deduction of "not device type" [\#1659](https://github.com/kokkos/kokkos/issues/1659)
+- BuildSystem: Add KOKKOS\_ENABLE\_CXX14 and KOKKOS\_ENABLE\_CXX17 [\#1602](https://github.com/kokkos/kokkos/issues/1602)
+- BuildSystem: Installed kokkos\_generated\_settings.cmake contains build directories instead of install directories [\#1838](https://github.com/kokkos/kokkos/issues/1838)
+- BuildSystem: KOKKOS\_ARCH: add ticks to printout of improper arch setting [\#1649](https://github.com/kokkos/kokkos/issues/1649)
+- BuildSystem: Make core/src/Makefile for Cuda use needed nvcc\_wrapper [\#1296](https://github.com/kokkos/kokkos/issues/1296)
+- Build: Support PGI as host compiler for NVCC [\#1828](https://github.com/kokkos/kokkos/issues/1828)
+- Build: Many Warnings Fixed e.g.[\#1786](https://github.com/kokkos/kokkos/issues/1786)
+- Capability: OffsetView with non-zero begin index [\#567](https://github.com/kokkos/kokkos/issues/567)
+- Capability: Reductions into device side view [\#1788](https://github.com/kokkos/kokkos/issues/1788)
+- Capability: Add max\_size to Kokkos::Array [\#1760](https://github.com/kokkos/kokkos/issues/1760)
+- Capability: View Assignment: LayoutStride -\> LayoutLeft and LayoutStride -\> LayoutRight [\#1594](https://github.com/kokkos/kokkos/issues/1594)
+- Capability: Atomic function allow implicit conversion of update argument [\#1571](https://github.com/kokkos/kokkos/issues/1571)
+- Capability: Add team\_size\_max with tagged functors [\#663](https://github.com/kokkos/kokkos/issues/663)
+- Capability: Fix allignment of views from Kokkos\_ScratchSpace should use different alignment [\#1700](https://github.com/kokkos/kokkos/issues/1700)
+- Capabilitiy: create\_mirror\_view\_and\_copy for DynRankView [\#1651](https://github.com/kokkos/kokkos/issues/1651)
+- Capability: DeepCopy HBWSpace / HostSpace [\#548](https://github.com/kokkos/kokkos/issues/548)
+- ROCm: support team vector scan  [\#1645](https://github.com/kokkos/kokkos/issues/1645)
+- ROCm:  Merge from rocm-hackathon2 [\#1636](https://github.com/kokkos/kokkos/issues/1636)
+- ROCm:  Add ParallelScanWithTotal [\#1611](https://github.com/kokkos/kokkos/issues/1611)
+- ROCm: Implement MDRange in ROCm [\#1314](https://github.com/kokkos/kokkos/issues/1314)
+- ROCm: Implement Reducers for Nested Parallelism Levels [\#963](https://github.com/kokkos/kokkos/issues/963)
+- ROCm: Add asynchronous deep copy [\#959](https://github.com/kokkos/kokkos/issues/959)
+- Tests: Memory pool test seems to allocate 8GB [\#1830](https://github.com/kokkos/kokkos/issues/1830)
+- Tests: Add unit\_test for team\_broadcast [\#734](https://github.com/kokkos/kokkos/issues/734)
+
+**Fixed bugs:**
+
+- BuildSystem: Makefile.kokkos gets gcc-toolchain wrong if gcc is cached [\#1841](https://github.com/kokkos/kokkos/issues/1841)
+- BuildSystem: kokkos\_generated\_settings.cmake placement is inconsistent [\#1771](https://github.com/kokkos/kokkos/issues/1771)
+- BuildSystem: Invalid escape sequence \. in kokkos\_functions.cmake [\#1661](https://github.com/kokkos/kokkos/issues/1661)
+- BuildSystem: Problem in Kokkos generated cmake file [\#1770](https://github.com/kokkos/kokkos/issues/1770)
+- BuildSystem: invalid file names on windows [\#1671](https://github.com/kokkos/kokkos/issues/1671)
+- Tests: reducers min/max\_loc test fails randomly due to multiple min values and thus multiple valid locations [\#1681](https://github.com/kokkos/kokkos/issues/1681)
+- Tests: cuda.scatterview unit test causes "Bus error" when force\_uvm and enable\_lambda are enabled [\#1852](https://github.com/kokkos/kokkos/issues/1852)
+- Tests: cuda.cxx11 unit test fails when force\_uvm and enable\_lambda are enabled [\#1850](https://github.com/kokkos/kokkos/issues/1850)
+- Tests: threads.reduce\_device\_view\_range\_policy failing with Cuda/8.0.44 and RDC [\#1836](https://github.com/kokkos/kokkos/issues/1836)
+- Build: compile error when compiling Kokkos with hwloc 2.0.1 \(on OSX 10.12.6, with g++ 7.2.0\) [\#1506](https://github.com/kokkos/kokkos/issues/1506)
+- Build: dual\_view.view broken with UVM [\#1834](https://github.com/kokkos/kokkos/issues/1834)
+- Build: White cuda/9.2 + gcc/7.2 warnings triggering errors  [\#1833](https://github.com/kokkos/kokkos/issues/1833)
+- Build: warning: enum constant in boolean context [\#1813](https://github.com/kokkos/kokkos/issues/1813)
+- Capability: Fix overly conservative max\_team\_size thingy [\#1808](https://github.com/kokkos/kokkos/issues/1808)
+- DynRankView: Ctors taking ViewAllocateWithoutInitializing broken [\#1783](https://github.com/kokkos/kokkos/issues/1783)
+- Cuda: Apollo cuda.team\_broadcast test fail with clang-6.0 [\#1762](https://github.com/kokkos/kokkos/issues/1762)
+- Cuda: Clang spurious test failure in impl\_view\_accessible [\#1753](https://github.com/kokkos/kokkos/issues/1753)
+- Cuda: Kokkos::complex\<double\> atomic deadlocks with Clang 6 Cuda build with -O0 [\#1752](https://github.com/kokkos/kokkos/issues/1752)
+- Cuda: LayoutStride Test fails for UVM as default memory space [\#1688](https://github.com/kokkos/kokkos/issues/1688)
+- Cuda: Scan wrong values on Volta [\#1676](https://github.com/kokkos/kokkos/issues/1676)
+- Cuda: Kokkos::deep\_copy error with CudaUVM and Kokkos::Serial spaces [\#1652](https://github.com/kokkos/kokkos/issues/1652)
+- Cuda: cudaErrorInvalidConfiguration with debug build [\#1647](https://github.com/kokkos/kokkos/issues/1647)
+- Cuda: parallel\_for with TeamPolicy::team\_size\_recommended with launch bounds not working -- reported by Daniel Holladay [\#1283](https://github.com/kokkos/kokkos/issues/1283)
+- Cuda: Using KOKKOS\_CLASS\_LAMBDA in a class with Kokkos::Random\_XorShift64\_Pool member data [\#1696](https://github.com/kokkos/kokkos/issues/1696)
+- Long Build Times on Darwin [\#1721](https://github.com/kokkos/kokkos/issues/1721)
+- Capability: Typo in Kokkos\_Sort.hpp - BinOp3D - wrong comparison [\#1720](https://github.com/kokkos/kokkos/issues/1720)
+- Buffer overflow in SharedAllocationRecord in Kokkos\_HostSpace.cpp [\#1673](https://github.com/kokkos/kokkos/issues/1673)
+- Serial unit test failure [\#1632](https://github.com/kokkos/kokkos/issues/1632)
+
 ## [2.7.00](https://github.com/kokkos/kokkos/tree/2.7.00) (2018-05-24)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/2.6.00...2.7.00)
 
diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt
index 9e5308f1c..236f523ae 100644
--- a/packages/kokkos/CMakeLists.txt
+++ b/packages/kokkos/CMakeLists.txt
@@ -11,7 +11,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
 
   # Define Project Name if this is a standalone build
   IF(NOT DEFINED ${PROJECT_NAME})
-    project(Kokkos CXX) 
+    project(Kokkos CXX)
   ENDIF()
 
   # Basic initialization (Used in KOKKOS_SETTINGS)
@@ -22,7 +22,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
   include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
   set_kokkos_cxx_compiler()
   set_kokkos_cxx_standard()
-  
+
   #------------ GET OPTIONS AND KOKKOS_SETTINGS --------------------------------
   # Add Kokkos' modules to CMake's module path.
   set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/")
@@ -34,7 +34,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
 
   #------------ GENERATE HEADER AND SOURCE FILES -------------------------------
   execute_process(
-    COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings
+    COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} PREFIX=${CMAKE_INSTALL_PREFIX} generate_build_settings
     WORKING_DIRECTORY "${Kokkos_BINARY_DIR}"
     OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out
     RESULT_VARIABLE GEN_SETTINGS_RESULT
@@ -45,6 +45,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
   endif()
   include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake)
   install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos)
+  install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX})
   string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}")
   string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}")
   string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}")
diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos
index 6fc70cc70..05f3cf781 100644
--- a/packages/kokkos/Makefile.kokkos
+++ b/packages/kokkos/Makefile.kokkos
@@ -15,12 +15,13 @@ KOKKOS_ARCH ?= ""
 KOKKOS_DEBUG ?= "no"
 # Options: hwloc,librt,experimental_memkind
 KOKKOS_USE_TPLS ?= ""
-# Options: c++11,c++1z
+# Options: c++11,c++14,c++1y,c++17,c++1z,c++2a
 KOKKOS_CXX_STANDARD ?= "c++11"
 # Options: aggressive_vectorization,disable_profiling,disable_deprecated_code,enable_large_mem_tests
 KOKKOS_OPTIONS ?= ""
 # Option for setting ETI path
 KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti
+KOKKOS_CMAKE ?= "no"
 
 # Default settings specific options.
 # Options: force_uvm,use_ldg,rdc,enable_lambda
@@ -35,7 +36,11 @@ kokkos_has_string=$(if $(findstring $2,$1),1,0)
 # Check for general settings.
 KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes)
 KOKKOS_INTERNAL_ENABLE_CXX11 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++11)
+KOKKOS_INTERNAL_ENABLE_CXX14 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++14)
+KOKKOS_INTERNAL_ENABLE_CXX1Y := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1y)
+KOKKOS_INTERNAL_ENABLE_CXX17 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++17)
 KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z)
+KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
 
 # Check for external libraries.
 KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
@@ -104,6 +109,18 @@ KOKKOS_INTERNAL_COMPILER_CLANG       := $(call kokkos_has_string,$(KOKKOS_CXX_VE
 KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin)
 KOKKOS_INTERNAL_COMPILER_HCC         := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
 
+# Check Host Compiler if using NVCC through nvcc_wrapper
+ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+  KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER := $(strip $(shell echo $(CXX) | grep nvcc_wrapper | wc -l))
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC_WRAPPER), 1)
+
+    KOKKOS_CXX_HOST_VERSION             := $(strip $(shell $(CXX) $(CXXFLAGS) --host-version       2>&1))
+    KOKKOS_INTERNAL_COMPILER_PGI    := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),PGI)
+    KOKKOS_INTERNAL_COMPILER_INTEL  := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),Intel Corporation)
+    KOKKOS_INTERNAL_COMPILER_CLANG  := $(call kokkos_has_string,$(KOKKOS_CXX_HOST_VERSION),clang)
+  endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
   KOKKOS_INTERNAL_COMPILER_CLANG = 1
 endif
@@ -196,18 +213,34 @@ endif
 # Set C++11 flags.
 ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
   KOKKOS_INTERNAL_CXX11_FLAG := --c++11
+  KOKKOS_INTERNAL_CXX14_FLAG := --c++14
+  #KOKKOS_INTERNAL_CXX17_FLAG := --c++17
 else
   ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
      KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
+     #KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14
+     KOKKOS_INTERNAL_CXX1Y_FLAG := -std=c++1y
+     #KOKKOS_INTERNAL_CXX17_FLAG := -std=c++17
+     #KOKKOS_INTERNAL_CXX1Z_FLAG := -std=c++1Z
+     #KOKKOS_INTERNAL_CXX2A_FLAG := -std=c++2a
   else
     ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
       KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
+      KOKKOS_INTERNAL_CXX14_FLAG := -hstd=c++14
+      #KOKKOS_INTERNAL_CXX1Y_FLAG := -hstd=c++1y
+      #KOKKOS_INTERNAL_CXX17_FLAG := -hstd=c++17
+      #KOKKOS_INTERNAL_CXX1Z_FLAG := -hstd=c++1z
+      #KOKKOS_INTERNAL_CXX2A_FLAG := -hstd=c++2a
     else
       ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
         KOKKOS_INTERNAL_CXX11_FLAG := 
       else
         KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
+        KOKKOS_INTERNAL_CXX14_FLAG := --std=c++14
+        KOKKOS_INTERNAL_CXX1Y_FLAG := --std=c++1y
+        KOKKOS_INTERNAL_CXX17_FLAG := --std=c++17
         KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
+        KOKKOS_INTERNAL_CXX2A_FLAG := --std=c++2a
       endif
     endif
   endif
@@ -330,7 +363,9 @@ endif
 
 #CPPFLAGS is now unused
 KOKKOS_CPPFLAGS =
-KOKKOS_CXXFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH)
+ifneq ($(KOKKOS_CMAKE), yes)
+  KOKKOS_CXXFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH)
+endif
 KOKKOS_TPL_INCLUDE_DIRS =
 KOKKOS_TPL_LIBRARY_DIRS =
 KOKKOS_TPL_LIBRARY_NAMES =
@@ -341,9 +376,11 @@ endif
 
 KOKKOS_LIBS = -ldl
 KOKKOS_TPL_LIBRARY_NAMES += dl
-KOKKOS_LDFLAGS = -L$(shell pwd)
-# CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command
-KOKKOS_CXXLDFLAGS = -L$(shell pwd)
+ifneq ($(KOKKOS_CMAKE), yes)
+  KOKKOS_LDFLAGS = -L$(shell pwd)
+  # CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command
+  KOKKOS_CXXLDFLAGS = -L$(shell pwd)
+endif
 KOKKOS_LINK_FLAGS = 
 KOKKOS_SRC =
 KOKKOS_HEADERS =
@@ -371,10 +408,12 @@ tmp := $(call kokkos_append_header,"/* Execution Spaces */")
 
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
   tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA")
+  tmp := $(call kokkos_append_header,"\#define KOKKOS_COMPILER_CUDA_VERSION $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION)")
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
   tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
+  tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
 endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
@@ -432,11 +471,25 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
   KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
   tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11")
 endif
-
+ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1)
+  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG)
+  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
+endif
+ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1)
+  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG)
+  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14")
+endif
+ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1)
+  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG)
+  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
+endif
 ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
   KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11")
-  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX1Z")
+  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17")
+endif
+ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1)
+  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2A_FLAG)
+  tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20")
 endif
 
 ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
@@ -459,7 +512,9 @@ endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
   ifneq ($(HWLOC_PATH),)
-    KOKKOS_CXXFLAGS += -I$(HWLOC_PATH)/include
+    ifneq ($(KOKKOS_CMAKE), yes)
+      KOKKOS_CXXFLAGS += -I$(HWLOC_PATH)/include
+    endif
     KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
     KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib
     KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include
@@ -478,7 +533,9 @@ endif
 
 ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
   ifneq ($(MEMKIND_PATH),)
-    KOKKOS_CXXFLAGS += -I$(MEMKIND_PATH)/include
+    ifneq ($(KOKKOS_CMAKE), yes)
+      KOKKOS_CXXFLAGS += -I$(MEMKIND_PATH)/include
+    endif
     KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
     KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib
     KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include
@@ -971,7 +1028,9 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
 endif
   KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
   ifneq ($(CUDA_PATH),)
-    KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
+    ifneq ($(KOKKOS_CMAKE), yes)
+      KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
+    endif
     KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
     KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64
     KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include
@@ -1026,7 +1085,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
   KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
   KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
   ifneq ($(QTHREADS_PATH),)
-    KOKKOS_CXXFLAGS += -I$(QTHREADS_PATH)/include
+    ifneq ($(KOKKOS_CMAKE), yes)
+      KOKKOS_CXXFLAGS += -I$(QTHREADS_PATH)/include
+    endif
     KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
     KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib
     KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include
diff --git a/packages/kokkos/README b/packages/kokkos/README
index 31d134bf0..4b6d4170e 100644
--- a/packages/kokkos/README
+++ b/packages/kokkos/README
@@ -52,44 +52,47 @@ For specifics see the LICENSE file contained in the repository or distribution.
   * GCC 4.8.4
   * GCC 4.9.3
   * GCC 5.1.0
-  * GCC 5.3.0
+  * GCC 5.5.0
   * GCC 6.1.0
+  * GCC 7.2.0
+  * GCC 7.3.0
+  * GCC 8.1.0
   * Intel 15.0.2
   * Intel 16.0.1
-  * Intel 17.1.043
+  * Intel 17.0.1
   * Intel 17.4.196
-  * Intel 18.0.128
+  * Intel 18.2.128
   * Clang 3.6.1
   * Clang 3.7.1
   * Clang 3.8.1
   * Clang 3.9.0
   * Clang 4.0.0
-  * Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44)
-  * Clang 6.0.0 for CUDA (CUDA Toolkit 9.1)
-  * PGI 17.10
-  * NVCC 7.0 for CUDA (with gcc 4.8.4)
+  * Clang 6.0.0 for CUDA (CUDA Toolkit 9.0)
+  * Clang 7.0.0 for CUDA (CUDA Toolkit 9.1)
+  * PGI 18.7
   * NVCC 7.5 for CUDA (with gcc 4.8.4)
   * NVCC 8.0.44 for CUDA (with gcc 5.3.0)
   * NVCC 9.1 for CUDA (with gcc 6.1.0)
 
 ### Primary tested compilers on Power 8 are:
-  * GCC 5.4.0 (OpenMP,Serial)
-  * IBM XL 13.1.6 (OpenMP, Serial)
-  * NVCC 8.0.44 for CUDA (with gcc 5.4.0)
-  * NVCC 9.0.103 for CUDA (with gcc 6.3.0 and XL 13.1.6)
+  * GCC 6.4.0 (OpenMP,Serial)
+  * GCC 7.2.0 (OpenMP,Serial)
+  * IBM XL 16.1.0 (OpenMP, Serial)
+  * NVCC 9.2.88 for CUDA (with gcc 7.2.0 and XL 16.1.0)
 
 ### Primary tested compilers on Intel KNL are:
-  * GCC 6.2.0
   * Intel 16.4.258 (with gcc 4.7.2)
   * Intel 17.2.174 (with gcc 4.9.3)
-  * Intel 18.0.128 (with gcc 4.9.3)
+  * Intel 18.2.199 (with gcc 4.9.3)
 
-### Primary tested compilers on ARM
-  * GCC 6.1.0 
+### Primary tested compilers on ARM (Cavium ThunderX2)
+  * GCC 7.2.0 
+  * ARM/Clang 18.4.0
   
 ### Other compilers working:
   * X86:
    - Cygwin 2.1.0 64bit with gcc 4.9.3
+   - GCC 8.1.0 (not warning free)
 
 ### Known non-working combinations:
   * Power8:
diff --git a/packages/kokkos/algorithms/src/Kokkos_Random.hpp b/packages/kokkos/algorithms/src/Kokkos_Random.hpp
index 5f1d88bff..e14471a48 100644
--- a/packages/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/packages/kokkos/algorithms/src/Kokkos_Random.hpp
@@ -697,6 +697,7 @@ namespace Kokkos {
     typedef Random_XorShift64<DeviceType> generator_type;
     typedef DeviceType device_type;
 
+    KOKKOS_INLINE_FUNCTION
     Random_XorShift64_Pool() {
       num_states_ = 0;
     }
@@ -709,12 +710,14 @@ namespace Kokkos {
 #endif
     }
 
+    KOKKOS_INLINE_FUNCTION
     Random_XorShift64_Pool(const Random_XorShift64_Pool& src):
       locks_(src.locks_),
       state_(src.state_),
       num_states_(src.num_states_)
     {}
 
+    KOKKOS_INLINE_FUNCTION
     Random_XorShift64_Pool operator = (const Random_XorShift64_Pool& src) {
       locks_ = src.locks_;
       state_ = src.state_;
@@ -958,6 +961,7 @@ namespace Kokkos {
 
     typedef DeviceType device_type;
 
+    KOKKOS_INLINE_FUNCTION
     Random_XorShift1024_Pool() {
       num_states_ = 0;
     }
@@ -972,6 +976,7 @@ namespace Kokkos {
 #endif
     }
 
+    KOKKOS_INLINE_FUNCTION
     Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src):
       locks_(src.locks_),
       state_(src.state_),
@@ -979,6 +984,7 @@ namespace Kokkos {
       num_states_(src.num_states_)
     {}
 
+    KOKKOS_INLINE_FUNCTION
     Random_XorShift1024_Pool operator = (const Random_XorShift1024_Pool& src) {
       locks_ = src.locks_;
       state_ = src.state_;
diff --git a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp
index c952b1e54..8bdd87672 100644
--- a/packages/kokkos/algorithms/src/Kokkos_Sort.hpp
+++ b/packages/kokkos/algorithms/src/Kokkos_Sort.hpp
@@ -246,8 +246,8 @@ public:
   {
     bin_count_atomic = Kokkos::View<int*, Space >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
     bin_count_const =  bin_count_atomic;
-    bin_offsets =      offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins());
-    sort_order =       offset_type("PermutationVector",range_end-range_begin);
+    bin_offsets =      offset_type(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::bin_offsets"),bin_op.max_bins());
+    sort_order =       offset_type(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sort_order"),range_end-range_begin);
   }
 
   BinSort( const_key_view_type  keys_
@@ -290,7 +290,7 @@ public:
 
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
     scratch_view_type
-      sorted_values("Scratch",
+      sorted_values(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sorted_values"),
                     len,
                     values.extent(1),
                     values.extent(2),
@@ -301,7 +301,7 @@ public:
                     values.extent(7));
 #else
     scratch_view_type
-      sorted_values("Scratch",
+      sorted_values(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sorted_values"),
                   values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
                   values.rank_dynamic > 1 ? values.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG ,
                   values.rank_dynamic > 2 ? values.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
@@ -483,7 +483,7 @@ struct BinOp3D {
     if (keys(i1,0)>keys(i2,0)) return true;
     else if (keys(i1,0)==keys(i2,0)) {
       if (keys(i1,1)>keys(i2,1)) return true;
-      else if (keys(i1,1)==keys(i2,2)) {
+      else if (keys(i1,1)==keys(i2,1)) {
         if (keys(i1,2)>keys(i2,2)) return true;
       }
     }
diff --git a/packages/kokkos/benchmarks/gups/Makefile b/packages/kokkos/benchmarks/gups/Makefile
new file mode 100644
index 000000000..717611166
--- /dev/null
+++ b/packages/kokkos/benchmarks/gups/Makefile
@@ -0,0 +1,41 @@
+#Set your Kokkos path to something appropriate
+KOKKOS_PATH = ${HOME}/git/kokkos-github-repo
+KOKKOS_DEVICES = "Cuda"
+KOKKOS_ARCH = "Pascal60"
+KOKKOS_CUDA_OPTIONS = enable_lambda
+#KOKKOS_DEVICES = "OpenMP"
+#KOKKOS_ARCH = "Power8"
+
+SRC = gups-kokkos.cc
+
+default: build
+	echo "Start Build"
+	
+CXXFLAGS = -O3
+CXX = ${HOME}/git/kokkos-github-repo/bin/nvcc_wrapper
+#CXX = g++
+
+LINK = ${CXX}
+
+LINKFLAGS =  
+EXE = gups-kokkos
+
+DEPFLAGS = -M
+
+OBJ = $(SRC:.cc=.o)
+LIB =
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean: kokkos-clean 
+	rm -f *.o $(EXE)
+
+# Compilation rules
+
+%.o:%.cc $(KOKKOS_CPP_DEPENDS)
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
diff --git a/packages/kokkos/benchmarks/gups/gups-kokkos.cc b/packages/kokkos/benchmarks/gups/gups-kokkos.cc
new file mode 100644
index 000000000..4602adda7
--- /dev/null
+++ b/packages/kokkos/benchmarks/gups/gups-kokkos.cc
@@ -0,0 +1,199 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include "Kokkos_Core.hpp"
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
+
+#include <sys/time.h>
+
+#define HLINE "-------------------------------------------------------------\n"
+
+#if defined(KOKKOS_ENABLE_CUDA)
+typedef Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror GUPSHostArray;
+typedef Kokkos::View<int64_t*, Kokkos::CudaSpace> GUPSDeviceArray;
+#else
+typedef Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror GUPSHostArray;
+typedef Kokkos::View<int64_t*, Kokkos::HostSpace> GUPSDeviceArray;
+#endif
+
+typedef int GUPSIndex;
+
+double now() {
+	struct timeval now;
+	gettimeofday(&now, NULL);
+
+	return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
+}
+
+void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices, const int64_t dataCount) {
+	for( GUPSIndex i = 0; i < indices.extent(0); ++i ) {
+		indices[i] = lrand48() % dataCount;
+	}
+
+	Kokkos::deep_copy(dev_indices, indices);
+}
+
+void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data, const int64_t datum,
+	const bool performAtomics) {
+
+	if( performAtomics ) {
+		Kokkos::parallel_for("bench-gups-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) {
+			Kokkos::atomic_fetch_xor( &data[indices[i]], datum );
+		});
+	} else {
+		Kokkos::parallel_for("bench-gups-non-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) {
+			data[indices[i]] ^= datum;
+		});
+	}
+
+	Kokkos::fence();
+}
+
+int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const int repeats,
+	const bool useAtomics) {
+
+	printf("Reports fastest timing per kernel\n");
+	printf("Creating Views...\n");
+
+	printf("Memory Sizes:\n");
+	printf("- Elements:      %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(dataCount),
+		1.0e-6 * ((double) dataCount * (double) sizeof(int64_t)));
+	printf("- Indices:       %15" PRIu64 " (%12.4f MB)\n", static_cast<uint64_t>(indicesCount),
+		1.0e-6 * ((double) indicesCount * (double) sizeof(int64_t)));
+	printf(" - Atomics:      %15s\n", (useAtomics ? "Yes" : "No") );
+	printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
+
+	printf(HLINE);
+
+	GUPSDeviceArray dev_indices("indices", indicesCount);
+	GUPSDeviceArray dev_data("data", dataCount);
+	int64_t datum = -1;
+
+	GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices);
+	GUPSHostArray data    = Kokkos::create_mirror_view(dev_data);
+
+	double gupsTime  = 0.0;
+
+	printf("Initializing Views...\n");
+
+#if defined(KOKKOS_HAVE_OPENMP)
+	Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
+#else
+	Kokkos::parallel_for("init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
+#endif
+		KOKKOS_LAMBDA(const int i) {
+
+		data[i] = 10101010101;
+	});
+
+#if defined(KOKKOS_HAVE_OPENMP)
+	Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
+#else
+	Kokkos::parallel_for("init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
+#endif
+		KOKKOS_LAMBDA(const int i) {
+
+		indices[i] = 0;
+	});
+
+	Kokkos::deep_copy(dev_data, data);
+	Kokkos::deep_copy(dev_indices, indices);
+	double start;
+
+	printf("Starting benchmarking...\n");
+
+	for( GUPSIndex k = 0; k < repeats; ++k ) {
+		randomize_indices(indices, dev_indices, data.extent(0));
+
+		start = now();
+		run_gups(dev_indices, dev_data, datum, useAtomics);
+		gupsTime += now() - start;
+	}
+
+	Kokkos::deep_copy(indices, dev_indices);
+	Kokkos::deep_copy(data, dev_data);
+
+	printf(HLINE);
+	printf("GUP/s Random:      %18.6f\n",
+		(1.0e-9 * ((double) repeats) * (double) dev_indices.extent(0)) / gupsTime);
+	printf(HLINE);
+
+	return 0;
+}
+
+int main(int argc, char* argv[]) {
+
+	printf(HLINE);
+	printf("Kokkos GUPS Benchmark\n");
+	printf(HLINE);
+
+	srand48(1010101);
+
+	Kokkos::initialize(argc, argv);
+
+	int64_t indices = 8192;
+	int64_t data    = 33554432;
+	int64_t repeats = 10;
+	bool useAtomics = false;
+
+	for( int i = 1; i < argc; ++i ) {
+		if( strcmp( argv[i], "--indices" ) == 0 ) {
+			indices = std::atoll(argv[i+1]);
+			++i;
+		} else if( strcmp( argv[i], "--data" ) == 0 ) {
+			data = std::atoll(argv[i+1]);
+			++i;
+		} else if( strcmp( argv[i], "--repeats" ) == 0 ) {
+			repeats = std::atoll(argv[i+1]);
+			++i;
+		} else if( strcmp( argv[i], "--atomics" ) == 0 ) {
+			useAtomics = true;
+		}
+	}
+
+	const int rc = run_benchmark(indices, data, repeats, useAtomics);
+
+	Kokkos::finalize();
+
+	return rc;
+}
diff --git a/packages/kokkos/benchmarks/stream/Makefile b/packages/kokkos/benchmarks/stream/Makefile
new file mode 100644
index 000000000..04566b322
--- /dev/null
+++ b/packages/kokkos/benchmarks/stream/Makefile
@@ -0,0 +1,41 @@
+#Set your Kokkos path to something appropriate
+KOKKOS_PATH = ${HOME}/git/kokkos-github-repo
+#KOKKOS_DEVICES = "Cuda"
+#KOKKOS_ARCH = "Pascal60"
+#KOKKOS_CUDA_OPTIONS = enable_lambda
+KOKKOS_DEVICES = "OpenMP"
+KOKKOS_ARCH = "Power8"
+
+SRC = stream-kokkos.cc
+
+default: build
+	echo "Start Build"
+	
+CXXFLAGS = -O3
+#CXX = ${HOME}/git/kokkos-github-repo/bin/nvcc_wrapper
+CXX = g++
+
+LINK = ${CXX}
+
+LINKFLAGS =  
+EXE = stream-kokkos
+
+DEPFLAGS = -M
+
+OBJ = $(SRC:.cc=.o)
+LIB =
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean: kokkos-clean 
+	rm -f *.o $(EXE)
+
+# Compilation rules
+
+%.o:%.cc $(KOKKOS_CPP_DEPENDS)
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
diff --git a/packages/kokkos/benchmarks/stream/stream-kokkos.cc b/packages/kokkos/benchmarks/stream/stream-kokkos.cc
new file mode 100644
index 000000000..370995432
--- /dev/null
+++ b/packages/kokkos/benchmarks/stream/stream-kokkos.cc
@@ -0,0 +1,265 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include "Kokkos_Core.hpp"
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
+
+#include <sys/time.h>
+
+#define STREAM_ARRAY_SIZE 100000000
+#define STREAM_NTIMES     20
+
+#define HLINE "-------------------------------------------------------------\n"
+
+#if defined(KOKKOS_ENABLE_CUDA)
+typedef Kokkos::View<double*, Kokkos::CudaSpace>::HostMirror StreamHostArray;
+typedef Kokkos::View<double*, Kokkos::CudaSpace> StreamDeviceArray;
+#else
+typedef Kokkos::View<double*, Kokkos::HostSpace>::HostMirror StreamHostArray;
+typedef Kokkos::View<double*, Kokkos::HostSpace> StreamDeviceArray;
+#endif
+
+typedef int StreamIndex;
+
+double now() {
+	struct timeval now;
+	gettimeofday(&now, NULL);
+
+	return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
+}
+
+void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c) {
+
+	Kokkos::parallel_for("copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) {
+		c[i] = a[i];
+	});
+
+	Kokkos::fence();
+}
+
+void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c,
+       	const double scalar) {
+
+	Kokkos::parallel_for("copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) {
+		b[i] = scalar * c[i];
+	});
+
+	Kokkos::fence();
+}
+
+void perform_add(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c) {
+	Kokkos::parallel_for("add", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) {
+                c[i] = a[i] + b[i];
+        });
+
+	Kokkos::fence();
+}
+
+void perform_triad(StreamDeviceArray& a, StreamDeviceArray& b, StreamDeviceArray& c,
+	const double scalar) {
+
+	Kokkos::parallel_for("triad", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) {
+		a[i] = b[i] + scalar * c[i];
+	});
+
+	Kokkos::fence();
+}
+
+int perform_validation(StreamHostArray& a, StreamHostArray& b, StreamHostArray& c,
+	const StreamIndex arraySize, const double scalar) {
+
+	double ai = 1.0;
+	double bi = 2.0;
+	double ci = 0.0;
+
+	for( StreamIndex i = 0; i < arraySize; ++i ) {
+		ci = ai;
+		bi = scalar * ci;
+		ci = ai + bi;
+		ai = bi + scalar * ci;
+	};
+
+	double aError = 0.0;
+	double bError = 0.0;
+	double cError = 0.0;
+
+	for( StreamIndex i = 0; i < arraySize; ++i ) {
+		aError = std::abs( a[i] - ai );
+		bError = std::abs( b[i] - bi );
+		cError = std::abs( c[i] - ci );
+	}
+
+	double aAvgError = aError / (double) arraySize;
+	double bAvgError = bError / (double) arraySize;
+	double cAvgError = cError / (double) arraySize;
+
+	const double epsilon = 1.0e-13;
+	int errorCount = 0;
+
+	if( std::abs( aAvgError / ai ) > epsilon ) {
+		fprintf(stderr, "Error: validation check on View a failed.\n");
+		errorCount++;
+	}
+
+	if( std::abs( bAvgError / bi ) > epsilon ) {
+		fprintf(stderr, "Error: validation check on View b failed.\n");
+		errorCount++;
+	}
+
+	if( std::abs( cAvgError / ci ) > epsilon ) {
+		fprintf(stderr, "Error: validation check on View c failed.\n");
+		errorCount++;
+	}
+
+	if( errorCount == 0 ) {
+		printf("All solutions checked and verified.\n");
+	}
+
+	return errorCount;
+}
+
+int run_benchmark() {
+
+	printf("Reports fastest timing per kernel\n");
+	printf("Creating Views...\n");
+
+	printf("Memory Sizes:\n");
+	printf("- Array Size:    %" PRIu64 "\n", static_cast<uint64_t>(STREAM_ARRAY_SIZE));
+	printf("- Per Array:     %12.2f MB\n", 1.0e-6 * (double) STREAM_ARRAY_SIZE * (double) sizeof(double));
+	printf("- Total:         %12.2f MB\n", 3.0e-6 * (double) STREAM_ARRAY_SIZE * (double) sizeof(double));
+
+	printf("Benchmark kernels will be performed for %d iterations.\n", STREAM_NTIMES);
+
+	printf(HLINE);
+
+	StreamDeviceArray dev_a("a", STREAM_ARRAY_SIZE);
+	StreamDeviceArray dev_b("b", STREAM_ARRAY_SIZE);
+	StreamDeviceArray dev_c("c", STREAM_ARRAY_SIZE);
+
+	StreamHostArray a = Kokkos::create_mirror_view(dev_a);
+	StreamHostArray b = Kokkos::create_mirror_view(dev_b);
+	StreamHostArray c = Kokkos::create_mirror_view(dev_c);
+
+	const double scalar = 3.0;
+
+	double copyTime  = std::numeric_limits<double>::max();
+	double scaleTime = std::numeric_limits<double>::max();
+	double addTime   = std::numeric_limits<double>::max();
+	double triadTime = std::numeric_limits<double>::max();
+
+	printf("Initializing Views...\n");
+
+#if defined(KOKKOS_HAVE_OPENMP)
+	Kokkos::parallel_for("init", Kokkos::RangePolicy<Kokkos::OpenMP>(0, STREAM_ARRAY_SIZE),
+#else
+	Kokkos::parallel_for("init", Kokkos::RangePolicy<Kokkos::Serial>(0, STREAM_ARRAY_SIZE),
+#endif
+		KOKKOS_LAMBDA(const int i) {
+
+		a[i] = 1.0;
+		b[i] = 2.0;
+		c[i] = 0.0;
+	});
+
+	// Copy contents of a (from the host) to the dev_a (device)
+	Kokkos::deep_copy(dev_a, a);
+	Kokkos::deep_copy(dev_b, b);
+	Kokkos::deep_copy(dev_c, c);
+
+	double start;
+
+	printf("Starting benchmarking...\n");
+
+	for( StreamIndex k = 0; k < STREAM_NTIMES; ++k ) {
+		start = now();
+		perform_copy(dev_a, dev_b, dev_c);
+		copyTime = std::min( copyTime, (now() - start) );
+
+		start = now();
+		perform_scale(dev_a, dev_b, dev_c, scalar);
+		scaleTime = std::min( scaleTime, (now() - start) );
+
+		start = now();
+		perform_add(dev_a, dev_b, dev_c);
+		addTime = std::min( addTime, (now() - start) );
+
+		start = now();
+		perform_triad(dev_a, dev_b, dev_c, scalar);
+		triadTime = std::min( triadTime, (now() - start) );
+	}
+
+	Kokkos::deep_copy(a, dev_a);
+	Kokkos::deep_copy(b, dev_b);
+	Kokkos::deep_copy(c, dev_c);
+
+	printf("Performing validation...\n");
+	int rc = perform_validation(a, b, c, STREAM_ARRAY_SIZE, scalar);
+
+	printf(HLINE);
+
+	printf("Copy            %11.2f MB/s\n",
+		( 1.0e-06 * 2.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / copyTime );
+	printf("Scale           %11.2f MB/s\n",
+		( 1.0e-06 * 2.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / scaleTime );
+	printf("Add             %11.2f MB/s\n",
+		( 1.0e-06 * 3.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / addTime );
+	printf("Triad           %11.2f MB/s\n",
+		( 1.0e-06 * 3.0 * (double) sizeof(double) * (double) STREAM_ARRAY_SIZE) / triadTime );
+
+	printf(HLINE);
+
+	return rc;
+}
+
+int main(int argc, char* argv[]) {
+
+	printf(HLINE);
+	printf("Kokkos STREAM Benchmark\n");
+	printf(HLINE);
+
+	Kokkos::initialize(argc, argv);
+	const int rc = run_benchmark();
+	Kokkos::finalize();
+
+	return rc;
+}
diff --git a/packages/kokkos/bin/hpcbind b/packages/kokkos/bin/hpcbind
index 92f9f81ac..b185a9282 100755
--- a/packages/kokkos/bin/hpcbind
+++ b/packages/kokkos/bin/hpcbind
@@ -125,18 +125,20 @@ function show_help {
   echo "  --openmp-ratio=N/D    Ratio of the cpuset to use for OpenMP"
   echo "                        Default: 1"
   echo "  --openmp-places=<Op>  Op=threads|cores|sockets. Default: threads"
-  echo "  --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES"
-  echo "  --force-openmp-num-threads=N"
+  echo "  --openmp-num-threads=N"
   echo "                        Override logic for selecting OMP_NUM_THREADS"
-  echo "  --force-openmp-proc-bind=<OP>"
+  echo "  --openmp-proc-bind=<OP>"
   echo "                        Override logic for selecting OMP_PROC_BIND"
-  echo "  --no-openmp-nested    Set OMP_NESTED to false"
+  echo "  --openmp-nested       Set OMP_NESTED to true"
+  echo "  --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES"
   echo "  --output-prefix=<P>   Save the output to files of the form"
   echo "                        P.hpcbind.N, P.stdout.N and P.stderr.N where P is "
   echo "                        the prefix and N is the rank (no spaces)"
   echo "  --output-mode=<Op>    How console output should be handled."
   echo "                        Options are all, rank0, and none.  Default: rank0" 
   echo "  --lstopo              Show bindings in lstopo"
+  echo "  --save-topology=<Xml>  Save the topology to the given xml file"
+  echo "  --load-topology=<Xml>  Load a previously saved topology from an xml file"
   echo "  -v|--verbose          Print bindings and relevant environment variables"
   echo "  -h|--help             Show this message"
   echo ""
@@ -189,7 +191,7 @@ HPCBIND_OPENMP_PLACES=${OMP_PLACES:-threads}
 declare -i HPCBIND_OPENMP_PROC_BIND=1
 HPCBIND_OPENMP_FORCE_NUM_THREADS=""
 HPCBIND_OPENMP_FORCE_PROC_BIND=""
-declare -i HPCBIND_OPENMP_NESTED=1
+declare -i HPCBIND_OPENMP_NESTED=0
 declare -i HPCBIND_VERBOSE=0
 
 declare -i HPCBIND_LSTOPO=0
@@ -197,6 +199,9 @@ declare -i HPCBIND_LSTOPO=0
 HPCBIND_OUTPUT_PREFIX=""
 HPCBIND_OUTPUT_MODE="rank0"
 
+HPCBIND_OUTPUT_TOPOLOGY=""
+HPCBIND_INPUT_TOPOLOGY=""
+
 declare -i HPCBIND_HAS_COMMAND=0
 
 for i in "$@"; do
@@ -276,10 +281,22 @@ for i in "$@"; do
       HPCBIND_OPENMP_NESTED=0
       shift
       ;;
+    --openmp-nested)
+      HPCBIND_OPENMP_NESTED=1
+      shift
+      ;;
     --output-prefix=*)
       HPCBIND_OUTPUT_PREFIX="${i#*=}"
       shift
       ;;
+    --save-topology=*)
+      HPCBIND_OUTPUT_TOPOLOGY="${i#*=}"
+      shift
+      ;;
+    --load-topology=*)
+      HPCBIND_INPUT_TOPOLOGY="${i#*=}"
+      shift
+      ;;
     --output-mode=*)
       HPCBIND_OUTPUT_MODE="${i#*=}"
       #convert to lower case
@@ -327,24 +344,37 @@ elif [[ ${HPCBIND_QUEUE_RANK} -eq 0 ]]; then
   HPCBIND_TEE=1
 fi
 
+# Save the topology to the given xml file
+if [[ "${HPCBIND_OUTPUT_TOPOLOGY}" != "" ]]; then
+  if [[ ${HPCBIND_QUEUE_RANK} -eq 0 ]]; then
+    lstopo-no-graphics "${HPCBIND_OUTPUT_TOPOLOGY}"
+  else
+    lstopo-no-graphics >/dev/null 2>&1
+  fi
+fi
+
+# Load the topology to the given xml file
+if [[ "${HPCBIND_INPUT_TOPOLOGY}" != "" ]]; then
+  if [ -f ${HPCBIND_INPUT_TOPOLOGY} ]; then
+    export HWLOC_XMLFILE="${HPCBIND_INPUT_TOPOLOGY}"
+    export HWLOC_THISSYSTEM=1
+  fi
+fi
 
 if [[ "${HPCBIND_OUTPUT_PREFIX}" == "" ]]; then
   HPCBIND_LOG=/dev/null
   HPCBIND_ERR=/dev/null
   HPCBIND_OUT=/dev/null
 else
-  if [[ ${HPCBIND_QUEUE_SIZE} -gt 0 ]]; then
-    HPCBIND_STR_QUEUE_SIZE="${HPCBIND_QUEUE_SIZE}"
-    HPCBIND_STR_QUEUE_RANK=$(printf %0*d ${#HPCBIND_STR_QUEUE_SIZE} ${HPCBIND_QUEUE_RANK})
-
-    HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_STR_QUEUE_RANK}"
-    HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_STR_QUEUE_RANK}"
-    HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_STR_QUEUE_RANK}"
-  else
-    HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_QUEUE_RANK}"
-    HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_QUEUE_RANK}"
-    HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_QUEUE_RANK}"
+  if [[ ${HPCBIND_QUEUE_SIZE} -le 0 ]]; then
+    HPCBIND_QUEUE_SIZE=1
   fi
+  HPCBIND_STR_QUEUE_SIZE="${HPCBIND_QUEUE_SIZE}"
+  HPCBIND_STR_QUEUE_RANK=$(printf %0*d ${#HPCBIND_STR_QUEUE_SIZE} ${HPCBIND_QUEUE_RANK})
+
+  HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_STR_QUEUE_RANK}"
+  HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_STR_QUEUE_RANK}"
+  HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_STR_QUEUE_RANK}"
   > ${HPCBIND_LOG}
 fi
 
@@ -546,6 +576,8 @@ if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then
   hostname -s >> ${HPCBIND_LOG}
   echo "[HPCBIND]" >> ${HPCBIND_LOG}
   echo "${TMP_ENV}" | grep -E "^HPCBIND_" >> ${HPCBIND_LOG}
+  echo "[HWLOC]" >> ${HPCBIND_LOG}
+  echo "${TMP_ENV}" | grep -E "^HWLOC_" >> ${HPCBIND_LOG}
   echo "[CUDA]" >> ${HPCBIND_LOG}
   echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG}
   echo "[OPENMP]" >> ${HPCBIND_LOG}
@@ -568,6 +600,8 @@ else
   hostname -s > >(tee -a ${HPCBIND_LOG})
   echo "[HPCBIND]" > >(tee -a ${HPCBIND_LOG})
   echo "${TMP_ENV}" | grep -E "^HPCBIND_" > >(tee -a ${HPCBIND_LOG})
+  echo "[HWLOC]" > >(tee -a ${HPCBIND_LOG})
+  echo "${TMP_ENV}" | grep -E "^HWLOC_" > >(tee -a ${HPCBIND_LOG})
   echo "[CUDA]" > >(tee -a ${HPCBIND_LOG})
   echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG})
   echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG})
diff --git a/packages/kokkos/bin/nvcc_wrapper b/packages/kokkos/bin/nvcc_wrapper
index d339da4fc..f926ae024 100755
--- a/packages/kokkos/bin/nvcc_wrapper
+++ b/packages/kokkos/bin/nvcc_wrapper
@@ -74,6 +74,9 @@ dry_run=0
 host_only=0
 host_only_args=""
 
+# Just run version on host compiler
+get_host_version=0
+
 # Enable workaround for CUDA 6.5 for pragma ident 
 replace_pragma_ident=0
 
@@ -93,6 +96,9 @@ depfile_separate=0
 depfile_output_arg=""
 depfile_target_arg=""
 
+# Option to remove duplicate libraries and object files
+remove_duplicate_link_files=0
+
 #echo "Arguments: $# $@"
 
 while [ $# -gt 0 ]
@@ -106,10 +112,18 @@ do
   --host-only)
     host_only=1
     ;;
+  #get the host version only
+  --host-version)
+    get_host_version=1
+    ;;
   #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
   --replace-pragma-ident)
     replace_pragma_ident=1
     ;;
+  #remove duplicate link files
+  --remove-duplicate-link-files)
+    remove_duplicate_link_files=1
+    ;;
   #handle source files to be compiled as cuda files
   *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
     cpp_files="$cpp_files $1"
@@ -124,7 +138,12 @@ do
     fi
     ;;
   #Handle shared args (valid for both nvcc and the host compiler)
-  -D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
+  -D*)
+    unescape_commas=`echo "$1" | sed -e 's/\\\,/,/g'`
+    arg=`printf "%q" $unescape_commas`
+    shared_args="$shared_args $arg"
+    ;;
+  -I*|-L*|-l*|-g|--help|--version|-E|-M|-shared|-w)
     shared_args="$shared_args $1"
     ;;
   #Handle compilation argument
@@ -152,7 +171,7 @@ do
     shift
     ;;
   #Handle known nvcc args
-  -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
+  --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
     cuda_args="$cuda_args $1"
     ;;
   #Handle more known nvcc args
@@ -164,8 +183,11 @@ do
     cuda_args="$cuda_args $1 $2"
     shift
     ;;
+  -rdc=*|-maxrregcount*|--maxrregcount*)
+    cuda_args="$cuda_args $1"
+    ;;
   #Handle c++11
-  --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z)
+  --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1y|-std=c++1y|--std=c++17|-std=c++17|--std=c++1z|-std=c++1z)
     if [ $stdcxx_applied -eq 1 ]; then
        echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting"
     else
@@ -205,6 +227,15 @@ do
     fi
     shift
     ;;
+  #Handle -+ (same as -x c++, specifically used for xl compilers, but mutually exclusive with -x. So replace it with -x c++)
+  -+)
+    if [ $first_xcompiler_arg -eq 1 ]; then
+      xcompiler_args="-x,c++"
+      first_xcompiler_arg=0
+    else
+      xcompiler_args="$xcompiler_args,-x,c++"
+    fi
+    ;;
   #Handle -ccbin (if its not set we can set it to a default value)
   -ccbin)
     cuda_args="$cuda_args $1 $2"
@@ -212,18 +243,39 @@ do
     host_compiler=$2
     shift
     ;;
-  #Handle -arch argument (if its not set use a default
-  -arch*)
+
+  #Handle -arch argument (if its not set use a default) this is the version with = sign
+  -arch*|-gencode*)
     cuda_args="$cuda_args $1"
     arch_set=1
     ;;
+  #Handle -code argument (if its not set use a default) this is the version with = sign
+  -code*)
+    cuda_args="$cuda_args $1"
+    ;;
+  #Handle -arch argument (if its not set use a default) this is the version without = sign
+  -arch|-gencode)
+    cuda_args="$cuda_args $1 $2"
+    arch_set=1
+    shift
+    ;;
+  #Handle -code argument (if its not set use a default) this is the version without = sign
+  -code)
+    cuda_args="$cuda_args $1 $2"
+    shift
+    ;;
   #Handle -Xcudafe argument
   -Xcudafe)
     cuda_args="$cuda_args -Xcudafe $2"
     shift
     ;;
+  #Handle -Xlinker argument
+  -Xlinker)
+    xlinker_args="$xlinker_args -Xlinker $2"
+    shift
+    ;;
   #Handle args that should be sent to the linker
-  -Wl*)
+  -Wl,*)
     xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
     host_linker_args="$host_linker_args ${1:4:${#1}}"
     ;;
@@ -256,6 +308,44 @@ do
   shift
 done
 
+# Only print host compiler version
+if [ $get_host_version -eq 1 ]; then
+  $host_compiler --version
+  exit
+fi
+
+#Remove duplicate object files
+if [ $remove_duplicate_link_files -eq 1 ]; then
+for obj in $object_files
+do
+  object_files_reverse="$obj $object_files_reverse"
+done
+
+object_files_reverse_clean=""
+for obj in $object_files_reverse
+do
+  exists=false
+  for obj2 in $object_files_reverse_clean
+  do
+    if [ "$obj" == "$obj2" ]
+    then
+      exists=true
+      echo "Exists: $obj"
+    fi
+  done
+  if [ "$exists" == "false" ]
+  then
+    object_files_reverse_clean="$object_files_reverse_clean $obj"
+  fi
+done
+
+object_files=""
+for obj in $object_files_reverse_clean
+do
+  object_files="$obj $object_files"
+done
+fi
+
 #Add default host compiler if necessary
 if [ $ccbin_set -ne 1 ]; then
   cuda_args="$cuda_args -ccbin $host_compiler"
@@ -328,10 +418,19 @@ fi
 
 #Run compilation command
 if [ $host_only -eq 1 ]; then
+  if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then
+    echo "$host_command"
+  fi
   $host_command
 elif [ -n "$nvcc_depfile_command" ]; then
+  if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then
+    echo "$nvcc_command && $nvcc_depfile_command"
+  fi
   $nvcc_command && $nvcc_depfile_command
 else
+  if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then
+    echo "$nvcc_command"
+  fi
   $nvcc_command
 fi
 error_code=$?
diff --git a/packages/kokkos/cmake/kokkos_build.cmake b/packages/kokkos/cmake/kokkos_build.cmake
index 94dd733ca..8178483d0 100644
--- a/packages/kokkos/cmake/kokkos_build.cmake
+++ b/packages/kokkos/cmake/kokkos_build.cmake
@@ -235,3 +235,7 @@ install(FILES
 # Install the export set for use with the install-tree
 INSTALL(EXPORT KokkosTargets DESTINATION
        "${INSTALL_CMAKE_DIR}")
+
+# build and install pkgconfig file
+CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY)
+INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig)
diff --git a/packages/kokkos/cmake/kokkos_functions.cmake b/packages/kokkos/cmake/kokkos_functions.cmake
index c0c62ccb6..bc490115a 100644
--- a/packages/kokkos/cmake/kokkos_functions.cmake
+++ b/packages/kokkos/cmake/kokkos_functions.cmake
@@ -47,7 +47,7 @@ function(set_kokkos_cxx_compiler)
                     OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
                     OUTPUT_STRIP_TRAILING_WHITESPACE)
 
-    string(REGEX MATCH "[0-9]+\.[0-9]+\.[0-9]+$"
+    string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
            INTERNAL_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
   endif()
 
diff --git a/packages/kokkos/cmake/kokkos_options.cmake b/packages/kokkos/cmake/kokkos_options.cmake
index 80a091bb9..580d1d322 100644
--- a/packages/kokkos/cmake/kokkos_options.cmake
+++ b/packages/kokkos/cmake/kokkos_options.cmake
@@ -41,7 +41,6 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST
 foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
   string(TOUPPER ${opt} OPT )
   IF(DEFINED Kokkos_ENABLE_${opt})
-    MESSAGE("Kokkos_ENABLE_${opt} is defined!")
     IF(DEFINED KOKKOS_ENABLE_${OPT})
       IF(NOT ("${KOKKOS_ENABLE_${OPT}}" STREQUAL "${Kokkos_ENABLE_${opt}}"))
         IF(DEFINED KOKKOS_ENABLE_${OPT}_INTERNAL)
@@ -59,7 +58,6 @@ foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
       ENDIF()
     ELSE()
       SET(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}})
-      MESSAGE("set KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT!")
     ENDIF()
   ENDIF()
 endforeach()
@@ -81,6 +79,7 @@ list(APPEND KOKKOS_ARCH_LIST
      ARMv80          # (HOST) ARMv8.0 Compatible CPU
      ARMv81          # (HOST) ARMv8.1 Compatible CPU
      ARMv8-ThunderX  # (HOST) ARMv8 Cavium ThunderX CPU
+     ARMv8-TX2       # (HOST) ARMv8 Cavium ThunderX2 CPU
      WSM             # (HOST) Intel Westmere CPU
      SNB             # (HOST) Intel Sandy/Ivy Bridge CPUs
      HSW             # (HOST) Intel Haswell CPUs
@@ -123,11 +122,18 @@ list(APPEND KOKKOS_DEVICES_LIST
 # List of possible TPLs for Kokkos
 # From Makefile.kokkos: Options: hwloc,librt,experimental_memkind
 set(KOKKOS_USE_TPLS_LIST)
+if(APPLE)
+list(APPEND KOKKOS_USE_TPLS_LIST
+    HWLOC          # hwloc
+    MEMKIND        # experimental_memkind
+    )
+else()
 list(APPEND KOKKOS_USE_TPLS_LIST
     HWLOC          # hwloc
     LIBRT          # librt
     MEMKIND        # experimental_memkind
     )
+endif()
 # Map of cmake variables to Makefile variables
 set(KOKKOS_INTERNAL_HWLOC hwloc)
 set(KOKKOS_INTERNAL_LIBRT librt)
@@ -172,6 +178,7 @@ set(KOKKOS_INTERNAL_LAMBDA enable_lambda)
 
 set(tmpr "\n       ")
 string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}")
+set(KOKKOS_INTERNAL_ARCH_DOCSTR "${tmpr}${KOKKOS_INTERNAL_ARCH_DOCSTR}")
 # This would be useful, but we use Foo_ENABLE mechanisms
 #string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_DEVICES_DOCSTR "${KOKKOS_DEVICES_LIST}")
 #string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_USE_TPLS_DOCSTR "${KOKKOS_USE_TPLS_LIST}")
@@ -269,7 +276,7 @@ set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_P
 set_kokkos_default_default(DEPRECATED_CODE ON)
 set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAULT} CACHE BOOL "Enable deprecated code.")
 
-set_kokkos_default_default(EXPLICIT_INSTANTIATION ON)
+set_kokkos_default_default(EXPLICIT_INSTANTIATION OFF)
 set(KOKKOS_ENABLE_EXPLICIT_INSTANTIATION ${KOKKOS_INTERNAL_ENABLE_EXPLICIT_INSTANTIATION_DEFAULT} CACHE BOOL "Enable explicit template instantiation.")
 
 #-------------------------------------------------------------------------------
diff --git a/packages/kokkos/cmake/kokkos_settings.cmake b/packages/kokkos/cmake/kokkos_settings.cmake
index 21c9d75a9..387ced6d5 100644
--- a/packages/kokkos/cmake/kokkos_settings.cmake
+++ b/packages/kokkos/cmake/kokkos_settings.cmake
@@ -15,16 +15,16 @@
 
 # Ensure that KOKKOS_ARCH is in the ARCH_LIST
 if (KOKKOS_ARCH MATCHES ",")
-  message("-- Detected a comma in: KOKKOS_ARCH=${KOKKOS_ARCH}")
+  message("-- Detected a comma in: KOKKOS_ARCH=`${KOKKOS_ARCH}`")
   message("-- Although we prefer KOKKOS_ARCH to be semicolon-delimited, we do allow")
   message("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)")
   string(REPLACE "," ";" KOKKOS_ARCH "${KOKKOS_ARCH}")
-  message("-- Commas were changed to semicolons, now KOKKOS_ARCH=${KOKKOS_ARCH}")
+  message("-- Commas were changed to semicolons, now KOKKOS_ARCH=`${KOKKOS_ARCH}`")
 endif()
 foreach(arch ${KOKKOS_ARCH})
   list(FIND KOKKOS_ARCH_LIST ${arch} indx)
   if (indx EQUAL -1)
-    message(FATAL_ERROR "${arch} is not an accepted value for KOKKOS_ARCH."
+    message(FATAL_ERROR "`${arch}` is not an accepted value in KOKKOS_ARCH=`${KOKKOS_ARCH}`."
       "  Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}")
   endif ()
 endforeach()
@@ -130,7 +130,8 @@ string(REPLACE ";" ":" KOKKOS_INTERNAL_ADDTOPATH "${addpathl}")
 # Set the KOKKOS_SETTINGS String -- this is the primary communication with the
 # makefile configuration.  See Makefile.kokkos
 
-set(KOKKOS_SETTINGS KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH})
+set(KOKKOS_SETTINGS KOKKOS_CMAKE=yes)
+set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH})
 set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_PATH=${KOKKOS_PATH})
 set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFIX})
 
@@ -158,7 +159,7 @@ if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "")
   set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS})
 endif()
 if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "")
-  set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=\"${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}\"")
+  set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}")
 endif()
 
 if (CMAKE_CXX_STANDARD)
diff --git a/packages/kokkos/config/test_all_sandia b/packages/kokkos/config/test_all_sandia
index 15e6049af..d94c38cbc 100755
--- a/packages/kokkos/config/test_all_sandia
+++ b/packages/kokkos/config/test_all_sandia
@@ -241,17 +241,16 @@ elif [ "$MACHINE" = "white" ]; then
 
   BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
   IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
-  CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
-  CUDA_MODULE_LIST2="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.3.0,ibm/xl/13.1.6"
+  CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.4.0,ibm/xl/16.1.0"
 
   # Don't do pthread on white.
   GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
 
   # Format: (compiler module-list build-list exe-name warning-flag)
   COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-             "ibm/13.1.6 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
-             "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
-             "cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+             "gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
+             "cuda/9.0.103 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
   )
 
   if [ -z "$ARCH_FLAG" ]; then
@@ -362,7 +361,7 @@ elif [ "$MACHINE" = "apollo" ]; then
                "gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
                "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
                "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
-               "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
+               "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread,OpenMP" clang++ $CUDA_WARNING_FLAGS"
                "cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
     )
   else
diff --git a/packages/kokkos/containers/src/Kokkos_DualView.hpp b/packages/kokkos/containers/src/Kokkos_DualView.hpp
index 548e96d25..adba0c415 100644
--- a/packages/kokkos/containers/src/Kokkos_DualView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_DualView.hpp
@@ -96,6 +96,7 @@ template< class DataType ,
           class Arg3Type = void>
 class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
 {
+template< class , class , class , class > friend class DualView ;
 public:
   //! \name Typedefs for device types and various Kokkos::View specializations.
   //@{
@@ -182,8 +183,20 @@ public:
   //! \name Counters to keep track of changes ("modified" flags)
   //@{
 
-  View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device;
-  View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host;
+#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+protected:
+  // modified_flags[0] -> host
+  // modified_flags[1] -> device
+  typedef View<unsigned int[2],LayoutLeft,Kokkos::HostSpace> t_modified_flags;
+  t_modified_flags modified_flags;
+
+public:
+#else
+  typedef View<unsigned int[2],LayoutLeft,typename t_host::execution_space> t_modified_flags;
+  typedef View<unsigned int,LayoutLeft,typename t_host::execution_space> t_modified_flag;
+  t_modified_flags modified_flags;
+  t_modified_flag modified_host,modified_device;
+#endif
 
   //@}
   //! \name Constructors
@@ -194,10 +207,14 @@ public:
   /// Both device and host View objects are constructed using their
   /// default constructors.  The "modified" flags are both initialized
   /// to "unmodified."
-  DualView () :
-    modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
-    modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
-  {}
+#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+  DualView () = default;
+#else
+  DualView ():modified_flags (t_modified_flags("DualView::modified_flags")) {
+    modified_host = t_modified_flag(modified_flags,0);
+    modified_device = t_modified_flag(modified_flags,1);
+  }
+#endif
 
   /// \brief Constructor that allocates View objects on both host and device.
   ///
@@ -219,17 +236,24 @@ public:
             const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
     : d_view (label, n0, n1, n2, n3, n4, n5, n6, n7)
     , h_view (create_mirror_view (d_view)) // without UVM, host View mirrors
-    , modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device"))
-    , modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
-  {}
+    , modified_flags (t_modified_flags("DualView::modified_flags"))
+  {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    modified_host = t_modified_flag(modified_flags,0);
+    modified_device = t_modified_flag(modified_flags,1);
+#endif
+  }
 
   //! Copy constructor (shallow copy)
   template<class SS, class LS, class DS, class MS>
   DualView (const DualView<SS,LS,DS,MS>& src) :
     d_view (src.d_view),
     h_view (src.h_view),
-    modified_device (src.modified_device),
-    modified_host (src.modified_host)
+    modified_flags (src.modified_flags)
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    , modified_host(src.modified_host)
+    , modified_device(src.modified_device)
+#endif
   {}
 
   //! Subview constructor
@@ -241,8 +265,11 @@ public:
           )
     : d_view( Kokkos::subview( src.d_view , arg0 , args ... ) )
     , h_view( Kokkos::subview( src.h_view , arg0 , args ... ) )
-    , modified_device (src.modified_device)
-    , modified_host (src.modified_host)
+    , modified_flags (src.modified_flags)
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    , modified_host(src.modified_host)
+    , modified_device(src.modified_device)
+#endif
     {}
 
   /// \brief Create DualView from existing device and host View objects.
@@ -258,8 +285,7 @@ public:
   DualView (const t_dev& d_view_, const t_host& h_view_) :
     d_view (d_view_),
     h_view (h_view_),
-    modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
-    modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
+    modified_flags (t_modified_flags("DualView::modified_flags"))
   {
     if ( int(d_view.rank)     != int(h_view.rank) ||
          d_view.extent(0) != h_view.extent(0) ||
@@ -281,6 +307,10 @@ public:
          d_view.span()        != h_view.span() ) {
       Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views");
     }
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    modified_host = t_modified_flag(modified_flags,0);
+    modified_device = t_modified_flag(modified_flags,1);
+#endif
   }
 
   //@}
@@ -316,6 +346,30 @@ public:
     t_dev,
     t_host>::type& view () const
   {
+    #ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+    constexpr bool device_is_memspace  = std::is_same<Device,typename Device::memory_space>::value;
+    constexpr bool device_is_execspace = std::is_same<Device,typename Device::execution_space>::value;
+    constexpr bool device_exec_is_t_dev_exec  = std::is_same<typename Device::execution_space,typename t_dev::execution_space>::value;
+    constexpr bool device_mem_is_t_dev_mem    = std::is_same<typename Device::memory_space,typename t_dev::memory_space>::value;
+    constexpr bool device_exec_is_t_host_exec  = std::is_same<typename Device::execution_space,typename t_host::execution_space>::value;
+    constexpr bool device_mem_is_t_host_mem    = std::is_same<typename Device::memory_space,typename t_host::memory_space>::value;
+    constexpr bool device_is_t_host_device  = std::is_same<typename Device::execution_space,typename t_host::device_type>::value;
+    constexpr bool device_is_t_dev_device    = std::is_same<typename Device::memory_space,typename t_host::device_type>::value;
+
+    static_assert(
+        device_is_t_dev_device || device_is_t_host_device ||
+        (device_is_memspace  && (device_mem_is_t_dev_mem   || device_mem_is_t_host_mem) ) ||
+        (device_is_execspace && (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) ) ||
+        (
+          (!device_is_execspace && !device_is_memspace) && (
+            (device_mem_is_t_dev_mem   || device_mem_is_t_host_mem)  ||
+            (device_exec_is_t_dev_exec || device_exec_is_t_host_exec)
+          )
+        )
+        ,
+        "Template parameter to .view() must exactly match one of the DualView's device types or one of the execution or memory spaces");
+    #endif
+
     return Impl::if_c<
       std::is_same<
         typename t_dev::memory_space,
@@ -324,6 +378,72 @@ public:
       t_host >::select (d_view , h_view);
   }
 
+  KOKKOS_INLINE_FUNCTION
+  t_host view_host() const {
+    return h_view;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  t_dev view_device() const {
+    return d_view;
+  }
+
+  template<class Device>
+  static int get_device_side() {
+    constexpr bool device_is_memspace  = std::is_same<Device,typename Device::memory_space>::value;
+    constexpr bool device_is_execspace = std::is_same<Device,typename Device::execution_space>::value;
+    constexpr bool device_exec_is_t_dev_exec  = std::is_same<typename Device::execution_space,typename t_dev::execution_space>::value;
+    constexpr bool device_mem_is_t_dev_mem    = std::is_same<typename Device::memory_space,typename t_dev::memory_space>::value;
+    constexpr bool device_exec_is_t_host_exec  = std::is_same<typename Device::execution_space,typename t_host::execution_space>::value;
+    constexpr bool device_mem_is_t_host_mem    = std::is_same<typename Device::memory_space,typename t_host::memory_space>::value;
+    constexpr bool device_is_t_host_device  = std::is_same<typename Device::execution_space,typename t_host::device_type>::value;
+    constexpr bool device_is_t_dev_device    = std::is_same<typename Device::memory_space,typename t_host::device_type>::value;
+
+    #ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+    static_assert(
+        device_is_t_dev_device || device_is_t_host_device ||
+        (device_is_memspace  && (device_mem_is_t_dev_mem   || device_mem_is_t_host_mem) ) ||
+        (device_is_execspace && (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) ) ||
+        (
+          (!device_is_execspace && !device_is_memspace) && (
+            (device_mem_is_t_dev_mem   || device_mem_is_t_host_mem)  ||
+            (device_exec_is_t_dev_exec || device_exec_is_t_host_exec)
+          )
+        )
+        ,
+        "Template parameter to .sync() must exactly match one of the DualView's device types or one of the execution or memory spaces");
+    #endif
+
+    #ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+    int dev = -1;
+    #else
+    int dev = 0;
+    #endif
+    if(device_is_t_dev_device) dev = 1;
+    else if(device_is_t_host_device) dev = 0;
+    else {
+      if(device_is_memspace) {
+        if(device_mem_is_t_dev_mem) dev = 1;
+        if(device_mem_is_t_host_mem) dev = 0;
+        if(device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1;
+      }
+      if(device_is_execspace) {
+        if(device_exec_is_t_dev_exec) dev = 1;
+        if(device_exec_is_t_host_exec) dev = 0;
+        if(device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1;
+      }
+      if(!device_is_execspace && !device_is_memspace) {
+        if(device_mem_is_t_dev_mem) dev = 1;
+        if(device_mem_is_t_host_mem) dev = 0;
+        if(device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1;
+        if(device_exec_is_t_dev_exec) dev = 1;
+        if(device_exec_is_t_host_exec) dev = 0;
+        if(device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1;
+      }
+    }
+    return dev;
+  }
+
   /// \brief Update data on device or host only if data in the other
   ///   space has been marked as modified.
   ///
@@ -347,23 +467,20 @@ public:
         ( std::is_same< Device , int>::value)
         , int >::type& = 0)
   {
-    const unsigned int dev =
-      Impl::if_c<
-        std::is_same<
-          typename t_dev::memory_space,
-          typename Device::memory_space>::value ,
-        unsigned int,
-        unsigned int>::select (1, 0);
-
-    if (dev) { // if Device is the same as DualView's device type
-      if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
+    if(modified_flags.data()==NULL) return;
+
+    int dev = get_device_side<Device>();
+
+    if (dev == 1) { // if Device is the same as DualView's device type
+      if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) {
         deep_copy (d_view, h_view);
-        modified_host() = modified_device() = 0;
+        modified_flags(0) = modified_flags(1) = 0;
       }
-    } else { // hopefully Device is the same as DualView's host type
-      if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
+    }
+    if (dev == 0) { // hopefully Device is the same as DualView's host type
+      if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) {
         deep_copy (h_view, d_view);
-        modified_host() = modified_device() = 0;
+        modified_flags(0) = modified_flags(1) = 0;
       }
     }
     if(std::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
@@ -378,46 +495,71 @@ public:
       ( std::is_same< Device , int>::value)
       , int >::type& = 0 )
   {
-    const unsigned int dev =
-      Impl::if_c<
-        std::is_same<
-          typename t_dev::memory_space,
-          typename Device::memory_space>::value,
-        unsigned int,
-        unsigned int>::select (1, 0);
-    if (dev) { // if Device is the same as DualView's device type
-      if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
+    if(modified_flags.data()==NULL) return;
+
+    int dev = get_device_side<Device>();
+
+    if (dev == 1) { // if Device is the same as DualView's device type
+      if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) {
         Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
       }
-    } else { // hopefully Device is the same as DualView's host type
-      if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
+    }
+    if (dev == 0){ // hopefully Device is the same as DualView's host type
+      if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) {
         Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
       }
     }
   }
 
+  void sync_host() {
+    if( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value )
+      Impl::throw_runtime_exception("Calling sync_host on a DualView with a const datatype.");
+    if(modified_flags.data()==NULL) return;
+    if(modified_flags(1) > modified_flags(0)) {
+      deep_copy (h_view, d_view);
+      modified_flags(1) = modified_flags(0) = 0;
+    }
+  }
+
+  void sync_device() {
+    if( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value )
+      Impl::throw_runtime_exception("Calling sync_device on a DualView with a const datatype.");
+    if(modified_flags.data()==NULL) return;
+    if(modified_flags(0) > modified_flags(1)) {
+      deep_copy (d_view, h_view);
+      modified_flags(1) = modified_flags(0) = 0;
+    }
+  }
+
   template<class Device>
   bool need_sync() const
   {
-    const unsigned int dev =
-      Impl::if_c<
-        std::is_same<
-          typename t_dev::memory_space,
-          typename Device::memory_space>::value ,
-        unsigned int,
-        unsigned int>::select (1, 0);
-
-    if (dev) { // if Device is the same as DualView's device type
-      if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
+    if(modified_flags.data()==NULL) return false;
+    int dev = get_device_side<Device>();
+
+    if (dev == 1) { // if Device is the same as DualView's device type
+      if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) {
         return true;
       }
-    } else { // hopefully Device is the same as DualView's host type
-      if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
+    }
+    if (dev == 0){ // hopefully Device is the same as DualView's host type
+      if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) {
         return true;
       }
     }
     return false;
   }
+
+  inline bool need_sync_host() const {
+    if(modified_flags.data()==NULL) return false;
+    return modified_flags(0)<modified_flags(1);
+  }
+
+  inline bool need_sync_device() const {
+    if(modified_flags.data()==NULL) return false;
+    return modified_flags(1)<modified_flags(0);
+  }
+
   /// \brief Mark data as modified on the given device \c Device.
   ///
   /// If \c Device is the same as this DualView's device type, then
@@ -425,26 +567,22 @@ public:
   /// data as modified.
   template<class Device>
   void modify () {
-    const unsigned int dev =
-      Impl::if_c<
-        std::is_same<
-          typename t_dev::memory_space,
-          typename Device::memory_space>::value,
-        unsigned int,
-        unsigned int>::select (1, 0);
-
-    if (dev) { // if Device is the same as DualView's device type
+    if(modified_flags.data()==NULL) return;
+    int dev = get_device_side<Device>();
+
+    if (dev == 1) { // if Device is the same as DualView's device type
       // Increment the device's modified count.
-      modified_device () = (modified_device () > modified_host () ?
-                            modified_device () : modified_host ()) + 1;
-    } else { // hopefully Device is the same as DualView's host type
+      modified_flags(1) = (modified_flags(1) > modified_flags(0) ?
+                            modified_flags(1) : modified_flags(0)) + 1;
+    }
+    if (dev == 0) { // hopefully Device is the same as DualView's host type
       // Increment the host's modified count.
-      modified_host () = (modified_device () > modified_host () ?
-                          modified_device () : modified_host ())  + 1;
+      modified_flags(0) = (modified_flags(1) > modified_flags(0) ?
+                          modified_flags(1) : modified_flags(0))  + 1;
     }
 
 #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
-    if (modified_host() && modified_device()) {
+    if (modified_flags(0) && modified_flags(1)) {
       std::string msg = "Kokkos::DualView::modify ERROR: ";
       msg += "Concurrent modification of host and device views ";
       msg += "in DualView \"";
@@ -455,6 +593,45 @@ public:
 #endif
   }
 
+  inline void modify_host() {
+    if(modified_flags.data()!=NULL) {
+      modified_flags(0) = (modified_flags(1) > modified_flags(0) ?
+          modified_flags(1) : modified_flags(0))  + 1;
+      #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
+      if (modified_flags(0) && modified_flags(1)) {
+        std::string msg = "Kokkos::DualView::modify_host ERROR: ";
+        msg += "Concurrent modification of host and device views ";
+        msg += "in DualView \"";
+        msg += d_view.label();
+        msg += "\"\n";
+        Kokkos::abort(msg.c_str());
+      }
+    #endif
+    }
+  }
+
+  inline void modify_device() {
+    if(modified_flags.data()!=NULL) {
+      modified_flags(1) = (modified_flags(1) > modified_flags(0) ?
+          modified_flags(1) : modified_flags(0))  + 1;
+      #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
+      if (modified_flags(0) && modified_flags(1)) {
+        std::string msg = "Kokkos::DualView::modify_device ERROR: ";
+        msg += "Concurrent modification of host and device views ";
+        msg += "in DualView \"";
+        msg += d_view.label();
+        msg += "\"\n";
+        Kokkos::abort(msg.c_str());
+      }
+      #endif
+    }
+  }
+
+  inline void clear_sync_state() {
+    if(modified_flags.data()!=NULL) 
+      modified_flags(1) = modified_flags(0) = 0;
+  }
+
   //@}
   //! \name Methods for reallocating or resizing the View objects.
   //@{
@@ -476,7 +653,10 @@ public:
      h_view = create_mirror_view( d_view );
 
      /* Reset dirty flags */
-     modified_device() = modified_host() = 0;
+     if(modified_flags.data()==NULL) {
+       modified_flags = t_modified_flags("DualView::modified_flags");
+     } else
+       modified_flags(1) = modified_flags(0) = 0;
   }
 
   /// \brief Resize both views, copying old contents into new if necessary.
@@ -491,13 +671,16 @@ public:
            const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ,
            const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ,
            const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ) {
-   if(modified_device() >= modified_host()) {
+   if(modified_flags.data()==NULL) {
+     modified_flags = t_modified_flags("DualView::modified_flags");
+   }
+   if(modified_flags(1) >= modified_flags(0)) {
      /* Resize on Device */
      ::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
      h_view = create_mirror_view( d_view );
 
      /* Mark Device copy as modified */
-     modified_device() = modified_device()+1;
+     modified_flags(1) = modified_flags(1)+1;
 
    } else {
      /* Realloc on Device */
@@ -525,7 +708,7 @@ public:
      d_view = create_mirror_view( typename t_dev::execution_space(), h_view );
 
      /* Mark Host copy as modified */
-     modified_host() = modified_host()+1;
+     modified_flags(0) = modified_flags(0)+1;
    }
   }
 
@@ -649,7 +832,10 @@ void
 deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
            const DualView<ST,SL,SD,SM>& src )
 {
-  if (src.modified_device () >= src.modified_host ()) {
+  if(src.modified_flags.data()==NULL || dst.modified_flags.data()==NULL) {
+    return deep_copy(dst.d_view, src.d_view);
+  }
+  if (src.modified_flags(1) >= src.modified_flags(0)) {
     deep_copy (dst.d_view, src.d_view);
     dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
   } else {
@@ -666,7 +852,10 @@ deep_copy (const ExecutionSpace& exec ,
            DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
            const DualView<ST,SL,SD,SM>& src )
 {
-  if (src.modified_device () >= src.modified_host ()) {
+  if(src.modified_flags.data()==NULL || dst.modified_flags.data()==NULL) {
+    return deep_copy(exec, dst.d_view, src.d_view);
+  }
+  if (src.modified_flags(1) >= src.modified_flags(0)) {
     deep_copy (exec, dst.d_view, src.d_view);
     dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
   } else {
diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
index b30009a99..8be2c49a3 100644
--- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
+++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp
@@ -64,7 +64,7 @@ namespace Impl {
 template <typename Specialize>
 struct DynRankDimTraits {
 
-  enum : size_t{unspecified =KOKKOS_INVALID_INDEX};
+  enum : size_t{unspecified = KOKKOS_INVALID_INDEX};
 
   // Compute the rank of the view from the nonzero dimension arguments.
   KOKKOS_INLINE_FUNCTION
@@ -384,8 +384,8 @@ public:
     // Removed dimension checks...
 
       typedef typename DstType::offset_type  dst_offset_type ;
-      dst.m_map.m_offset = dst_offset_type(std::integral_constant<unsigned,0>() , src.layout() ); //Check this for integer input1 for padding, etc
-      dst.m_map.m_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_handle , src.m_track );
+      dst.m_map.m_impl_offset = dst_offset_type(std::integral_constant<unsigned,0>() , src.layout() ); //Check this for integer input1 for padding, etc
+      dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_impl_handle , src.m_track );
       dst.m_track.assign( src.m_track , DstTraits::is_managed );
       dst.m_rank = src.Rank ;
     }
@@ -565,10 +565,14 @@ public:
 
   //----------------------------------------
   // Allow specializations to query their specialized map
-
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   KOKKOS_INLINE_FUNCTION
   const Kokkos::Impl::ViewMapping< traits , void > &
   implementation_map() const { return m_map ; }
+#endif
+  KOKKOS_INLINE_FUNCTION
+  const Kokkos::Impl::ViewMapping< traits , void > &
+  impl_map() const { return m_map ; }
 
   //----------------------------------------
 
@@ -624,7 +628,7 @@ public:
   reference_type operator()() const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) )
-      return implementation_map().reference();
+      return impl_map().reference();
       //return m_map.reference(0,0,0,0,0,0,0);
     }
 
@@ -647,7 +651,7 @@ public:
   typename std::enable_if< !std::is_same<typename drvtraits::value_type, typename drvtraits::scalar_array_type>::value && std::is_integral<iType>::value, reference_type>::type
   operator[](const iType & i0) const
     {
-//      auto map = implementation_map();
+//      auto map = impl_map();
       const size_t dim_scalar = m_map.dimension_scalar();
       const size_t bytes = this->span() / dim_scalar;
 
@@ -785,7 +789,7 @@ public:
   reference_type access() const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) )
-      return implementation_map().reference();
+      return impl_map().reference();
       //return m_map.reference(0,0,0,0,0,0,0);
     }
 
@@ -1004,7 +1008,7 @@ public:
 
   //----------------------------------------
   // Allocation according to allocation properties and array layout
-  // unused arg_layout dimensions must be set toKOKKOS_INVALID_INDEX so that rank deduction can properly take place
+  // unused arg_layout dimensions must be set to KOKKOS_INVALID_INDEX so that rank deduction can properly take place
   template< class ... P >
   explicit inline
   DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
@@ -1179,7 +1183,7 @@ public:
     : DynRankView( Kokkos::Impl::ViewCtorProp< std::string >( arg_label )
     , typename traits::array_layout
           ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 )
-          )
+      )
     {}
 
   // For backward compatibility
@@ -1189,8 +1193,7 @@ public:
       , const typename traits::array_layout & arg_layout
       )
     : DynRankView( Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing )
-
-          , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout)
+                 , arg_layout
       )
     {}
 
@@ -1205,7 +1208,9 @@ public:
       , const size_t arg_N6 =KOKKOS_INVALID_INDEX
       , const size_t arg_N7 =KOKKOS_INVALID_INDEX
       )
-    : DynRankView(Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing ), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
+    : DynRankView(Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing )
+      , typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7)
+      )
     {}
 
   //----------------------------------------
@@ -1445,30 +1450,30 @@ public:
       ret_type dst ;
 
       const SubviewExtents< 7 , rank > extents =
-        ExtentGenerator< Args ... >::generator( src.m_map.m_offset.m_dim , args... ) ;
+        ExtentGenerator< Args ... >::generator( src.m_map.m_impl_offset.m_dim , args... ) ;
 
-      dst_offset_type tempdst( src.m_map.m_offset , extents ) ;
+      dst_offset_type tempdst( src.m_map.m_impl_offset , extents ) ;
 
       dst.m_track = src.m_track ;
 
-      dst.m_map.m_offset.m_dim.N0 = tempdst.m_dim.N0 ;
-      dst.m_map.m_offset.m_dim.N1 = tempdst.m_dim.N1 ;
-      dst.m_map.m_offset.m_dim.N2 = tempdst.m_dim.N2 ;
-      dst.m_map.m_offset.m_dim.N3 = tempdst.m_dim.N3 ;
-      dst.m_map.m_offset.m_dim.N4 = tempdst.m_dim.N4 ;
-      dst.m_map.m_offset.m_dim.N5 = tempdst.m_dim.N5 ;
-      dst.m_map.m_offset.m_dim.N6 = tempdst.m_dim.N6 ;
-
-      dst.m_map.m_offset.m_stride.S0 = tempdst.m_stride.S0 ;
-      dst.m_map.m_offset.m_stride.S1 = tempdst.m_stride.S1 ;
-      dst.m_map.m_offset.m_stride.S2 = tempdst.m_stride.S2 ;
-      dst.m_map.m_offset.m_stride.S3 = tempdst.m_stride.S3 ;
-      dst.m_map.m_offset.m_stride.S4 = tempdst.m_stride.S4 ;
-      dst.m_map.m_offset.m_stride.S5 = tempdst.m_stride.S5 ;
-      dst.m_map.m_offset.m_stride.S6 = tempdst.m_stride.S6 ;
-
-      dst.m_map.m_handle = dst_handle_type( src.m_map.m_handle +
-                                      src.m_map.m_offset( extents.domain_offset(0)
+      dst.m_map.m_impl_offset.m_dim.N0 = tempdst.m_dim.N0 ;
+      dst.m_map.m_impl_offset.m_dim.N1 = tempdst.m_dim.N1 ;
+      dst.m_map.m_impl_offset.m_dim.N2 = tempdst.m_dim.N2 ;
+      dst.m_map.m_impl_offset.m_dim.N3 = tempdst.m_dim.N3 ;
+      dst.m_map.m_impl_offset.m_dim.N4 = tempdst.m_dim.N4 ;
+      dst.m_map.m_impl_offset.m_dim.N5 = tempdst.m_dim.N5 ;
+      dst.m_map.m_impl_offset.m_dim.N6 = tempdst.m_dim.N6 ;
+
+      dst.m_map.m_impl_offset.m_stride.S0 = tempdst.m_stride.S0 ;
+      dst.m_map.m_impl_offset.m_stride.S1 = tempdst.m_stride.S1 ;
+      dst.m_map.m_impl_offset.m_stride.S2 = tempdst.m_stride.S2 ;
+      dst.m_map.m_impl_offset.m_stride.S3 = tempdst.m_stride.S3 ;
+      dst.m_map.m_impl_offset.m_stride.S4 = tempdst.m_stride.S4 ;
+      dst.m_map.m_impl_offset.m_stride.S5 = tempdst.m_stride.S5 ;
+      dst.m_map.m_impl_offset.m_stride.S6 = tempdst.m_stride.S6 ;
+
+      dst.m_map.m_impl_handle = dst_handle_type( src.m_map.m_impl_handle +
+                                      src.m_map.m_impl_offset( extents.domain_offset(0)
                                                   , extents.domain_offset(1)
                                                   , extents.domain_offset(2)
                                                   , extents.domain_offset(3)
@@ -1896,6 +1901,7 @@ inline
 typename DynRankView<T,P...>::HostMirror
 create_mirror( const DynRankView<T,P...> & src
              , typename std::enable_if<
+                 std::is_same< typename ViewTraits<T,P...>::specialize , void >::value &&
                  ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
                                , Kokkos::LayoutStride >::value
                >::type * = 0
@@ -1914,6 +1920,7 @@ inline
 typename DynRankView<T,P...>::HostMirror
 create_mirror( const DynRankView<T,P...> & src
              , typename std::enable_if<
+                 std::is_same< typename ViewTraits<T,P...>::specialize , void >::value &&
                  std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
                              , Kokkos::LayoutStride >::value
                >::type * = 0
@@ -1929,7 +1936,11 @@ create_mirror( const DynRankView<T,P...> & src
 
 // Create a mirror in a new space (specialization for different space)
 template<class Space, class T, class ... P>
-typename Impl::MirrorDRVType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::DynRankView<T,P...> & src) {
+typename Impl::MirrorDRVType<Space,T,P ...>::view_type
+create_mirror(const Space& , const Kokkos::DynRankView<T,P...> & src
+             , typename std::enable_if<
+                 std::is_same< typename ViewTraits<T,P...>::specialize , void >::value
+               >::type * = 0) {
   return typename Impl::MirrorDRVType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) );
 }
 
@@ -1985,6 +1996,29 @@ create_mirror_view(const Space& , const Kokkos::DynRankView<T,P...> & src
   return typename Impl::MirrorDRViewType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) );
 }
 
+// Create a mirror view and deep_copy in a new space (specialization for same space)
+template<class Space, class T, class ... P>
+typename Impl::MirrorDRViewType<Space,T,P ...>::view_type
+create_mirror_view_and_copy(const Space& , const Kokkos::DynRankView<T,P...> & src
+  , std::string const& name = ""
+  , typename std::enable_if<Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+  (void)name;
+  return src;
+}
+
+// Create a mirror view and deep_copy in a new space (specialization for different space)
+template<class Space, class T, class ... P>
+typename Impl::MirrorDRViewType<Space,T,P ...>::view_type
+create_mirror_view_and_copy(const Space& , const Kokkos::DynRankView<T,P...> & src
+  , std::string const& name = ""
+  , typename std::enable_if<!Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+  using Mirror = typename Impl::MirrorDRViewType<Space,T,P ...>::view_type;
+  std::string label = name.empty() ? src.label() : name;
+  auto mirror = Mirror( Kokkos::ViewAllocateWithoutInitializing(label), Impl::reconstructLayout(src.layout(), src.rank()) );
+  deep_copy(mirror, src);
+  return mirror;
+}
+
 } //end Kokkos
 
 
diff --git a/packages/kokkos/containers/src/Kokkos_OffsetView.hpp b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp
new file mode 100644
index 000000000..b614764ee
--- /dev/null
+++ b/packages/kokkos/containers/src/Kokkos_OffsetView.hpp
@@ -0,0 +1,1895 @@
+/*
+ * Kokkos_OffsetView.hpp
+ *
+ *  Created on: Apr 23, 2018
+ *      Author: swbova
+ */
+
+#ifndef KOKKOS_OFFSETVIEW_HPP_
+#define KOKKOS_OFFSETVIEW_HPP_
+
+
+#include <Kokkos_Core.hpp>
+
+#include <Kokkos_View.hpp>
+
+namespace Kokkos {
+
+   namespace Experimental {
+      //----------------------------------------------------------------------------
+      //----------------------------------------------------------------------------
+
+      template< class DataType , class ... Properties >
+      class OffsetView ;
+
+      template< class > struct is_offset_view : public std::false_type {};
+
+      template< class D, class ... P >
+      struct is_offset_view< OffsetView<D,P...> > : public std::true_type {};
+
+      template< class D, class ... P >
+      struct is_offset_view< const OffsetView<D,P...> > : public std::true_type {};
+
+#define KOKKOS_INVALID_OFFSET int64_t(0)
+#define KOKKOS_INVALID_INDEX_RANGE {KOKKOS_INVALID_OFFSET, KOKKOS_INVALID_OFFSET}
+
+      template <typename iType, typename std::enable_if< std::is_integral<iType>::value &&
+      std::is_signed<iType>::value, iType >::type = 0>
+      using IndexRange  = Kokkos::Array<iType, 2>;
+
+
+      using index_list_type = std::initializer_list<int64_t>;
+
+
+      //  template <typename iType,
+      //    typename std::enable_if< std::is_integral<iType>::value &&
+      //      std::is_signed<iType>::value, iType >::type = 0> using min_index_type = std::initializer_list<iType>;
+
+      namespace Impl {
+
+         template<class ViewType>
+         struct GetOffsetViewTypeFromViewType {
+
+            typedef OffsetView<typename ViewType::data_type,typename ViewType::array_layout,
+                  typename ViewType::device_type,typename ViewType::memory_traits> type;
+
+         };
+
+         template< unsigned , class MapType, class BeginsType >
+         KOKKOS_INLINE_FUNCTION
+         bool offsetview_verify_operator_bounds( const MapType &, const BeginsType & )
+         { return true ; }
+
+         template< unsigned R , class MapType , class BeginsType, class iType , class ... Args >
+         KOKKOS_INLINE_FUNCTION
+         bool offsetview_verify_operator_bounds
+         ( const MapType & map
+           , const BeginsType & begins
+           , const iType   & i
+           , Args ... args
+         )
+         {
+
+           const bool legalIndex =  ( int64_t(i) >=  begins[R]  ) &&
+               ( int64_t(i) <= int64_t(begins[R] + map.extent(R) - 1) );
+           return  legalIndex
+               && offsetview_verify_operator_bounds<R+1>( map , begins,  args ... );
+         }
+         template< unsigned , class MapType, class BeginsType >
+         inline
+         void offsetview_error_operator_bounds( char * , int , const MapType & , const BeginsType &)
+         {}
+
+         template< unsigned R , class MapType , class BeginsType , class iType , class ... Args >
+         inline
+         void offsetview_error_operator_bounds
+           ( char * buf
+           , int len
+           , const MapType & map
+           , const  BeginsType begins
+           , const iType   & i
+           , Args ... args
+           )
+         {
+           const int64_t b = begins[R];
+           const int64_t e = b + map.extent(R) - 1;
+           const int n =
+             snprintf(buf,len," %ld <= %ld <= %ld %c"
+                     , static_cast<unsigned long>(b)
+                     , static_cast<unsigned long>(i)
+                     , static_cast<unsigned long>(e)
+                     , ( sizeof...(Args) ? ',' : ')' )
+                     );
+           offsetview_error_operator_bounds<R+1>(buf+n,len-n,map,begins,args...);
+         }
+
+         template< class MemorySpace , class MapType , class BeginsType, class ... Args >
+         KOKKOS_INLINE_FUNCTION
+         void offsetview_verify_operator_bounds
+           ( Kokkos::Impl::SharedAllocationTracker const & tracker
+           , const MapType & map , const BeginsType & begins, Args ... args )
+         {
+           if ( ! offsetview_verify_operator_bounds<0>( map , begins, args ... ) ) {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+              enum { LEN = 1024 };
+              char buffer[ LEN ];
+              const std::string label = tracker.template get_label<MemorySpace>();
+              int n = snprintf(buffer,LEN,"OffsetView bounds error of view labeled %s (",label.c_str());
+              offsetview_error_operator_bounds<0>( buffer + n , LEN - n , map ,begins, args ... );
+              Kokkos::Impl::throw_runtime_exception(std::string(buffer));
+#else
+              /* Check #1: is there a SharedAllocationRecord?
+                (we won't use it, but if its not there then there isn't
+                 a corresponding SharedAllocationHeader containing a label).
+                This check should cover the case of Views that don't
+                have the Unmanaged trait but were initialized by pointer. */
+              if (tracker.has_record()) {
+		Kokkos::Impl::operator_bounds_error_on_device<MapType>(
+			      map, Kokkos::Impl::has_printable_label_typedef<MapType>());
+              } else {
+                 Kokkos::abort("OffsetView bounds error");
+              }
+#endif
+           }
+         }
+
+#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+         KOKKOS_INLINE_FUNCTION
+         void runtime_check_rank_host(const size_t rank_dynamic, const size_t rank,
+               const index_list_type minIndices, const std::string & label)
+         {
+            bool isBad = false;
+            std::string message = "Kokkos::Experimental::OffsetView ERROR: for OffsetView labeled '" + label + "':";
+            if (rank_dynamic != rank) {
+               message += "The full rank must be the same as the dynamic rank. full rank = ";
+               message += std::to_string(rank) + " dynamic rank = " + std::to_string(rank_dynamic) + "\n";
+               isBad = true;
+            }
+
+            size_t numOffsets = 0;
+            for(size_t i = 0; i < minIndices.size(); ++i ){
+               if( minIndices.begin()[i] != -KOKKOS_INVALID_OFFSET) numOffsets++;
+            }
+            if (numOffsets != rank_dynamic) {
+               message += "The number of offsets provided ( " + std::to_string(numOffsets) +
+                     " ) must equal the dynamic rank ( " + std::to_string(rank_dynamic) + " ).";
+               isBad = true;
+            }
+
+            if(isBad) Kokkos::abort(message.c_str());
+         }
+#endif
+
+         KOKKOS_INLINE_FUNCTION
+         void runtime_check_rank_device(const size_t rank_dynamic, const size_t rank,
+               const index_list_type minIndices)
+         {
+            if (rank_dynamic != rank) {
+               Kokkos::abort("The full rank of an OffsetView must be the same as the dynamic rank.");
+            }
+            size_t numOffsets = 0;
+            for(size_t i = 0; i < minIndices.size(); ++i ){
+               if( minIndices.begin()[i] != -KOKKOS_INVALID_OFFSET) numOffsets++;
+            }
+            if (numOffsets != rank) {
+               Kokkos::abort("The number of offsets provided to an OffsetView constructor must equal the dynamic rank.");
+            }
+
+         }
+      }
+
+      template< class DataType , class ... Properties >
+      class OffsetView : public ViewTraits< DataType , Properties ... > {
+      public:
+
+         typedef ViewTraits< DataType , Properties ... > traits ;
+
+
+
+      private:
+
+         template< class , class ... > friend class OffsetView ;
+         template< class , class ... > friend class View ;  //FIXME delete this line
+         template< class , class ... > friend class Kokkos::Impl::ViewMapping ;
+
+
+         typedef Kokkos::Impl::ViewMapping< traits , void > map_type ;
+         typedef Kokkos::Impl::SharedAllocationTracker      track_type ;
+      public:
+         enum { Rank = map_type::Rank };
+         typedef Kokkos::Array<int64_t, Rank>  begins_type ;
+
+
+         template <typename iType, typename std::enable_if< std::is_integral<iType>::value, iType>::type = 0>
+         KOKKOS_INLINE_FUNCTION
+         int64_t begin(const iType dimension) const {
+            return dimension < Rank ? m_begins[dimension] : 0;
+         }
+
+         KOKKOS_INLINE_FUNCTION
+         begins_type begins() const { return m_begins;}
+
+         template <typename iType, typename std::enable_if< std::is_integral<iType>::value, iType>::type = 0>
+         KOKKOS_INLINE_FUNCTION
+         int64_t end(const iType dimension) const {return begin(dimension) + m_map.extent(dimension);}
+
+
+      private:
+         track_type  m_track ;
+         map_type    m_map ;
+         begins_type  m_begins;
+
+      public:
+         //----------------------------------------
+         /** \brief  Compatible view of array of scalar types */
+         typedef OffsetView< typename traits::scalar_array_type ,
+               typename traits::array_layout ,
+               typename traits::device_type ,
+               typename traits::memory_traits >
+         array_type ;
+
+         /** \brief  Compatible view of const data type */
+         typedef OffsetView< typename traits::const_data_type ,
+               typename traits::array_layout ,
+               typename traits::device_type ,
+               typename traits::memory_traits >
+         const_type ;
+
+         /** \brief  Compatible view of non-const data type */
+         typedef OffsetView< typename traits::non_const_data_type ,
+               typename traits::array_layout ,
+               typename traits::device_type ,
+               typename traits::memory_traits >
+         non_const_type ;
+
+         /** \brief  Compatible HostMirror view */
+         typedef OffsetView< typename traits::non_const_data_type ,
+               typename traits::array_layout ,
+               typename traits::host_mirror_space >
+         HostMirror ;
+
+         //----------------------------------------
+         // Domain rank and extents
+
+         /** \brief rank() to be implemented
+          */
+         //KOKKOS_INLINE_FUNCTION
+         //static
+         //constexpr unsigned rank() { return map_type::Rank; }
+
+         template< typename iType >
+         KOKKOS_INLINE_FUNCTION constexpr
+         typename std::enable_if< std::is_integral<iType>::value , size_t >::type
+         extent( const iType & r ) const
+         { return m_map.extent(r); }
+
+         template< typename iType >
+         KOKKOS_INLINE_FUNCTION constexpr
+         typename std::enable_if< std::is_integral<iType>::value , int >::type
+         extent_int( const iType & r ) const
+         { return static_cast<int>(m_map.extent(r)); }
+
+         KOKKOS_INLINE_FUNCTION constexpr
+         typename traits::array_layout layout() const
+         { return m_map.layout(); }
+
+
+         KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return m_map.dimension_0() *
+               m_map.dimension_1() *
+               m_map.dimension_2() *
+               m_map.dimension_3() *
+               m_map.dimension_4() *
+               m_map.dimension_5() *
+               m_map.dimension_6() *
+               m_map.dimension_7(); }
+
+         KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_map.stride_0(); }
+         KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); }
+         KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); }
+         KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); }
+         KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); }
+         KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); }
+         KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); }
+         KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); }
+
+         template< typename iType >
+         KOKKOS_INLINE_FUNCTION constexpr
+         typename std::enable_if< std::is_integral<iType>::value , size_t >::type
+         stride(iType r) const {
+            return (r == 0 ? m_map.stride_0() :
+                  (r == 1 ? m_map.stride_1() :
+                        (r == 2 ? m_map.stride_2() :
+                              (r == 3 ? m_map.stride_3() :
+                                    (r == 4 ? m_map.stride_4() :
+                                          (r == 5 ? m_map.stride_5() :
+                                                (r == 6 ? m_map.stride_6() :
+                                                      m_map.stride_7())))))));
+         }
+
+         template< typename iType >
+         KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); }
+
+         //----------------------------------------
+         // Range span is the span which contains all members.
+
+         typedef typename map_type::reference_type  reference_type ;
+         typedef typename map_type::pointer_type    pointer_type ;
+
+         enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value };
+
+         KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); }
+         KOKKOS_INLINE_FUNCTION bool span_is_contiguous() const { return m_map.span_is_contiguous(); }
+         KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); }
+
+         //----------------------------------------
+         // Allow specializations to query their specialized map
+
+         KOKKOS_INLINE_FUNCTION
+         const Kokkos::Impl::ViewMapping< traits , void > &
+         implementation_map() const { return m_map ; }
+
+         //----------------------------------------
+
+      private:
+
+         enum {
+            is_layout_left = std::is_same< typename traits::array_layout
+            , Kokkos::LayoutLeft >::value ,
+
+            is_layout_right = std::is_same< typename traits::array_layout
+            , Kokkos::LayoutRight >::value ,
+
+            is_layout_stride = std::is_same< typename traits::array_layout
+            , Kokkos::LayoutStride >::value ,
+
+            is_default_map =
+                  std::is_same< typename traits::specialize , void >::value &&
+                  ( is_layout_left || is_layout_right || is_layout_stride )
+         };
+
+         template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space
+               { KOKKOS_FORCEINLINE_FUNCTION static void check() {} };
+
+         template< class Space > struct verify_space<Space,false>
+         { KOKKOS_FORCEINLINE_FUNCTION static void check()
+         { Kokkos::abort("Kokkos::View ERROR: attempt to access inaccessible memory space");
+         };
+         };
+
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+
+#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( ARG ) \
+      OffsetView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
+      Kokkos::Experimental::Impl::offsetview_verify_operator_bounds< typename traits::memory_space > ARG ;
+
+#else
+
+#define KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( ARG ) \
+      OffsetView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
+
+#endif
+      public:
+
+         //------------------------------
+         // Rank 0 operator()
+
+         KOKKOS_FORCEINLINE_FUNCTION
+         reference_type
+         operator()() const
+         {
+            return m_map.reference();
+         }
+         //------------------------------
+         // Rank 1 operator()
+
+
+         template< typename I0>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0>::value
+               && ( 1 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0) const
+         {
+
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) )
+                          const size_t j0 = i0 - m_begins[0];
+            return m_map.reference(j0);
+         }
+
+         template< typename I0>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0>::value
+               && ( 1 == Rank )
+               && is_default_map
+               && ! is_layout_stride
+         ), reference_type >::type
+         operator()( const I0 & i0 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) )
+                          const size_t j0 = i0 - m_begins[0];
+            return m_map.m_impl_handle[ j0 ];
+         }
+
+         template< typename I0 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0>::value
+               && ( 1 == Rank )
+               && is_default_map
+               && is_layout_stride
+         ), reference_type >::type
+         operator()( const I0 & i0) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) )
+                          const size_t j0 = i0 - m_begins[0];
+            return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * j0 ];
+         }
+         //------------------------------
+         // Rank 1 operator[]
+
+         template< typename I0 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0>::value
+               && ( 1 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator[]( const I0 & i0 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) )
+                          const size_t j0 = i0 - m_begins[0];
+            return m_map.reference(j0);
+         }
+
+         template< typename I0 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0>::value
+               && ( 1 == Rank )
+               && is_default_map
+               && ! is_layout_stride
+         ), reference_type >::type
+         operator[]( const I0 & i0 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) )
+                          const size_t j0 = i0 - m_begins[0];
+            return m_map.m_impl_handle[ j0 ];
+         }
+
+         template< typename I0 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0>::value
+               && ( 1 == Rank )
+               && is_default_map
+               && is_layout_stride
+         ), reference_type >::type
+         operator[]( const I0 & i0 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0) )
+                          const size_t j0 = i0 - m_begins[0];
+            return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * j0 ];
+         }
+
+
+         //------------------------------
+         // Rank 2
+
+         template< typename I0 , typename I1 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1>::value
+               && ( 2 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            return m_map.reference(j0,j1);
+         }
+
+         template< typename I0 , typename I1 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1>::value
+               && ( 2 == Rank )
+               && is_default_map
+               && is_layout_left && ( traits::rank_dynamic == 0 )
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            return m_map.m_impl_handle[ j0 + m_map.m_impl_offset.m_dim.N0 * j1 ];
+         }
+
+         template< typename I0 , typename I1>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1>::value
+               && ( 2 == Rank )
+               && is_default_map
+               && is_layout_left && ( traits::rank_dynamic != 0 )
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            return m_map.m_impl_handle[ j0 + m_map.m_impl_offset.m_stride * j1 ];
+         }
+
+         template< typename I0 , typename I1 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1>::value
+               && ( 2 == Rank )
+               && is_default_map
+               && is_layout_right && ( traits::rank_dynamic == 0 )
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            return m_map.m_impl_handle[ j1 + m_map.m_impl_offset.m_dim.N1 * j0 ];
+         }
+
+         template< typename I0 , typename I1 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1>::value
+               && ( 2 == Rank )
+               && is_default_map
+               && is_layout_right && ( traits::rank_dynamic != 0 )
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            return m_map.m_impl_handle[ j1 + m_map.m_impl_offset.m_stride * j0 ];
+         }
+
+         template< typename I0 , typename I1>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1>::value
+               && ( 2 == Rank )
+               && is_default_map
+               && is_layout_stride
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            return m_map.m_impl_handle[ j0 * m_map.m_impl_offset.m_stride.S0 +
+                                        j1 * m_map.m_impl_offset.m_stride.S1 ];
+         }
+
+         //------------------------------
+         // Rank 3
+
+         template< typename I0 , typename I1 , typename I2 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2>::value
+               && ( 3 == Rank )
+               && is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            return m_map.m_impl_handle[ m_map.m_impl_offset(j0, j1, j2) ];
+         }
+
+         template< typename I0 , typename I1 , typename I2>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2>::value
+               && ( 3 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            return m_map.reference(j0, j1, j2);
+         }
+
+         //------------------------------
+         // Rank 4
+
+         template< typename I0 , typename I1 , typename I2 , typename I3>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3>::value
+               && ( 4 == Rank )
+               && is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2, i3) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            return m_map.m_impl_handle[ m_map.m_impl_offset(j0,j1,j2,j3) ];
+         }
+
+         template< typename I0 , typename I1 , typename I2 , typename I3 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3>::value
+               && ( 4 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2, i3) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            return m_map.reference(j0,j1,j2,j3);
+         }
+
+         //------------------------------
+         // Rank 5
+
+         template< typename I0 , typename I1 , typename I2 , typename I3
+         , typename I4>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4>::value
+               && ( 5 == Rank )
+               && is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+               , const I4 & i4 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2, i3, i4) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            const size_t j4 = i4 - m_begins[4];
+            return m_map.m_impl_handle[ m_map.m_impl_offset(j0, j1,j2, j3, j4) ];
+         }
+
+         template< typename I0 , typename I1 , typename I2 , typename I3
+         , typename I4>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4>::value
+               && ( 5 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+               , const I4 & i4) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map, m_begins, i0,i1, i2, i3, i4) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            const size_t j4 = i4 - m_begins[4];
+            return m_map.reference(j0,j1,j2,j3,j4);
+         }
+
+         //------------------------------
+         // Rank 6
+
+         template< typename I0 , typename I1 , typename I2 , typename I3
+         , typename I4 , typename I5 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5>::value
+               && ( 6 == Rank )
+               && is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+               , const I4 & i4 , const I5 & i5 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            const size_t j4 = i4 - m_begins[4];
+            const size_t j5 = i5 - m_begins[5];
+            return m_map.m_impl_handle[ m_map.m_impl_offset(j0,j1,j2,j3,j4,j5) ];
+         }
+
+         template< typename I0 , typename I1 , typename I2 , typename I3
+         , typename I4 , typename I5>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5>::value
+               && ( 6 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+               , const I4 & i4 , const I5 & i5) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            const size_t j4 = i4 - m_begins[4];
+            const size_t j5 = i5 - m_begins[5];
+            return m_map.reference(j0,j1,j2,j3,j4,j5);
+         }
+
+         //------------------------------
+         // Rank 7
+
+         template< typename I0 , typename I1 , typename I2 , typename I3
+         , typename I4 , typename I5 , typename I6>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6>::value
+               && ( 7 == Rank )
+               && is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+               , const I4 & i4 , const I5 & i5 , const I6 & i6) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5, i6) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            const size_t j4 = i4 - m_begins[4];
+            const size_t j5 = i5 - m_begins[5];
+            const size_t j6 = i6 - m_begins[6];
+            return m_map.m_impl_handle[ m_map.m_impl_offset(j0,j1,j2,j3,j4,j5,j6) ];
+         }
+
+         template< typename I0 , typename I1 , typename I2 , typename I3
+         , typename I4 , typename I5 , typename I6 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6>::value
+               && ( 7 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+               , const I4 & i4 , const I5 & i5 , const I6 & i6) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5, i6) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            const size_t j4 = i4 - m_begins[4];
+            const size_t j5 = i5 - m_begins[5];
+            const size_t j6 = i6 - m_begins[6];
+            return m_map.reference(j0,j1,j2,j3,j4,j5,j6);
+         }
+
+         //------------------------------
+         // Rank 8
+
+         template< typename I0 , typename I1 , typename I2 , typename I3
+         , typename I4 , typename I5 , typename I6 , typename I7 >
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7>::value
+               && ( 8 == Rank )
+               && is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+               , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5, i6, i7) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            const size_t j4 = i4 - m_begins[4];
+            const size_t j5 = i5 - m_begins[5];
+            const size_t j6 = i6 - m_begins[6];
+            const size_t j7 = i7 - m_begins[7];
+            return m_map.m_impl_handle[ m_map.m_impl_offset(j0,j1,j2,j3,j4,j5,j6,j7) ];
+         }
+
+         template< typename I0 , typename I1 , typename I2 , typename I3
+         , typename I4 , typename I5 , typename I6 , typename I7>
+         KOKKOS_FORCEINLINE_FUNCTION
+         typename std::enable_if<
+         ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7>::value
+               && ( 8 == Rank )
+               && ! is_default_map
+         ), reference_type >::type
+         operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+               , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
+         {
+            KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY( (m_track,m_map,m_begins, i0,i1, i2, i3, i4, i5, i6, i7) )
+                          const size_t j0 = i0 - m_begins[0];
+            const size_t j1 = i1 - m_begins[1];
+            const size_t j2 = i2 - m_begins[2];
+            const size_t j3 = i3 - m_begins[3];
+            const size_t j4 = i4 - m_begins[4];
+            const size_t j5 = i5 - m_begins[5];
+            const size_t j6 = i6 - m_begins[6];
+            const size_t j7 = i7 - m_begins[7];
+            return m_map.reference(j0,j1,j2,j3,j4,j5,j6,j7);
+         }
+
+
+#undef KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY
+
+         //----------------------------------------
+         // Standard destructor, constructors, and assignment operators
+
+         KOKKOS_INLINE_FUNCTION
+         ~OffsetView() {}
+
+         KOKKOS_INLINE_FUNCTION
+         OffsetView() : m_track(), m_map() {
+
+            for(size_t i = 0; i < Rank; ++i) m_begins[i] = KOKKOS_INVALID_INDEX;
+         }
+
+         KOKKOS_INLINE_FUNCTION
+         OffsetView( const OffsetView & rhs ) : m_track( rhs.m_track, traits::is_managed ), m_map( rhs.m_map ),
+         m_begins(rhs.m_begins) {}
+
+         KOKKOS_INLINE_FUNCTION
+         OffsetView( OffsetView && rhs ) : m_track( std::move(rhs.m_track) ),
+         m_map( std::move(rhs.m_map)), m_begins(std::move(rhs.m_begins)) {}
+
+         KOKKOS_INLINE_FUNCTION
+         OffsetView & operator = ( const OffsetView & rhs ) {
+            m_track = rhs.m_track ;
+            m_map = rhs.m_map ;
+            m_begins = rhs.m_begins;
+            return *this ;
+         }
+
+         KOKKOS_INLINE_FUNCTION
+         OffsetView & operator = ( OffsetView && rhs ) {
+            m_track = std::move(rhs.m_track) ;
+            m_map = std::move(rhs.m_map) ;
+            m_begins = std::move(rhs.m_begins) ;
+            return *this ;
+         }
+
+         //interoperability with View
+      private:
+         typedef View< typename traits::scalar_array_type ,
+               typename traits::array_layout ,
+               typename traits::device_type ,
+               typename traits::memory_traits > view_type;
+      public:
+
+         KOKKOS_INLINE_FUNCTION
+         view_type view() const {
+
+            view_type v(m_track, m_map);
+            return v ;
+         }
+
+         template<class RT, class ... RP>
+         KOKKOS_INLINE_FUNCTION
+         OffsetView( const View<RT, RP...> & aview) :
+         m_track(aview.impl_track()), m_map(){
+
+            typedef typename OffsetView<RT,RP...>::traits  SrcTraits ;
+            typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
+            static_assert( Mapping::is_assignable , "Incompatible OffsetView copy construction" );
+            Mapping::assign( m_map , aview.impl_map() , m_track );
+
+            for (int i = 0; i < aview.Rank; ++i) {
+               m_begins[i] = 0;
+            }
+         }
+
+         template<class RT, class ... RP>
+         KOKKOS_INLINE_FUNCTION
+         OffsetView( const View<RT, RP...> & aview
+               ,const index_list_type & minIndices) :
+               m_track(aview.impl_track()), m_map(){
+
+            typedef typename OffsetView<RT,RP...>::traits  SrcTraits ;
+            typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
+            static_assert( Mapping::is_assignable , "Incompatible OffsetView copy construction" );
+            Mapping::assign( m_map , aview.impl_map() , m_track );
+
+#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+            Kokkos::Experimental::Impl::runtime_check_rank_host(traits::rank_dynamic, Rank, minIndices, label());
+#else
+            Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic, Rank, minIndices);
+
+#endif
+
+            for (size_t i = 0; i < minIndices.size(); ++i) {
+               m_begins[i] = minIndices.begin()[i];
+            }
+         }
+         template<class RT, class ... RP>
+         KOKKOS_INLINE_FUNCTION
+         OffsetView( const View<RT, RP...> & aview
+               ,const begins_type & beg) :
+               m_track(aview.impl_track()), m_map(), m_begins(beg){
+
+            typedef typename OffsetView<RT,RP...>::traits  SrcTraits ;
+            typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
+            static_assert( Mapping::is_assignable , "Incompatible OffsetView copy construction" );
+            Mapping::assign( m_map , aview.impl_map() , m_track );
+
+
+            //#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+            //        Kokkos::Experimental::Impl::runtime_check_rank_host(traits::rank_dynamic, Rank, minIndices, label());
+            //#else
+            //        Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic, Rank, minIndices);
+            //
+            //#endif
+
+         }
+
+         // may assign unmanaged from managed.
+
+
+         template< class RT , class ... RP >
+         KOKKOS_INLINE_FUNCTION
+         OffsetView( const OffsetView<RT,RP...> & rhs )
+         : m_track( rhs.m_track , traits::is_managed )
+         , m_map()
+         , m_begins(rhs.m_begins)
+         {
+            typedef typename OffsetView<RT,RP...>::traits  SrcTraits ;
+            typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void >  Mapping ;
+            static_assert( Mapping::is_assignable , "Incompatible OffsetView copy construction" );
+            Mapping::assign( m_map , rhs.m_map , rhs.m_track );  //swb what about assign?
+         }
+
+
+         //----------------------------------------
+         // Allocation tracking properties
+         KOKKOS_INLINE_FUNCTION
+         int use_count() const
+         { return m_track.use_count(); }
+
+         inline
+         const std::string label() const
+         { return m_track.template get_label< typename traits::memory_space >(); }
+
+
+         template< typename Label>
+         explicit inline
+         OffsetView( const Label & arg_label
+               ,typename std::enable_if<Kokkos::Impl::is_view_label<Label>::value , const index_list_type >::type
+               range0
+               ,const index_list_type range1 = KOKKOS_INVALID_INDEX_RANGE
+               ,const index_list_type range2 = KOKKOS_INVALID_INDEX_RANGE
+               ,const index_list_type range3 = KOKKOS_INVALID_INDEX_RANGE
+               ,const index_list_type range4 = KOKKOS_INVALID_INDEX_RANGE
+               ,const index_list_type range5 = KOKKOS_INVALID_INDEX_RANGE
+               ,const index_list_type range6 = KOKKOS_INVALID_INDEX_RANGE
+               ,const index_list_type range7 = KOKKOS_INVALID_INDEX_RANGE
+
+         ) : OffsetView( Kokkos::Impl::ViewCtorProp< std::string >( arg_label ),
+               typename traits::array_layout
+               ( range0.begin()[1] - range0.begin()[0] + 1, range1.begin()[1] - range1.begin()[0] + 1 ,
+                     range2.begin()[1] - range2.begin()[0] + 1, range3.begin()[1] - range3.begin()[0] + 1,
+                     range4.begin()[1] - range4.begin()[0] + 1, range5.begin()[1] - range5.begin()[0] + 1 ,
+                     range6.begin()[1] - range6.begin()[0] + 1, range7.begin()[1] - range7.begin()[0] + 1 ),
+                     {range0.begin()[0], range1.begin()[0], range2.begin()[0], range3.begin()[0], range4.begin()[0],
+                           range5.begin()[0], range6.begin()[0], range7.begin()[0] })
+         {
+
+         }
+
+
+
+         template<class ... P >
+         explicit KOKKOS_INLINE_FUNCTION
+         OffsetView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
+               ,typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer , typename traits::array_layout >::type const & arg_layout
+               ,const index_list_type minIndices
+         )
+         : m_track() // No memory tracking
+         , m_map( arg_prop , arg_layout )
+         {
+
+
+            for (size_t i = 0; i < minIndices.size(); ++i) {
+               m_begins[i] = minIndices.begin()[i];
+            }
+            static_assert(
+                  std::is_same< pointer_type
+                  , typename Kokkos::Impl::ViewCtorProp< P... >::pointer_type
+                  >::value ,
+                  "When constructing OffsetView to wrap user memory, you must supply matching pointer type" );
+         }
+
+         template<class ... P >
+         explicit inline
+         OffsetView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
+               , typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer , typename traits::array_layout>::type const & arg_layout
+               ,const index_list_type minIndices
+         )
+         : m_track()
+         , m_map()
+
+         {
+
+            for(size_t i = 0; i < Rank; ++i)
+               m_begins[i] = minIndices.begin()[i];
+
+            // Append layout and spaces if not input
+            typedef Kokkos::Impl::ViewCtorProp< P ... > alloc_prop_input ;
+
+            // use 'std::integral_constant<unsigned,I>' for non-types
+            // to avoid duplicate class error.
+            typedef Kokkos::Impl::ViewCtorProp
+                  < P ..., typename std::conditional < alloc_prop_input::has_label
+                  , std::integral_constant<unsigned,0>, typename std::string >::type
+                  , typename std::conditional
+                  < alloc_prop_input::has_memory_space
+                  , std::integral_constant<unsigned,1>
+            , typename traits::device_type::memory_space
+            >::type
+            , typename std::conditional
+            < alloc_prop_input::has_execution_space
+            , std::integral_constant<unsigned,2>
+            , typename traits::device_type::execution_space
+            >::type
+            > alloc_prop ;
+
+            static_assert( traits::is_managed
+                  , "OffsetView allocation constructor requires managed memory" );
+
+            if ( alloc_prop::initialize &&
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+                  ! alloc_prop::execution_space::is_initialized()
+#else
+            ! alloc_prop::execution_space::impl_is_initialized()
+#endif
+            ) {
+               // If initializing view data then
+               // the execution space must be initialized.
+               Kokkos::Impl::throw_runtime_exception("Constructing OffsetView and initializing data with uninitialized execution space");
+            }
+
+            // Copy the input allocation properties with possibly defaulted properties
+            alloc_prop prop( arg_prop );
+
+            //------------------------------------------------------------
+#if defined( KOKKOS_ENABLE_CUDA )
+            // If allocating in CudaUVMSpace must fence before and after
+            // the allocation to protect against possible concurrent access
+            // on the CPU and the GPU.
+            // Fence using the trait's executon space (which will be Kokkos::Cuda)
+            // to avoid incomplete type errors from usng Kokkos::Cuda directly.
+            if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
+               traits::device_type::memory_space::execution_space::fence();
+            }
+#endif
+            //------------------------------------------------------------
+
+            Kokkos::Impl::SharedAllocationRecord<> *
+            record = m_map.allocate_shared( prop , arg_layout );
+
+            //------------------------------------------------------------
+#if defined( KOKKOS_ENABLE_CUDA )
+            if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
+               traits::device_type::memory_space::execution_space::fence();
+            }
+#endif
+            //------------------------------------------------------------
+
+            // Setup and initialization complete, start tracking
+            m_track.assign_allocated_record_to_uninitialized( record );
+
+#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
+            Kokkos::Experimental::Impl::runtime_check_rank_host(traits::rank_dynamic, Rank, minIndices, label());
+#else
+            Kokkos::Experimental::Impl::runtime_check_rank_device(traits::rank_dynamic, Rank, minIndices);
+
+#endif
+
+         }
+
+
+      };
+
+
+
+      /** \brief Temporary free function rank()
+       *         until rank() is implemented
+       *         in the View
+       */
+      template < typename D , class ... P >
+      KOKKOS_INLINE_FUNCTION
+      constexpr unsigned rank( const OffsetView<D , P...> & V ) { return V.Rank; } //Temporary until added to view
+
+      //----------------------------------------------------------------------------
+      //----------------------------------------------------------------------------
+      namespace Impl {
+
+         template< class T >
+         KOKKOS_INLINE_FUNCTION
+         typename  std::enable_if< std::is_integral<T>::value, T>::type
+         shift_input(const T arg, const int64_t offset)
+         {
+            return arg - offset;
+         }
+
+         KOKKOS_INLINE_FUNCTION
+         Kokkos::Impl::ALL_t
+         shift_input(const Kokkos::Impl::ALL_t arg, const int64_t offset)
+         {
+            return arg;
+         }
+
+         template< class T >
+         KOKKOS_INLINE_FUNCTION
+         typename  std::enable_if< std::is_integral<T>::value, Kokkos::pair<T,T> >::type
+         shift_input(const Kokkos::pair<T, T> arg, const int64_t offset)
+         {
+
+            return Kokkos::make_pair<T,T>(arg.first - offset, arg.second - offset);
+
+         }
+         template< class T >
+	 inline
+         typename  std::enable_if< std::is_integral<T>::value, std::pair<T,T> >::type
+         shift_input(const std::pair<T, T> arg, const int64_t offset)
+         {
+
+            return std::make_pair<T,T>(arg.first - offset, arg.second - offset);
+
+         }
+
+         template <size_t N, class Arg, class A>
+         KOKKOS_INLINE_FUNCTION
+         void
+         map_arg_to_new_begin(const size_t i,
+               Kokkos::Array<int64_t, N> &subviewBegins, typename std::enable_if< N != 0, const Arg>::type shiftedArg,
+               const Arg arg, const A viewBegins, size_t & counter) {
+
+            if( !std::is_integral<Arg>::value) {
+               subviewBegins[counter] = shiftedArg == arg ? viewBegins[i] : 0;
+               counter++;
+            }
+         }
+
+         template <size_t N, class Arg, class A>
+         KOKKOS_INLINE_FUNCTION
+         void
+         map_arg_to_new_begin(const size_t i,
+               Kokkos::Array<int64_t, N> &subviewBegins, typename std::enable_if< N == 0, const Arg>::type shiftedArg,
+               const Arg arg, const A viewBegins, size_t & counter) {
+
+         }
+
+
+         template< class D, class ... P , class T >
+         KOKKOS_INLINE_FUNCTION
+         typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+         < void /* deduce subview type from source view traits */
+         , ViewTraits< D , P... >
+         , T
+         >::type >::type
+         subview_offset(const OffsetView< D, P... > & src, T arg) {
+
+            auto theView = src.view();
+            auto begins = src.begins();
+
+            T shiftedArg = shift_input(arg, begins[0]);
+
+            constexpr size_t rank = Kokkos::Impl::ViewMapping
+                  < void /* deduce subview type from source view traits */
+                  , ViewTraits< D , P... >
+            , T>::type::Rank;
+
+            auto theSubview = Kokkos::subview( theView , shiftedArg);
+
+            Kokkos::Array<int64_t, rank> subviewBegins;
+            size_t counter = 0;
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg, arg, begins, counter);
+
+            typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            , ViewTraits< D , P... > , T >::type >::type offsetView(theSubview, subviewBegins);
+
+            return offsetView;
+
+         }
+
+         template< class D, class ... P , class T0, class T1 >
+         KOKKOS_INLINE_FUNCTION
+         typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+         < void /* deduce subview type from source view traits */
+         , ViewTraits< D , P... >
+         , T0, T1
+         >::type >::type
+         subview_offset(const Kokkos::Experimental::OffsetView< D, P... > & src, T0 arg0, T1 arg1) {
+
+            auto theView = src.view();
+            auto begins = src.begins();
+
+            T0 shiftedArg0 = shift_input(arg0, begins[0]);
+            T1 shiftedArg1 = shift_input(arg1, begins[1]);
+
+            auto theSubview = Kokkos::subview(theView , shiftedArg0, shiftedArg1);
+            constexpr size_t rank = Kokkos::Impl::ViewMapping
+                  < void /* deduce subview type from source view traits */
+                  , ViewTraits< D , P... >
+            , T0, T1>::type::Rank;
+
+            Kokkos::Array<int64_t, rank> subviewBegins;
+            size_t counter = 0;
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter);
+
+            typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            , ViewTraits< D , P... > , T0, T1 >::type >::type offsetView(theSubview, subviewBegins);
+
+            return offsetView;
+
+         }
+
+         template< class D, class ... P , class T0, class T1, class T2 >
+         KOKKOS_INLINE_FUNCTION
+         typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+         < void /* deduce subview type from source view traits */
+         , ViewTraits< D , P... >
+         , T0, T1, T2
+         >::type >::type
+         subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2) {
+
+            auto theView = src.view();
+            auto begins = src.begins();
+
+            T0 shiftedArg0 = shift_input(arg0, begins[0]);
+            T1 shiftedArg1 = shift_input(arg1, begins[1]);
+            T2 shiftedArg2 = shift_input(arg2, begins[2]);
+
+            auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2);
+
+            constexpr size_t rank = Kokkos::Impl::ViewMapping
+                  < void /* deduce subview type from source view traits */
+                  , ViewTraits< D , P... >
+            , T0, T1, T2>::type::Rank;
+
+            Kokkos::Array<int64_t, rank> subviewBegins;
+
+            size_t counter = 0;
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter);
+
+            typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            , ViewTraits< D , P... > , T0, T1, T2 >::type >::type offsetView(theSubview, subviewBegins);
+
+            return offsetView;
+         }
+
+         template< class D, class ... P , class T0, class T1, class T2, class T3 >
+         KOKKOS_INLINE_FUNCTION
+         typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+         < void /* deduce subview type from source view traits */
+         , ViewTraits< D , P... >
+         , T0, T1, T2, T3
+         >::type >::type
+         subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3) {
+
+            auto theView = src.view();
+            auto begins = src.begins();
+
+            T0 shiftedArg0 = shift_input(arg0, begins[0]);
+            T1 shiftedArg1 = shift_input(arg1, begins[1]);
+            T2 shiftedArg2 = shift_input(arg2, begins[2]);
+            T3 shiftedArg3 = shift_input(arg3, begins[3]);
+
+            auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3);
+
+            constexpr size_t rank = Kokkos::Impl::ViewMapping
+                  < void /* deduce subview type from source view traits */
+                  , ViewTraits< D , P... >
+            , T0, T1, T2, T3>::type::Rank;
+            Kokkos::Array<int64_t, rank> subviewBegins;
+
+            size_t counter = 0;
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter);
+
+            typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            , ViewTraits< D , P... > , T0, T1, T2, T3 >::type >::type offsetView(theSubview, subviewBegins);
+
+            return offsetView;
+         }
+
+         template< class D, class ... P , class T0, class T1, class T2, class T3, class T4 >
+         KOKKOS_INLINE_FUNCTION
+         typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+         < void /* deduce subview type from source view traits */
+         , ViewTraits< D , P... >
+         , T0, T1, T2, T3, T4
+         >::type >::type
+         subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3, T4 arg4) {
+
+            auto theView = src.view();
+            auto begins = src.begins();
+
+            T0 shiftedArg0 = shift_input(arg0, begins[0]);
+            T1 shiftedArg1 = shift_input(arg1, begins[1]);
+            T2 shiftedArg2 = shift_input(arg2, begins[2]);
+            T3 shiftedArg3 = shift_input(arg3, begins[3]);
+            T4 shiftedArg4 = shift_input(arg4, begins[4]);
+
+            auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3, shiftedArg4);
+
+            constexpr size_t rank = Kokkos::Impl::ViewMapping
+                  < void /* deduce subview type from source view traits */
+                  , ViewTraits< D , P... >
+            , T0, T1, T2, T3, T4>::type::Rank;
+            Kokkos::Array<int64_t, rank> subviewBegins;
+
+            size_t counter = 0;
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(4, subviewBegins, shiftedArg4, arg4, begins, counter);
+
+            typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            , ViewTraits< D , P... > , T0, T1, T2, T3, T4 >::type >::type offsetView(theSubview, subviewBegins);
+
+            return offsetView;
+         }
+
+         template< class D, class ... P , class T0, class T1, class T2, class T3, class T4,
+         class T5 >
+         KOKKOS_INLINE_FUNCTION
+         typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+         < void /* deduce subview type from source view traits */
+         , ViewTraits< D , P... >
+         , T0, T1, T2, T3, T4, T5
+         >::type >::type
+         subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5) {
+
+            auto theView = src.view();
+            auto begins = src.begins();
+
+            T0 shiftedArg0 = shift_input(arg0, begins[0]);
+            T1 shiftedArg1 = shift_input(arg1, begins[1]);
+            T2 shiftedArg2 = shift_input(arg2, begins[2]);
+            T3 shiftedArg3 = shift_input(arg3, begins[3]);
+            T4 shiftedArg4 = shift_input(arg4, begins[4]);
+            T5 shiftedArg5 = shift_input(arg5, begins[5]);
+
+            auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3, shiftedArg4, shiftedArg5);
+
+            constexpr size_t rank = Kokkos::Impl::ViewMapping
+                  < void /* deduce subview type from source view traits */
+                  , ViewTraits< D , P... >
+            , T0, T1, T2, T3, T4, T5>::type::Rank;
+
+            Kokkos::Array<int64_t, rank> subviewBegins;
+
+            size_t counter = 0;
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(4, subviewBegins, shiftedArg4, arg4, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(5, subviewBegins, shiftedArg5, arg5, begins, counter);
+
+            typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            , ViewTraits< D , P... > , T0, T1, T2, T3, T4, T5 >::type >::type offsetView(theSubview, subviewBegins);
+
+            return offsetView;
+         }
+         template< class D, class ... P , class T0, class T1, class T2, class T3, class T4,
+         class T5, class T6>
+         KOKKOS_INLINE_FUNCTION
+         typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+         < void /* deduce subview type from source view traits */
+         , ViewTraits< D , P... >
+         , T0, T1, T2, T3, T4, T5, T6
+         >::type >::type
+         subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5,
+               T6 arg6) {
+
+            auto theView = src.view();
+            auto begins = src.begins();
+
+            T0 shiftedArg0 = shift_input(arg0, begins[0]);
+            T1 shiftedArg1 = shift_input(arg1, begins[1]);
+            T2 shiftedArg2 = shift_input(arg2, begins[2]);
+            T3 shiftedArg3 = shift_input(arg3, begins[3]);
+            T4 shiftedArg4 = shift_input(arg4, begins[4]);
+            T5 shiftedArg5 = shift_input(arg5, begins[5]);
+            T6 shiftedArg6 = shift_input(arg6, begins[6]);
+
+            auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3, shiftedArg4, shiftedArg5,
+                  shiftedArg6);
+
+            constexpr size_t rank = Kokkos::Impl::ViewMapping
+                  < void /* deduce subview type from source view traits */
+                  , ViewTraits< D , P... >
+            , T0, T1, T2, T3, T4, T5, T6>::type::Rank;
+
+            Kokkos::Array<int64_t, rank> subviewBegins;
+
+            size_t counter = 0;
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(4, subviewBegins, shiftedArg4, arg4, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(5, subviewBegins, shiftedArg5, arg5, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(6, subviewBegins, shiftedArg6, arg6, begins, counter);
+
+            typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            , ViewTraits< D , P... > , T0, T1, T2, T3, T4, T5,
+            T6 >::type >::type offsetView(theSubview, subviewBegins);
+
+            return offsetView;
+         }
+
+         template< class D, class ... P , class T0, class T1, class T2, class T3, class T4,
+         class T5, class T6, class T7>
+         KOKKOS_INLINE_FUNCTION
+         typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+         < void /* deduce subview type from source view traits */
+         , ViewTraits< D , P... >
+         , T0, T1, T2, T3, T4, T5, T6, T7
+         >::type >::type
+         subview_offset(const OffsetView< D, P... > & src, T0 arg0, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5,
+               T6 arg6, T7 arg7) {
+
+            auto theView = src.view();
+            auto begins = src.begins();
+
+            T0 shiftedArg0 = shift_input(arg0, begins[0]);
+            T1 shiftedArg1 = shift_input(arg1, begins[1]);
+            T2 shiftedArg2 = shift_input(arg2, begins[2]);
+            T3 shiftedArg3 = shift_input(arg3, begins[3]);
+            T4 shiftedArg4 = shift_input(arg4, begins[4]);
+            T5 shiftedArg5 = shift_input(arg5, begins[5]);
+            T6 shiftedArg6 = shift_input(arg6, begins[6]);
+            T7 shiftedArg7 = shift_input(arg7, begins[7]);
+
+            auto theSubview = Kokkos::subview( theView , shiftedArg0, shiftedArg1, shiftedArg2, shiftedArg3, shiftedArg4, shiftedArg5,
+                  shiftedArg6, shiftedArg7);
+
+            constexpr size_t rank = Kokkos::Impl::ViewMapping
+                  < void /* deduce subview type from source view traits */
+                  , ViewTraits< D , P... >
+            , T0, T1, T2, T3, T4, T5, T6, T7>::type::Rank;
+
+            Kokkos::Array<int64_t, rank> subviewBegins;
+
+            size_t counter = 0;
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(0, subviewBegins, shiftedArg0, arg0, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(1, subviewBegins, shiftedArg1, arg1, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(2, subviewBegins, shiftedArg2, arg2, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(3, subviewBegins, shiftedArg3, arg3, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(4, subviewBegins, shiftedArg4, arg4, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(5, subviewBegins, shiftedArg5, arg5, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(6, subviewBegins, shiftedArg6, arg6, begins, counter);
+            Kokkos::Experimental::Impl::map_arg_to_new_begin(7, subviewBegins, shiftedArg7, arg7, begins, counter);
+
+            typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping<
+            void /* deduce subview type from source view traits */
+            , ViewTraits< D , P... > , T0, T1, T2, T3, T4, T5,
+            T6, T7 >::type >::type offsetView(theSubview, subviewBegins);
+
+            return offsetView;
+         }
+      }
+
+      template< class D, class ... P , class ... Args >
+      KOKKOS_INLINE_FUNCTION
+      typename Kokkos::Experimental::Impl::GetOffsetViewTypeFromViewType<typename Kokkos::Impl::ViewMapping
+      < void /* deduce subview type from source view traits */
+      , ViewTraits< D , P... >
+      , Args ...
+      >::type >::type
+      subview( const OffsetView< D, P... > & src , Args ... args )
+      {
+         static_assert( OffsetView< D , P... >::Rank == sizeof...(Args) ,
+               "subview requires one argument for each source OffsetView rank" );
+
+
+         return Kokkos::Experimental::Impl::subview_offset(src, args...);
+
+
+      }
+
+   }
+}
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+   namespace Experimental {
+      template< class LT , class ... LP , class RT , class ... RP >
+      KOKKOS_INLINE_FUNCTION
+      bool operator == ( const OffsetView<LT,LP...> & lhs ,
+            const OffsetView<RT,RP...> & rhs )
+            {
+         // Same data, layout, dimensions
+         typedef ViewTraits<LT,LP...>  lhs_traits ;
+         typedef ViewTraits<RT,RP...>  rhs_traits ;
+
+         return
+               std::is_same< typename lhs_traits::const_value_type ,
+               typename rhs_traits::const_value_type >::value &&
+               std::is_same< typename lhs_traits::array_layout ,
+               typename rhs_traits::array_layout >::value &&
+               std::is_same< typename lhs_traits::memory_space ,
+               typename rhs_traits::memory_space >::value &&
+               unsigned(lhs_traits::rank) == unsigned(rhs_traits::rank) &&
+               lhs.data()        == rhs.data() &&
+               lhs.span()        == rhs.span() &&
+               lhs.extent(0) == rhs.extent(0) &&
+               lhs.extent(1) == rhs.extent(1) &&
+               lhs.extent(2) == rhs.extent(2) &&
+               lhs.extent(3) == rhs.extent(3) &&
+               lhs.extent(4) == rhs.extent(4) &&
+               lhs.extent(5) == rhs.extent(5) &&
+               lhs.extent(6) == rhs.extent(6) &&
+               lhs.extent(7) == rhs.extent(7) &&
+               lhs.begin(0) == rhs.begin(0) &&
+               lhs.begin(1) == rhs.begin(1) &&
+               lhs.begin(2) == rhs.begin(2) &&
+               lhs.begin(3) == rhs.begin(3) &&
+               lhs.begin(4) == rhs.begin(4) &&
+               lhs.begin(5) == rhs.begin(5) &&
+               lhs.begin(6) == rhs.begin(6) &&
+               lhs.begin(7) == rhs.begin(7)
+               ;
+            }
+
+      template< class LT , class ... LP , class RT , class ... RP >
+      KOKKOS_INLINE_FUNCTION
+      bool operator != ( const OffsetView<LT,LP...> & lhs ,
+            const OffsetView<RT,RP...> & rhs )
+            {
+         return ! ( operator==(lhs,rhs) );
+            }
+
+      template< class LT , class ... LP , class RT , class ... RP >
+      KOKKOS_INLINE_FUNCTION
+      bool operator == ( const View<LT,LP...> & lhs ,
+            const OffsetView<RT,RP...> & rhs )
+            {
+         // Same data, layout, dimensions
+         typedef ViewTraits<LT,LP...>  lhs_traits ;
+         typedef ViewTraits<RT,RP...>  rhs_traits ;
+
+         return
+               std::is_same< typename lhs_traits::const_value_type ,
+               typename rhs_traits::const_value_type >::value &&
+               std::is_same< typename lhs_traits::array_layout ,
+               typename rhs_traits::array_layout >::value &&
+               std::is_same< typename lhs_traits::memory_space ,
+               typename rhs_traits::memory_space >::value &&
+               unsigned(lhs_traits::rank) == unsigned(rhs_traits::rank) &&
+               lhs.data()        == rhs.data() &&
+               lhs.span()        == rhs.span() &&
+               lhs.extent(0) == rhs.extent(0) &&
+               lhs.extent(1) == rhs.extent(1) &&
+               lhs.extent(2) == rhs.extent(2) &&
+               lhs.extent(3) == rhs.extent(3) &&
+               lhs.extent(4) == rhs.extent(4) &&
+               lhs.extent(5) == rhs.extent(5) &&
+               lhs.extent(6) == rhs.extent(6) &&
+               lhs.extent(7) == rhs.extent(7)
+               ;
+            }
+
+      template< class LT , class ... LP , class RT , class ... RP >
+      KOKKOS_INLINE_FUNCTION
+      bool operator == ( const OffsetView<LT,LP...> & lhs ,
+            const View<RT,RP...> & rhs )
+            { return rhs == lhs;}
+
+   }
+} /* namespace Kokkos */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+
+namespace Kokkos {
+   namespace Experimental {
+      template< class DT , class ... DP >
+      inline
+      void deep_copy
+      ( const OffsetView<DT,DP...> & dst
+            , typename ViewTraits<DT,DP...>::const_value_type & value
+            , typename std::enable_if<
+            std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value
+            >::type * = 0 )
+      {
+         static_assert(
+               std::is_same< typename ViewTraits<DT,DP...>::non_const_value_type ,
+               typename ViewTraits<DT,DP...>::value_type >::value
+               , "deep_copy requires non-const type" );
+
+         auto dstView = dst.view();
+         Kokkos::deep_copy( dstView , value );
+
+      }
+
+      template< class DT , class ... DP , class ST , class ... SP >
+      inline
+      void deep_copy
+      ( const OffsetView<DT,DP...> & dst
+            , const OffsetView<ST,SP...> & value
+            , typename std::enable_if<
+            std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value
+            >::type * = 0 )
+      {
+         static_assert(
+               std::is_same< typename ViewTraits<DT,DP...>::value_type ,
+               typename ViewTraits<ST,SP...>::non_const_value_type >::value
+               , "deep_copy requires matching non-const destination type" );
+
+         auto dstView = dst.view();
+         Kokkos::deep_copy( dstView , value.view() );
+
+      }
+      template< class DT , class ... DP , class ST , class ... SP >
+      inline
+      void deep_copy
+      ( const OffsetView<DT,DP...> & dst
+            , const View<ST,SP...> & value
+            , typename std::enable_if<
+            std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value
+            >::type * = 0 )
+      {
+         static_assert(
+               std::is_same< typename ViewTraits<DT,DP...>::value_type ,
+               typename ViewTraits<ST,SP...>::non_const_value_type >::value
+               , "deep_copy requires matching non-const destination type" );
+
+         auto dstView = dst.view();
+         Kokkos::deep_copy( dstView , value);
+
+      }
+
+      template< class DT , class ... DP , class ST , class ... SP >
+      inline
+      void deep_copy
+      ( const View<DT,DP...> & dst
+            , const OffsetView<ST,SP...> & value
+            , typename std::enable_if<
+            std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value
+            >::type * = 0 )
+      {
+         static_assert(
+               std::is_same< typename ViewTraits<DT,DP...>::value_type ,
+               typename ViewTraits<ST,SP...>::non_const_value_type >::value
+               , "deep_copy requires matching non-const destination type" );
+
+         Kokkos::deep_copy( dst , value.view() );
+
+      }
+      namespace Impl {
+
+         // Deduce Mirror Types
+         template<class Space, class T, class ... P>
+         struct MirrorOffsetViewType {
+            // The incoming view_type
+            typedef typename Kokkos::Experimental::OffsetView<T,P...> src_view_type;
+            // The memory space for the mirror view
+            typedef typename Space::memory_space memory_space;
+            // Check whether it is the same memory space
+            enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value };
+            // The array_layout
+            typedef typename src_view_type::array_layout array_layout;
+            // The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
+            typedef typename src_view_type::non_const_data_type data_type;
+            // The destination view type if it is not the same memory space
+            typedef Kokkos::Experimental::OffsetView<data_type,array_layout,Space> dest_view_type;
+            // If it is the same memory_space return the existsing view_type
+            // This will also keep the unmanaged trait if necessary
+            typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type;
+         };
+
+         template<class Space, class T, class ... P>
+         struct MirrorOffsetType {
+            // The incoming view_type
+            typedef typename Kokkos::Experimental::OffsetView<T,P...> src_view_type;
+            // The memory space for the mirror view
+            typedef typename Space::memory_space memory_space;
+            // Check whether it is the same memory space
+            enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value };
+            // The array_layout
+            typedef typename src_view_type::array_layout array_layout;
+            // The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
+            typedef typename src_view_type::non_const_data_type data_type;
+            // The destination view type if it is not the same memory space
+            typedef Kokkos::Experimental::OffsetView<data_type,array_layout,Space> view_type;
+         };
+
+      }
+
+      template< class T , class ... P >
+      inline
+      typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror
+      create_mirror( const Kokkos::Experimental::OffsetView<T,P...> & src
+            , typename std::enable_if<
+            ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
+            , Kokkos::LayoutStride >::value
+            >::type * = 0
+      )
+      {
+         typedef OffsetView<T,P...>             src_type ;
+         typedef typename src_type::HostMirror  dst_type ;
+
+         return dst_type( Kokkos::Impl::ViewCtorProp< std::string >(std::string( src.label() ).append("_mirror") ),
+               typename Kokkos::ViewTraits<T,P...>::array_layout
+               ( src.extent(0), src.extent(1), src.extent(2), src.extent(3), src.extent(4),
+                     src.extent(5), src.extent(6), src.extent(7) ),
+                     { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
+                           src.begin(5), src.begin(6), src.begin(7) });
+      }
+
+      template< class T , class ... P >
+      inline
+      typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror
+      create_mirror( const Kokkos::Experimental::OffsetView<T,P...> & src
+            , typename std::enable_if<
+            std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
+            , Kokkos::LayoutStride >::value
+            >::type * = 0
+      )
+      {
+         typedef OffsetView<T,P...>             src_type ;
+         typedef typename src_type::HostMirror  dst_type ;
+
+         Kokkos::LayoutStride layout ;
+
+         layout.dimension[0] = src.extent(0);
+         layout.dimension[1] = src.extent(1);
+         layout.dimension[2] = src.extent(2);
+         layout.dimension[3] = src.extent(3);
+         layout.dimension[4] = src.extent(4);
+         layout.dimension[5] = src.extent(5);
+         layout.dimension[6] = src.extent(6);
+         layout.dimension[7] = src.extent(7);
+
+         layout.stride[0] = src.stride_0();
+         layout.stride[1] = src.stride_1();
+         layout.stride[2] = src.stride_2();
+         layout.stride[3] = src.stride_3();
+         layout.stride[4] = src.stride_4();
+         layout.stride[5] = src.stride_5();
+         layout.stride[6] = src.stride_6();
+         layout.stride[7] = src.stride_7();
+
+         return dst_type( std::string( src.label() ).append("_mirror") , layout,
+               { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
+                     src.begin(5), src.begin(6), src.begin(7) } );
+      }
+
+
+      // Create a mirror in a new space (specialization for different space)
+      template<class Space, class T, class ... P>
+      typename Kokkos::Experimental::Impl::MirrorOffsetType<Space,T,P ...>::view_type
+      create_mirror(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src) {
+         return typename Kokkos::Experimental::Impl::MirrorOffsetType<Space,T,P ...>::view_type(src.label(),src.layout(),
+               { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
+                     src.begin(5), src.begin(6), src.begin(7) } );
+      }
+
+
+      template< class T , class ... P >
+      inline
+      typename Kokkos::Experimental::OffsetView< T, P... >::HostMirror
+      create_mirror_view( const typename Kokkos::Experimental::OffsetView< T,P... > & src
+            , typename std::enable_if<(
+                  std::is_same< typename Kokkos::Experimental::OffsetView<T,P...>::memory_space
+                  , typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror::memory_space
+                  >::value
+                  &&
+                  std::is_same< typename Kokkos::Experimental::OffsetView<T,P...>::data_type
+                  , typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror::data_type
+                  >::value
+            )>::type * = 0
+      )
+      {
+         return src ;
+      }
+
+      template< class T , class ... P >
+      inline
+      typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror
+      create_mirror_view( const Kokkos::Experimental::OffsetView<T,P...> & src
+            , typename std::enable_if< ! (
+                  std::is_same< typename Kokkos::Experimental::OffsetView<T,P...>::memory_space
+                  , typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror::memory_space
+                  >::value
+                  &&
+                  std::is_same< typename Kokkos::Experimental::OffsetView<T,P...>::data_type
+                  , typename Kokkos::Experimental::OffsetView<T,P...>::HostMirror::data_type
+                  >::value
+            )>::type * = 0
+      )
+      {
+	return Kokkos::Experimental::create_mirror( src );
+      }
+
+      // Create a mirror view in a new space (specialization for same space)
+      template<class Space, class T, class ... P>
+      typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space,T,P ...>::view_type
+      create_mirror_view(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src
+            , typename std::enable_if<Impl::MirrorOffsetViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+         return src;
+      }
+
+      // Create a mirror view in a new space (specialization for different space)
+      template<class Space, class T, class ... P>
+      typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space,T,P ...>::view_type
+      create_mirror_view(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src
+            , typename std::enable_if<!Impl::MirrorOffsetViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+         return typename Kokkos::Experimental::Impl::MirrorOffsetViewType<Space,T,P ...>::view_type(src.label(),src.layout(),
+               { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
+                     src.begin(5), src.begin(6), src.begin(7) } );
+      }
+      //
+      //  // Create a mirror view and deep_copy in a new space (specialization for same space)
+      //  template<class Space, class T, class ... P>
+      //  typename Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type
+      //  create_mirror_view_and_copy(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src
+      //                              , std::string const& name = ""
+      //                                  , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+      //    (void)name;
+      //    return src;
+      //  }
+      //
+      //  // Create a mirror view and deep_copy in a new space (specialization for different space)
+      //  template<class Space, class T, class ... P>
+      //  typename Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type
+      //  create_mirror_view_and_copy(const Space& , const Kokkos::Experimental::OffsetView<T,P...> & src
+      //                              , std::string const& name = ""
+      //                                  , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+      //    using Mirror = typename Kokkos::Experimental::Impl::MirrorViewType<Space,T,P ...>::view_type;
+      //    std::string label = name.empty() ? src.label() : name;
+      //    auto mirror = Mirror(ViewAllocateWithoutInitializing(label), src.layout(),
+      //                         { src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
+      //                             src.begin(5), src.begin(6), src.begin(7) });
+      //    deep_copy(mirror, src);
+      //    return mirror;
+      //  }
+
+   }
+} /* namespace Kokkos */
+
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+
+#endif /* KOKKOS_OFFSETVIEW_HPP_ */
diff --git a/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
index c522d85c5..f63ce4b88 100644
--- a/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
+++ b/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
@@ -47,7 +47,9 @@
 #include <string>
 #include <vector>
 
-#include <Kokkos_Core.hpp>
+#include <Kokkos_View.hpp>
+#include <Kokkos_Parallel.hpp>
+#include <Kokkos_Parallel_Reduce.hpp>
 
 namespace Kokkos {
 
diff --git a/packages/kokkos/containers/src/Kokkos_Vector.hpp b/packages/kokkos/containers/src/Kokkos_Vector.hpp
index 8204ba776..76c515941 100644
--- a/packages/kokkos/containers/src/Kokkos_Vector.hpp
+++ b/packages/kokkos/containers/src/Kokkos_Vector.hpp
@@ -86,14 +86,13 @@ public:
   vector():DV() {
     _size = 0;
     _extra_storage = 1.1;
-    DV::modified_host() = 1;
   }
 
 
   vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Arg1Type>("Vector",size_t(n*(1.1))) {
     _size = n;
     _extra_storage = 1.1;
-    DV::modified_host() = 1;
+    DV::modified_flags(0) = 1;
 
     assign(n,val);
   }
@@ -119,16 +118,16 @@ public:
 
           /* Assign value either on host or on device */
 
-    if( DV::modified_host() >= DV::modified_device() ) {
+    if( DV::template need_sync<typename DV::t_dev::device_type>() ) {
       set_functor_host f(DV::h_view,val);
       parallel_for(n,f);
       DV::t_host::execution_space::fence();
-      DV::modified_host()++;
+      DV::template modify<typename DV::t_host::device_type>();
     } else {
       set_functor f(DV::d_view,val);
       parallel_for(n,f);
       DV::t_dev::execution_space::fence();
-      DV::modified_device()++;
+      DV::template modify<typename DV::t_dev::device_type>();
     }
   }
 
@@ -137,7 +136,8 @@ public:
   }
 
   void push_back(Scalar val) {
-    DV::modified_host()++;
+    DV::template sync<typename DV::t_host::device_type>();
+    DV::template modify<typename DV::t_host::device_type>();
     if(_size == span()) {
       size_t new_size = _size*_extra_storage;
       if(new_size == _size) new_size++;
@@ -247,10 +247,10 @@ public:
   }
 
   void on_host() {
-    DV::modified_host() = DV::modified_device() + 1;
+    DV::template modify<typename DV::t_host::device_type>();
   }
   void on_device() {
-    DV::modified_device() = DV::modified_host() + 1;
+    DV::template modify<typename DV::t_dev::device_type>();
   }
 
   void set_overallocation(float extra) {
diff --git a/packages/kokkos/containers/unit_tests/CMakeLists.txt b/packages/kokkos/containers/unit_tests/CMakeLists.txt
index 3dbe79183..0f94afec8 100644
--- a/packages/kokkos/containers/unit_tests/CMakeLists.txt
+++ b/packages/kokkos/containers/unit_tests/CMakeLists.txt
@@ -23,6 +23,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
     threads/TestThreads_DynRankViewAPI_rank12345.cpp
     threads/TestThreads_DynRankViewAPI_rank67.cpp
     threads/TestThreads_ErrorReporter.cpp
+    threads/TestThreads_OffsetView.cpp
     threads/TestThreads_ScatterView.cpp
     threads/TestThreads_StaticCrsGraph.cpp
     threads/TestThreads_UnorderedMap.cpp
@@ -47,6 +48,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
     serial/TestSerial_DynRankViewAPI_rank12345.cpp
     serial/TestSerial_DynRankViewAPI_rank67.cpp
     serial/TestSerial_ErrorReporter.cpp
+    serial/TestSerial_OffsetView.cpp
     serial/TestSerial_ScatterView.cpp
     serial/TestSerial_StaticCrsGraph.cpp
     serial/TestSerial_UnorderedMap.cpp
@@ -71,6 +73,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
     openmp/TestOpenMP_DynRankViewAPI_rank12345.cpp
     openmp/TestOpenMP_DynRankViewAPI_rank67.cpp
     openmp/TestOpenMP_ErrorReporter.cpp
+    openmp/TestOpenMP_OffsetView.cpp
     openmp/TestOpenMP_ScatterView.cpp
     openmp/TestOpenMP_StaticCrsGraph.cpp
     openmp/TestOpenMP_UnorderedMap.cpp
@@ -95,6 +98,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
     cuda/TestCuda_DynRankViewAPI_rank12345.cpp
     cuda/TestCuda_DynRankViewAPI_rank67.cpp
     cuda/TestCuda_ErrorReporter.cpp
+    cuda/TestCuda_OffsetView.cpp
     cuda/TestCuda_ScatterView.cpp
     cuda/TestCuda_StaticCrsGraph.cpp
     cuda/TestCuda_UnorderedMap.cpp
diff --git a/packages/kokkos/containers/unit_tests/Makefile b/packages/kokkos/containers/unit_tests/Makefile
index 52d5d61aa..c0e5d2820 100644
--- a/packages/kokkos/containers/unit_tests/Makefile
+++ b/packages/kokkos/containers/unit_tests/Makefile
@@ -39,6 +39,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 	OBJ_CUDA += TestCuda_DynRankViewAPI_rank12345.o
 	OBJ_CUDA += TestCuda_DynRankViewAPI_rank67.o
 	OBJ_CUDA += TestCuda_ErrorReporter.o
+	OBJ_CUDA += TestCuda_OffsetView.o
 	OBJ_CUDA += TestCuda_ScatterView.o
 	OBJ_CUDA += TestCuda_StaticCrsGraph.o
 	OBJ_CUDA += TestCuda_UnorderedMap.o
@@ -57,6 +58,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
 	OBJ_ROCM += TestROCm_DynRankViewAPI_rank12345.o
 	OBJ_ROCM += TestROCm_DynRankViewAPI_rank67.o
 	OBJ_ROCM += TestROCm_ErrorReporter.o
+	OBJ_ROCM += TestROCm_OffsetView.o
 	OBJ_ROCM += TestROCm_ScatterView.o
 	OBJ_ROCM += TestROCm_StaticCrsGraph.o
 	OBJ_ROCM += TestROCm_UnorderedMap.o
@@ -75,6 +77,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
 	OBJ_THREADS += TestThreads_DynRankViewAPI_rank12345.o
 	OBJ_THREADS += TestThreads_DynRankViewAPI_rank67.o
 	OBJ_THREADS += TestThreads_ErrorReporter.o
+	OBJ_THREADS += TestThreads_OffsetView.o
 	OBJ_THREADS += TestThreads_ScatterView.o
 	OBJ_THREADS += TestThreads_StaticCrsGraph.o
 	OBJ_THREADS += TestThreads_UnorderedMap.o
@@ -93,6 +96,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
 	OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank12345.o
 	OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank67.o
 	OBJ_OPENMP += TestOpenMP_ErrorReporter.o
+	OBJ_OPENMP += TestOpenMP_OffsetView.o
 	OBJ_OPENMP += TestOpenMP_ScatterView.o
 	OBJ_OPENMP += TestOpenMP_StaticCrsGraph.o
 	OBJ_OPENMP += TestOpenMP_UnorderedMap.o
@@ -111,6 +115,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
 	OBJ_SERIAL += TestSerial_DynRankViewAPI_rank12345.o
 	OBJ_SERIAL += TestSerial_DynRankViewAPI_rank67.o
 	OBJ_SERIAL += TestSerial_ErrorReporter.o
+	OBJ_SERIAL += TestSerial_OffsetView.o
 	OBJ_SERIAL += TestSerial_ScatterView.o
 	OBJ_SERIAL += TestSerial_StaticCrsGraph.o
 	OBJ_SERIAL += TestSerial_UnorderedMap.o
diff --git a/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp
index 8c073710e..6684a5545 100644
--- a/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp
+++ b/packages/kokkos/containers/unit_tests/TestDynViewAPI.hpp
@@ -729,6 +729,7 @@ public:
   static void run_tests() {
     run_test_resize_realloc();
     run_test_mirror();
+    run_test_mirror_and_copy();
     run_test_scalar();
     run_test();
     run_test_const();
@@ -885,6 +886,69 @@ public:
     }
   }
 
+  static void run_test_mirror_and_copy()
+  {
+    // LayoutLeft
+    {
+      Kokkos::DynRankView< double, Kokkos::LayoutLeft, Kokkos::HostSpace > a_org( "A", 10 );
+      a_org(5) = 42.0;
+      Kokkos::DynRankView< double, Kokkos::LayoutLeft, Kokkos::HostSpace > a_h = a_org;
+      auto a_h2 = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), a_h );
+      auto a_d = Kokkos::create_mirror_view_and_copy( DeviceType(), a_h );
+      auto a_h3 = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), a_d );
+
+      int equal_ptr_h_h2 = a_h.data()  == a_h2.data() ? 1 : 0;
+      int equal_ptr_h_d  = a_h.data()  ==  a_d.data() ? 1 : 0;
+      int equal_ptr_h2_d = a_h2.data() ==  a_d.data() ? 1 : 0;
+      int equal_ptr_h3_d = a_h3.data() ==  a_d.data() ? 1 : 0;
+
+      int is_same_memspace = std::is_same< Kokkos::HostSpace, typename DeviceType::memory_space >::value ? 1 : 0;
+      ASSERT_EQ( equal_ptr_h_h2, 1 );
+      ASSERT_EQ( equal_ptr_h_d, is_same_memspace );
+      ASSERT_EQ( equal_ptr_h2_d, is_same_memspace );
+      ASSERT_EQ( equal_ptr_h3_d, is_same_memspace );
+
+      ASSERT_EQ( a_h.extent(0), a_h3.extent(0) );
+      ASSERT_EQ( a_h.extent(0), a_h2.extent(0) );
+      ASSERT_EQ( a_h.extent(0), a_d .extent(0) );
+      ASSERT_EQ( a_h.extent(0), a_h3.extent(0) );
+      ASSERT_EQ( a_h.rank(), a_org.rank() );
+      ASSERT_EQ( a_h.rank(), a_h2.rank() );
+      ASSERT_EQ( a_h.rank(), a_h3.rank() );
+      ASSERT_EQ( a_h.rank(), a_d.rank() );
+      ASSERT_EQ( a_org(5), a_h3(5) );
+    }
+    // LayoutRight
+    {
+      Kokkos::DynRankView< double, Kokkos::LayoutRight, Kokkos::HostSpace > a_org( "A", 10 );
+      a_org(5) = 42.0;
+      Kokkos::DynRankView< double, Kokkos::LayoutRight, Kokkos::HostSpace > a_h = a_org;
+      auto a_h2 = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), a_h );
+      auto a_d = Kokkos::create_mirror_view_and_copy( DeviceType(), a_h );
+      auto a_h3 = Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace(), a_d );
+
+      int equal_ptr_h_h2 = a_h.data()  == a_h2.data() ? 1 : 0;
+      int equal_ptr_h_d  = a_h.data()  ==  a_d.data() ? 1 : 0;
+      int equal_ptr_h2_d = a_h2.data() ==  a_d.data() ? 1 : 0;
+      int equal_ptr_h3_d = a_h3.data() ==  a_d.data() ? 1 : 0;
+
+      int is_same_memspace = std::is_same< Kokkos::HostSpace, typename DeviceType::memory_space >::value ? 1 : 0;
+      ASSERT_EQ( equal_ptr_h_h2, 1 );
+      ASSERT_EQ( equal_ptr_h_d, is_same_memspace );
+      ASSERT_EQ( equal_ptr_h2_d, is_same_memspace );
+      ASSERT_EQ( equal_ptr_h3_d, is_same_memspace );
+
+      ASSERT_EQ( a_h.extent(0), a_h3.extent(0) );
+      ASSERT_EQ( a_h.extent(0), a_h2.extent(0) );
+      ASSERT_EQ( a_h.extent(0), a_d .extent(0) );
+      ASSERT_EQ( a_h.rank(), a_org.rank() );
+      ASSERT_EQ( a_h.rank(), a_h2.rank() );
+      ASSERT_EQ( a_h.rank(), a_h3.rank() );
+      ASSERT_EQ( a_h.rank(), a_d.rank() );
+      ASSERT_EQ( a_org(5), a_h3(5) );
+    }
+  }
+
   static void run_test_scalar()
   {
     typedef typename dView0::HostMirror  hView0 ; //HostMirror of DynRankView is a DynRankView
diff --git a/packages/kokkos/containers/unit_tests/TestOffsetView.hpp b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp
new file mode 100644
index 000000000..6965199d4
--- /dev/null
+++ b/packages/kokkos/containers/unit_tests/TestOffsetView.hpp
@@ -0,0 +1,426 @@
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+
+/*
+ * FIXME the OffsetView class is really not very well tested.
+ */
+#ifndef CONTAINERS_UNIT_TESTS_TESTOFFSETVIEW_HPP_
+#define CONTAINERS_UNIT_TESTS_TESTOFFSETVIEW_HPP_
+
+
+
+#include <gtest/gtest.h>
+#include <iostream>
+#include <cstdlib>
+#include <cstdio>
+#include <impl/Kokkos_Timer.hpp>
+#include <Kokkos_OffsetView.hpp>
+#include <KokkosExp_MDRangePolicy.hpp>
+
+using std::endl;
+using std::cout;
+
+namespace Test{
+
+   template <typename Scalar, typename Device>
+   void test_offsetview_construction(unsigned int size)
+   {
+
+      typedef Kokkos::Experimental::OffsetView<Scalar**, Device> offset_view_type;
+      typedef Kokkos::View<Scalar**, Device> view_type;
+
+      Kokkos::Experimental::index_list_type range0 = {-1, 3};
+      Kokkos::Experimental::index_list_type range1 = {-2, 2};
+
+      offset_view_type ov("firstOV", range0, range1);
+
+      ASSERT_EQ("firstOV", ov.label());
+      ASSERT_EQ(2, ov.Rank);
+
+      ASSERT_EQ(ov.begin(0), -1);
+      ASSERT_EQ(ov.end(0), 4);
+
+      ASSERT_EQ(ov.begin(1), -2);
+      ASSERT_EQ(ov.end(1), 3);
+
+      ASSERT_EQ(ov.extent(0), 5);
+      ASSERT_EQ(ov.extent(1), 5);
+
+      const int ovmin0 = ov.begin(0);
+      const int ovend0 = ov.end(0);
+      const int ovmin1 = ov.begin(1);
+      const int ovend1 = ov.end(1);
+
+#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
+      {
+         Kokkos::Experimental::OffsetView<Scalar*, Device> offsetV1("OneDOffsetView", range0);
+
+         Kokkos::RangePolicy<Device, int> rangePolicy1(offsetV1.begin(0), offsetV1.end(0));
+         Kokkos::parallel_for(rangePolicy1, KOKKOS_LAMBDA (const int i){
+            offsetV1(i) = 1;
+         }
+         );
+	 Kokkos::fence();
+
+         int OVResult = 0;
+         Kokkos::parallel_reduce(rangePolicy1, KOKKOS_LAMBDA(const int i, int & updateMe){
+            updateMe += offsetV1(i);
+         }, OVResult);
+	 
+	 Kokkos::fence();
+         ASSERT_EQ(OVResult, offsetV1.end(0) - offsetV1.begin(0)) << "found wrong number of elements in OffsetView that was summed.";
+
+      }
+      {  //test deep copy of scalar const value into mirro
+         const int constVal = 6;
+         typename offset_view_type::HostMirror hostOffsetView =
+               Kokkos::Experimental::create_mirror_view(ov);
+
+         Kokkos::Experimental::deep_copy(hostOffsetView, constVal);
+
+         for(int i = hostOffsetView.begin(0); i < hostOffsetView.end(0); ++i) {
+            for(int j = hostOffsetView.begin(1); j < hostOffsetView.end(1); ++j) {
+               ASSERT_EQ(hostOffsetView(i,j),  constVal) << "Bad data found in OffsetView";
+            }
+         }
+      }
+
+      typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type;
+      typedef typename range_type::point_type point_type;
+
+      range_type rangePolicy2D(point_type{ {ovmin0, ovmin1 } },
+            point_type{ { ovend0, ovend1 } });
+
+      const int constValue = 9;
+      Kokkos::parallel_for(rangePolicy2D, KOKKOS_LAMBDA (const int i, const int j) {
+         ov(i,j) =  constValue;
+      }
+      );
+      
+      //test offsetview to offsetviewmirror deep copy
+      typename offset_view_type::HostMirror hostOffsetView =
+            Kokkos::Experimental::create_mirror_view(ov);
+
+      Kokkos::Experimental::deep_copy(hostOffsetView, ov);
+
+      for(int i = hostOffsetView.begin(0); i < hostOffsetView.end(0); ++i) {
+         for(int j = hostOffsetView.begin(1); j < hostOffsetView.end(1); ++j) {
+            ASSERT_EQ(hostOffsetView(i,j),  constValue) << "Bad data found in OffsetView";
+         }
+      }
+      
+     int OVResult = 0;
+      Kokkos::parallel_reduce(rangePolicy2D, KOKKOS_LAMBDA(const int i, const int j, int & updateMe){
+         updateMe += ov(i, j);
+      }, OVResult);
+
+      int answer = 0;
+      for(int i = ov.begin(0); i < ov.end(0); ++i) {
+         for(int j = ov.begin(1); j < ov.end(1); ++j) {
+            answer += constValue;
+         }
+      }
+      
+      ASSERT_EQ(OVResult, answer) << "Bad data found in OffsetView";
+#endif
+
+      {
+         offset_view_type ovCopy(ov);
+         ASSERT_EQ(ovCopy==ov, true) <<
+               "Copy constructor or equivalence operator broken";
+      }
+      
+      {
+         offset_view_type ovAssigned = ov;
+         ASSERT_EQ(ovAssigned==ov, true) <<
+               "Assignment operator or equivalence operator broken";
+      }
+      
+      {  //construct OffsetView from a View plus begins array
+         const int extent0 = 100;
+         const int extent1 = 200;
+         const int extent2 = 300;
+         Kokkos::View<Scalar***, Device> view3D("view3D", extent0, extent1, extent2);
+
+         Kokkos::deep_copy(view3D, 1);
+
+         Kokkos::Array<int64_t,3> begins = {{-10, -20, -30}};
+         Kokkos::Experimental::OffsetView<Scalar***, Device> offsetView3D(view3D, begins);
+
+         typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<3>, Kokkos::IndexType<int64_t> > range3_type;
+         typedef typename range3_type::point_type point3_type;
+
+         range3_type rangePolicy3DZero(point3_type{ {0, 0, 0 } },
+               point3_type{ { extent0, extent1, extent2 } });
+
+#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
+        int view3DSum = 0;
+         Kokkos::parallel_reduce(rangePolicy3DZero, KOKKOS_LAMBDA(const int i, const int j, int k, int & updateMe){
+            updateMe += view3D(i, j, k);
+         }, view3DSum);
+
+         range3_type rangePolicy3D(point3_type{ {begins[0], begins[1], begins[2] } },
+               point3_type{ { begins[0] + extent0, begins[1] + extent1, begins[2] + extent2 } });
+         int offsetView3DSum = 0;
+
+         Kokkos::parallel_reduce(rangePolicy3D, KOKKOS_LAMBDA(const int i, const int j, int k, int & updateMe){
+            updateMe += offsetView3D(i, j, k);
+         }, offsetView3DSum);
+
+         ASSERT_EQ(view3DSum, offsetView3DSum) << "construction of OffsetView from View and begins array broken.";
+#endif
+      }
+      view_type viewFromOV = ov.view();
+
+      ASSERT_EQ(viewFromOV == ov, true) <<
+            "OffsetView::view() or equivalence operator View == OffsetView broken";
+
+      {
+         offset_view_type ovFromV(viewFromOV, {-1, -2});
+
+         ASSERT_EQ(ovFromV == viewFromOV , true) <<
+               "Construction of OffsetView from View or equivalence operator OffsetView == View broken";
+      }
+      {
+         offset_view_type ovFromV = viewFromOV;
+         ASSERT_EQ(ovFromV == viewFromOV , true) <<
+               "Construction of OffsetView from View by assignment (implicit conversion) or equivalence operator OffsetView == View broken";
+      }
+
+      {// test offsetview to view deep copy
+         view_type aView("aView", ov.extent(0), ov.extent(1));
+         Kokkos::Experimental::deep_copy(aView, ov);
+
+#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
+         int sum = 0;
+         Kokkos::parallel_reduce(rangePolicy2D, KOKKOS_LAMBDA(const int i, const int j, int & updateMe){
+            updateMe += ov(i, j) - aView(i- ov.begin(0), j-ov.begin(1));
+         }, sum);
+
+         ASSERT_EQ(sum, 0) << "deep_copy(view, offsetView) broken.";
+#endif
+      }
+
+      {// test view to  offsetview deep copy
+         view_type aView("aView", ov.extent(0), ov.extent(1));
+
+         Kokkos::deep_copy(aView, 99);
+         Kokkos::Experimental::deep_copy(ov, aView);
+	 
+
+#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
+         int sum = 0;
+         Kokkos::parallel_reduce(rangePolicy2D, KOKKOS_LAMBDA(const int i, const int j, int & updateMe){
+            updateMe += ov(i, j) - aView(i- ov.begin(0), j-ov.begin(1));
+         }, sum);
+
+         ASSERT_EQ(sum, 0) << "deep_copy(offsetView, view) broken.";
+#endif
+      }
+   }
+   template <typename Scalar, typename Device>
+   void test_offsetview_subview(unsigned int size)
+   {
+      {//test subview 1
+          Kokkos::Experimental::OffsetView<Scalar*, Device> sliceMe("offsetToSlice", {-10, 20});
+          {
+             auto offsetSubviewa = Kokkos::Experimental::subview(sliceMe, 0);
+             ASSERT_EQ(offsetSubviewa.Rank, 0) << "subview of offset is broken.";
+          }
+
+       }
+      {//test subview 2
+         Kokkos::Experimental::OffsetView<Scalar**, Device> sliceMe("offsetToSlice", {-10,20}, {-20,30});
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(),-2);
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL());
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+      }
+
+
+      {//test subview rank 3
+
+         Kokkos::Experimental::OffsetView<Scalar***, Device> sliceMe("offsetToSlice", {-10,20}, {-20,30}, {-30,40});
+
+         //slice 1
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe,Kokkos::ALL(),Kokkos::ALL(), 0);
+            ASSERT_EQ(offsetSubview.Rank, 2) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe,Kokkos::ALL(), 0,Kokkos::ALL());
+            ASSERT_EQ(offsetSubview.Rank, 2) << "subview of offset is broken.";
+         }
+
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe,0, Kokkos::ALL(),Kokkos::ALL());
+            ASSERT_EQ(offsetSubview.Rank, 2) << "subview of offset is broken.";
+
+         }
+         {
+	   auto offsetSubview = Kokkos::Experimental::subview(sliceMe,0, Kokkos::ALL(), Kokkos::make_pair(-30, -21));
+            ASSERT_EQ(offsetSubview.Rank, 2) << "subview of offset is broken.";
+
+            ASSERT_EQ(offsetSubview.begin(0) , -20);
+            ASSERT_EQ(offsetSubview.end(0) , 31);
+            ASSERT_EQ(offsetSubview.begin(1) , 0);
+            ASSERT_EQ(offsetSubview.end(1) , 9);
+
+#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
+            typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type;
+            typedef typename range_type::point_type point_type;
+
+            const int b0 = offsetSubview.begin(0);
+            const int b1 = offsetSubview.begin(1);
+
+            const int e0 = offsetSubview.end(0);
+            const int e1 = offsetSubview.end(1);
+
+            range_type rangeP2D(point_type{ {b0, b1 } }, point_type{ { e0, e1} });
+
+            Kokkos::parallel_for(rangeP2D, KOKKOS_LAMBDA(const int i, const int j) {
+               offsetSubview(i,j) =  6;
+            }
+            );
+
+            int sum = 0;
+             Kokkos::parallel_reduce(rangeP2D, KOKKOS_LAMBDA(const int i, const int j, int & updateMe){
+                updateMe += offsetSubview(i, j);
+             }, sum);
+
+            ASSERT_EQ(sum, 6*(e0-b0)*(e1-b1));
+#endif
+         }
+
+         // slice 2
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, 0);
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, 0, Kokkos::ALL());
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL(), 0);
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+      }
+
+      {//test subview rank 4
+
+         Kokkos::Experimental::OffsetView<Scalar****, Device> sliceMe("offsetToSlice", {-10,20}, {-20,30}, {-30,40}, {-40, 50});
+
+         //slice 1
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(),Kokkos::ALL(), Kokkos::ALL(), 0);
+            ASSERT_EQ(offsetSubview.Rank, 3) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), Kokkos::ALL(), 0, Kokkos::ALL());
+            ASSERT_EQ(offsetSubview.Rank, 3) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe ,Kokkos::ALL(), 0, Kokkos::ALL(),Kokkos::ALL());
+            ASSERT_EQ(offsetSubview.Rank, 3) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe , 0, Kokkos::ALL(), Kokkos::ALL(),  Kokkos::ALL() );
+            ASSERT_EQ(offsetSubview.Rank, 3) << "subview of offset is broken.";
+         }
+
+         // slice 2
+         auto offsetSubview2a = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), Kokkos::ALL(), 0, 0);
+         ASSERT_EQ(offsetSubview2a.Rank, 2) << "subview of offset is broken.";
+         {
+            auto offsetSubview2b = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, Kokkos::ALL(), 0);
+            ASSERT_EQ(offsetSubview2b.Rank, 2) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview2b = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, 0, Kokkos::ALL());
+            ASSERT_EQ(offsetSubview2b.Rank, 2) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview2b = Kokkos::Experimental::subview(sliceMe,  0, Kokkos::ALL(), 0, Kokkos::ALL());
+            ASSERT_EQ(offsetSubview2b.Rank, 2) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview2b = Kokkos::Experimental::subview(sliceMe,  0, 0, Kokkos::ALL(), Kokkos::ALL());
+            ASSERT_EQ(offsetSubview2b.Rank, 2) << "subview of offset is broken.";
+         }
+         // slice 3
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, Kokkos::ALL(), 0, 0, 0);
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe, 0, Kokkos::ALL(), 0, 0);
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe,  0, 0, Kokkos::ALL(), 0);
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+         {
+            auto offsetSubview = Kokkos::Experimental::subview(sliceMe,  0, 0, 0, Kokkos::ALL());
+            ASSERT_EQ(offsetSubview.Rank, 1) << "subview of offset is broken.";
+         }
+
+      }
+
+   }
+
+   TEST_F( TEST_CATEGORY, offsetview_construction) {
+      test_offsetview_construction<int,TEST_EXECSPACE>(10);
+   }
+   TEST_F( TEST_CATEGORY, offsetview_subview) {
+      test_offsetview_subview<int,TEST_EXECSPACE>(10);
+   }
+
+} // namespace Test
+
+#endif /* CONTAINERS_UNIT_TESTS_TESTOFFSETVIEW_HPP_ */
diff --git a/packages/kokkos/containers/unit_tests/TestScatterView.hpp b/packages/kokkos/containers/unit_tests/TestScatterView.hpp
index 106d2cf98..d402a91b9 100644
--- a/packages/kokkos/containers/unit_tests/TestScatterView.hpp
+++ b/packages/kokkos/containers/unit_tests/TestScatterView.hpp
@@ -80,7 +80,9 @@ void test_scatter_view_config(int n)
     Kokkos::Experimental::contribute(original_view, scatter_view);
   }
 #if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+  Kokkos::fence();
   auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), original_view);
+  Kokkos::fence();
   for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) {
     auto val0 = host_view(i, 0);
     auto val1 = host_view(i, 1);
@@ -111,9 +113,6 @@ struct TestDuplicatedScatterView {
     test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
       Kokkos::Experimental::ScatterDuplicated,
       Kokkos::Experimental::ScatterNonAtomic>(n);
-    test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
-      Kokkos::Experimental::ScatterDuplicated,
-      Kokkos::Experimental::ScatterAtomic>(n);
   }
 };
 
@@ -127,6 +126,16 @@ struct TestDuplicatedScatterView<Kokkos::Cuda> {
 };
 #endif
 
+#ifdef KOKKOS_ENABLE_ROCM
+// disable duplicated instantiation with ROCm until
+// UniqueToken can support it
+template <>
+struct TestDuplicatedScatterView<Kokkos::Experimental::ROCm> {
+  TestDuplicatedScatterView(int) {
+  }
+};
+#endif
+
 template <typename ExecSpace>
 void test_scatter_view(int n)
 {
@@ -142,16 +151,28 @@ void test_scatter_view(int n)
       Kokkos::Experimental::ScatterNonDuplicated,
       Kokkos::Experimental::ScatterNonAtomic>(n);
   }
+#ifdef KOKKOS_ENABLE_SERIAL
+  if (!std::is_same<ExecSpace, Kokkos::Serial>::value) {
+#endif
   test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
     Kokkos::Experimental::ScatterNonDuplicated,
     Kokkos::Experimental::ScatterAtomic>(n);
+#ifdef KOKKOS_ENABLE_SERIAL
+  }
+#endif
 
   TestDuplicatedScatterView<ExecSpace> duptest(n);
 }
 
 TEST_F( TEST_CATEGORY, scatterview) {
+#ifndef KOKKOS_ENABLE_ROCM
   test_scatter_view<TEST_EXECSPACE>(10);
+#ifdef KOKKOS_ENABLE_DEBUG
+  test_scatter_view<TEST_EXECSPACE>(100000);
+#else
   test_scatter_view<TEST_EXECSPACE>(10000000);
+#endif
+#endif
 }
 
 } // namespace Test
diff --git a/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp b/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
index 06d7ed824..7ba307079 100644
--- a/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
+++ b/packages/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
@@ -46,6 +46,7 @@
 #include <vector>
 
 #include <Kokkos_StaticCrsGraph.hpp>
+#include <Kokkos_Core.hpp>
 
 /*--------------------------------------------------------------------------*/
 namespace Test {
diff --git a/packages/kokkos/containers/unit_tests/cuda/TestCuda_OffsetView.cpp b/packages/kokkos/containers/unit_tests/cuda/TestCuda_OffsetView.cpp
new file mode 100644
index 000000000..546f6d603
--- /dev/null
+++ b/packages/kokkos/containers/unit_tests/cuda/TestCuda_OffsetView.cpp
@@ -0,0 +1,47 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<cuda/TestCuda_Category.hpp>
+#include<TestOffsetView.hpp>
+
diff --git a/packages/kokkos/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp b/packages/kokkos/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp
new file mode 100644
index 000000000..169dae321
--- /dev/null
+++ b/packages/kokkos/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp
@@ -0,0 +1,47 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<openmp/TestOpenMP_Category.hpp>
+#include<TestOffsetView.hpp>
+
diff --git a/packages/kokkos/containers/unit_tests/rocm/TestROCm_Category.hpp b/packages/kokkos/containers/unit_tests/rocm/TestROCm_Category.hpp
index 555ddd6bd..d520bbc5a 100644
--- a/packages/kokkos/containers/unit_tests/rocm/TestROCm_Category.hpp
+++ b/packages/kokkos/containers/unit_tests/rocm/TestROCm_Category.hpp
@@ -60,6 +60,6 @@ protected:
 } // namespace Test
 
 #define TEST_CATEGORY rocm
-#define TEST_EXECSPACE Kokkos::ROCm
+#define TEST_EXECSPACE Kokkos::Experimental::ROCm
 
 #endif
diff --git a/packages/kokkos/containers/unit_tests/serial/TestSerial_OffsetView.cpp b/packages/kokkos/containers/unit_tests/serial/TestSerial_OffsetView.cpp
new file mode 100644
index 000000000..fadd748ef
--- /dev/null
+++ b/packages/kokkos/containers/unit_tests/serial/TestSerial_OffsetView.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<serial/TestSerial_Category.hpp>
+#include<TestOffsetView.hpp>
+
diff --git a/packages/kokkos/containers/unit_tests/threads/TestThreads_OffsetView.cpp b/packages/kokkos/containers/unit_tests/threads/TestThreads_OffsetView.cpp
new file mode 100644
index 000000000..d1eaa265e
--- /dev/null
+++ b/packages/kokkos/containers/unit_tests/threads/TestThreads_OffsetView.cpp
@@ -0,0 +1,47 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<threads/TestThreads_Category.hpp>
+#include<TestOffsetView.hpp>
+
diff --git a/packages/kokkos/core/src/CMakeLists.txt b/packages/kokkos/core/src/CMakeLists.txt
index eb0261670..ab7f3f55c 100644
--- a/packages/kokkos/core/src/CMakeLists.txt
+++ b/packages/kokkos/core/src/CMakeLists.txt
@@ -108,3 +108,7 @@ else()
 
 endif()
 #-----------------------------------------------------------------------------
+
+# build and install pkgconfig file
+CONFIGURE_FILE(kokkos.pc.in kokkos.pc @ONLY)
+INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig)
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
index ada3f64fe..c31b7f5b5 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
@@ -208,7 +208,7 @@ struct CudaParallelLaunch< DriverType
                     , const int          shmem
                     , const cudaStream_t stream = 0 )
   {
-    if ( grid.x && ( block.x * block.y * block.z ) ) {
+    if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) {
 
       if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) <
            sizeof( DriverType ) ) {
@@ -264,7 +264,7 @@ struct CudaParallelLaunch< DriverType
                     , const int          shmem
                     , const cudaStream_t stream = 0 )
   {
-    if ( grid.x && ( block.x * block.y * block.z ) ) {
+    if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) {
 
       if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) <
            sizeof( DriverType ) ) {
@@ -321,7 +321,7 @@ struct CudaParallelLaunch< DriverType
                     , const int          shmem
                     , const cudaStream_t stream = 0 )
   {
-    if ( grid.x && ( block.x * block.y * block.z ) ) {
+    if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) {
 
       if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) <
            sizeof( DriverType ) ) {
@@ -370,7 +370,7 @@ struct CudaParallelLaunch< DriverType
                     , const int          shmem
                     , const cudaStream_t stream = 0 )
   {
-    if ( grid.x && ( block.x * block.y * block.z ) ) {
+    if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) {
 
       if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) <
            sizeof( DriverType ) ) {
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
index 302cf13d4..4fa460996 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
@@ -453,6 +453,8 @@ SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
           , arg_label.c_str()
           , SharedAllocationHeader::maximum_label_length
           );
+  // Set last element zero, in case c_str is too long
+  header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
 
   // Copy to device memory
   Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
@@ -491,6 +493,9 @@ SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
           , arg_label.c_str()
           , SharedAllocationHeader::maximum_label_length
           );
+
+  // Set last element zero, in case c_str is too long
+  RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
 }
 
 SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
@@ -525,6 +530,8 @@ SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
           , arg_label.c_str()
           , SharedAllocationHeader::maximum_label_length
           );
+  // Set last element zero, in case c_str is too long
+  RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
 }
 
 //----------------------------------------------------------------------------
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
index 8249da6a8..16952a3ae 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
@@ -689,9 +689,13 @@ Cuda::size_type cuda_internal_multiprocessor_count()
 
 CudaSpace::size_type cuda_internal_maximum_concurrent_block_count()
 {
+  #if defined(KOKKOS_ARCH_KEPLER)
+  // Compute capability 3.0 through 3.7
+  enum : int { max_resident_blocks_per_multiprocessor = 16 };
+  #else
   // Compute capability 5.0 through 6.2
   enum : int { max_resident_blocks_per_multiprocessor = 32 };
-
+  #endif
    return CudaInternal::singleton().m_multiProcCount
           * max_resident_blocks_per_multiprocessor ;
 };
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp
index 31f405dd8..145d93ed7 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp
@@ -52,22 +52,22 @@
 
 namespace Kokkos { namespace Impl {
 
-template<class DriverType, bool Large>
+template<class DriverType, class LaunchBounds, bool Large>
 struct CudaGetMaxBlockSize;
 
-template<class DriverType, bool Large = (CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))>
+template<class DriverType, class LaunchBounds>
 int cuda_get_max_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
                             const size_t shmem_extra_block, const size_t shmem_extra_thread) {
-  return CudaGetMaxBlockSize<DriverType,Large>::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread);
+  return CudaGetMaxBlockSize<DriverType,LaunchBounds,(CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))>::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread);
 }
 
 
 template<class DriverType>
-struct CudaGetMaxBlockSize<DriverType,true> {
+struct CudaGetMaxBlockSize<DriverType,Kokkos::LaunchBounds<>,true> {
   static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
                             const size_t shmem_extra_block, const size_t shmem_extra_thread) {
     int numBlocks;
-    int blockSize=32;
+    int blockSize=1024;
     int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
                     FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
     cudaOccupancyMaxActiveBlocksPerMultiprocessor(
@@ -76,8 +76,9 @@ struct CudaGetMaxBlockSize<DriverType,true> {
         blockSize,
         sharedmem);
 
-    while (blockSize<1024 && numBlocks>0) {
-      blockSize*=2;
+    if(numBlocks>0) return blockSize;
+    while (blockSize>32 && numBlocks==0) {
+      blockSize/=2;
       sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
                   FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
 
@@ -87,19 +88,30 @@ struct CudaGetMaxBlockSize<DriverType,true> {
           blockSize,
           sharedmem);
     }
-    if(numBlocks>0) return blockSize;
-    else return blockSize/2;
+    int blockSizeUpperBound = blockSize*2;
+    while (blockSize<blockSizeUpperBound && numBlocks>0) {
+      blockSize+=32;
+      sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                  FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+            &numBlocks,
+            cuda_parallel_launch_constant_memory<DriverType>,
+            blockSize,
+            sharedmem);
+    }
+    return blockSize - 32;
   }
 };
 
 template<class DriverType>
-struct CudaGetMaxBlockSize<DriverType,false> {
+struct CudaGetMaxBlockSize<DriverType,Kokkos::LaunchBounds<>,false> {
   static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
                             const size_t shmem_extra_block, const size_t shmem_extra_thread) {
     int numBlocks;
 
-    int blockSize=32;
-    int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+    unsigned int blockSize=1024;
+    unsigned int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
                     FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
     cudaOccupancyMaxActiveBlocksPerMultiprocessor(
         &numBlocks,
@@ -107,8 +119,9 @@ struct CudaGetMaxBlockSize<DriverType,false> {
         blockSize,
         sharedmem);
 
-    while (blockSize<1024 && numBlocks>0) {
-      blockSize*=2;
+    if(numBlocks>0) return blockSize;
+    while (blockSize>32 && numBlocks==0) {
+      blockSize/=2;
       sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
                   FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
 
@@ -118,24 +131,121 @@ struct CudaGetMaxBlockSize<DriverType,false> {
           blockSize,
           sharedmem);
     }
-    if(numBlocks>0) return blockSize;
-    else return blockSize/2;
+    unsigned int blockSizeUpperBound = blockSize*2;
+    while (blockSize<blockSizeUpperBound && numBlocks>0) {
+      blockSize+=32;
+      sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                  FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+            &numBlocks,
+            cuda_parallel_launch_local_memory<DriverType>,
+            blockSize,
+            sharedmem);
+    }
+    return blockSize - 32;
   }
 };
 
+template<class DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM>
+struct CudaGetMaxBlockSize<DriverType,Kokkos::LaunchBounds<MaxThreadsPerBlock,MinBlocksPerSM>,true> {
+  static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
+                            const size_t shmem_extra_block, const size_t shmem_extra_thread) {
+    int numBlocks = 0, oldNumBlocks = 0;
+    unsigned int blockSize=MaxThreadsPerBlock;
+    unsigned int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                    FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+    cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+        &numBlocks,
+        cuda_parallel_launch_constant_memory<DriverType,MaxThreadsPerBlock,MinBlocksPerSM>,
+        blockSize,
+        sharedmem);
+
+    if(static_cast<unsigned int>(numBlocks)>=MinBlocksPerSM) return blockSize;
 
+    while (blockSize>32 && static_cast<unsigned int>(numBlocks)<MinBlocksPerSM) {
+      blockSize/=2;
+      sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                  FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
 
-template<class DriverType, bool Large>
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+          &numBlocks,
+          cuda_parallel_launch_constant_memory<DriverType>,
+          blockSize,
+          sharedmem);
+    }
+    unsigned int blockSizeUpperBound = (blockSize*2<MaxThreadsPerBlock?blockSize*2:MaxThreadsPerBlock);
+    while (blockSize<blockSizeUpperBound && static_cast<unsigned int>(numBlocks)>MinBlocksPerSM) {
+      blockSize+=32;
+      sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                  FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+      oldNumBlocks = numBlocks;
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+            &numBlocks,
+            cuda_parallel_launch_constant_memory<DriverType>,
+            blockSize,
+            sharedmem);
+    }
+    if(static_cast<unsigned int>(oldNumBlocks)>=MinBlocksPerSM) return blockSize - 32;
+    return -1;
+  }
+};
+
+template<class DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM>
+struct CudaGetMaxBlockSize<DriverType,Kokkos::LaunchBounds<MaxThreadsPerBlock,MinBlocksPerSM>,false> {
+  static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
+                            const size_t shmem_extra_block, const size_t shmem_extra_thread) {
+    int numBlocks = 0, oldNumBlocks = 0;
+    unsigned int blockSize=MaxThreadsPerBlock;
+    int sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                    FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+    cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+        &numBlocks,
+        cuda_parallel_launch_local_memory<DriverType,MaxThreadsPerBlock,MinBlocksPerSM>,
+        blockSize,
+        sharedmem);
+    if(static_cast<unsigned int>(numBlocks)>=MinBlocksPerSM) return blockSize;
+
+    while (blockSize>32 && static_cast<unsigned int>(numBlocks)<MinBlocksPerSM) {
+      blockSize/=2;
+      sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                  FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+          &numBlocks,
+          cuda_parallel_launch_local_memory<DriverType>,
+          blockSize,
+          sharedmem);
+    }
+    unsigned int blockSizeUpperBound = (blockSize*2<MaxThreadsPerBlock?blockSize*2:MaxThreadsPerBlock);
+    while (blockSize<blockSizeUpperBound && static_cast<unsigned int>(numBlocks)>=MinBlocksPerSM) {
+      blockSize+=32;
+      sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                  FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+      oldNumBlocks = numBlocks;
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+            &numBlocks,
+            cuda_parallel_launch_local_memory<DriverType>,
+            blockSize,
+            sharedmem);
+    }
+    if(static_cast<unsigned int>(oldNumBlocks)>=MinBlocksPerSM) return blockSize - 32;
+    return -1;
+  }
+};
+
+
+template<class DriverType, class LaunchBounds, bool Large>
 struct CudaGetOptBlockSize;
 
-template<class DriverType, bool Large = (CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))>
+template<class DriverType, class LaunchBounds>
 int cuda_get_opt_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
                             const size_t shmem_extra_block, const size_t shmem_extra_thread) {
-  return CudaGetOptBlockSize<DriverType,Large>::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread);
+  return CudaGetOptBlockSize<DriverType,LaunchBounds,(CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))>::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread);
 }
 
 template<class DriverType>
-struct CudaGetOptBlockSize<DriverType,true> {
+struct CudaGetOptBlockSize<DriverType,Kokkos::LaunchBounds<>,true> {
   static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
                             const size_t shmem_extra_block, const size_t shmem_extra_thread) {
     int blockSize=16;
@@ -165,7 +275,7 @@ struct CudaGetOptBlockSize<DriverType,true> {
 };
 
 template<class DriverType>
-struct CudaGetOptBlockSize<DriverType,false> {
+struct CudaGetOptBlockSize<DriverType,Kokkos::LaunchBounds<>,false> {
   static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
                             const size_t shmem_extra_block, const size_t shmem_extra_thread) {
     int blockSize=16;
@@ -194,6 +304,75 @@ struct CudaGetOptBlockSize<DriverType,false> {
   }
 };
 
+template<class DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM>
+struct CudaGetOptBlockSize<DriverType,Kokkos::LaunchBounds< MaxThreadsPerBlock, MinBlocksPerSM >,true> {
+  static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
+                            const size_t shmem_extra_block, const size_t shmem_extra_thread) {
+    int blockSize=16;
+    int numBlocks;
+    int sharedmem;
+    int maxOccupancy=0;
+    int bestBlockSize=0;
+    int max_threads_per_block = std::min(MaxThreadsPerBlock,cuda_internal_maximum_warp_count()*CudaTraits::WarpSize);
+
+    while(blockSize < max_threads_per_block ) {
+      blockSize*=2;
+
+      //calculate the occupancy with that optBlockSize and check whether its larger than the largest one found so far
+      sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                  FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+              &numBlocks,
+              cuda_parallel_launch_constant_memory<DriverType,MaxThreadsPerBlock,MinBlocksPerSM>,
+              blockSize,
+              sharedmem);
+      if(numBlocks >= int(MinBlocksPerSM) && blockSize<=int(MaxThreadsPerBlock)) {
+        if(maxOccupancy < numBlocks*blockSize) {
+           maxOccupancy = numBlocks*blockSize;
+           bestBlockSize = blockSize;
+        }
+      }
+    }
+    if(maxOccupancy > 0)
+      return bestBlockSize;
+    return -1;
+  }
+};
+
+template<class DriverType, unsigned int MaxThreadsPerBlock, unsigned int MinBlocksPerSM>
+struct CudaGetOptBlockSize<DriverType,Kokkos::LaunchBounds< MaxThreadsPerBlock, MinBlocksPerSM >,false> {
+  static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length,
+                            const size_t shmem_extra_block, const size_t shmem_extra_thread) {
+    int blockSize=16;
+    int numBlocks;
+    int sharedmem;
+    int maxOccupancy=0;
+    int bestBlockSize=0;
+    int max_threads_per_block = std::min(MaxThreadsPerBlock,cuda_internal_maximum_warp_count()*CudaTraits::WarpSize);
+
+    while(blockSize < max_threads_per_block ) {
+      blockSize*=2;
+      sharedmem = shmem_extra_block + shmem_extra_thread*(blockSize/vector_length) +
+                  FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize/vector_length );
+
+      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
+              &numBlocks,
+              cuda_parallel_launch_local_memory<DriverType,MaxThreadsPerBlock,MinBlocksPerSM>,
+              blockSize,
+              sharedmem);
+      if(numBlocks >= int(MinBlocksPerSM) && blockSize<=int(MaxThreadsPerBlock)) {
+        if(maxOccupancy < numBlocks*blockSize) {
+          maxOccupancy = numBlocks*blockSize;
+          bestBlockSize = blockSize;
+        }
+      }
+    }
+    if(maxOccupancy > 0)
+      return bestBlockSize;
+    return -1;
+  }
+};
+
 }} // namespace Kokkos::Impl
 
 #endif // KOKKOS_ENABLE_CUDA
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
index 80192bf33..8363a4566 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
@@ -148,6 +148,9 @@ namespace Kokkos {
 namespace Impl {
 namespace {
   static int lock_array_copied = 0;
+  inline int eliminate_warning_for_lock_array() {
+    return lock_array_copied;
+  }
 }
 }
 }
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
index eac4abac1..2ae1cc0dd 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
@@ -60,6 +60,7 @@
 #include <Cuda/Kokkos_Cuda_Internal.hpp>
 #include <Cuda/Kokkos_Cuda_Locks.hpp>
 #include <Kokkos_Vectorization.hpp>
+#include <Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
 
 #if defined(KOKKOS_ENABLE_PROFILING)
 #include <impl/Kokkos_Profiling_Interface.hpp>
@@ -114,6 +115,7 @@ public:
 
   //----------------------------------------
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   template< class FunctorType >
   inline static
   int team_size_max( const FunctorType & functor )
@@ -131,7 +133,35 @@ public:
 
       return n ;
     }
+#endif
+
+  template<class FunctorType>
+  int team_size_max( const FunctorType& f, const ParallelForTag& ) const {
+    typedef Impl::ParallelFor< FunctorType , TeamPolicy<Properties...> > closure_type;
+    int block_size = Kokkos::Impl::cuda_get_max_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(),
+        (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) );
+    return block_size/vector_length();
+  }
 
+  template<class FunctorType>
+  int team_size_max( const FunctorType& f, const ParallelReduceTag& ) const {
+    typedef Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,TeamPolicyInternal,FunctorType> functor_analysis_type;
+    typedef typename Impl::ParallelReduceReturnValue<void,typename functor_analysis_type::value_type,FunctorType>::reducer_type reducer_type;
+    typedef Impl::ParallelReduce< FunctorType , TeamPolicy<Properties...>, reducer_type > closure_type;
+    typedef Impl::FunctorValueTraits< FunctorType , typename traits::work_tag > functor_value_traits;
+
+    int block_size = Kokkos::Impl::cuda_get_max_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(),
+        (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) +
+                                                          ((functor_value_traits::StaticValueSize!=0)?0:functor_value_traits::value_size( f )));
+
+    // Currently we require Power-of-2 team size for reductions.
+    int p2 = 1;
+    while(p2<=block_size) p2*=2;
+    p2/=2;
+    return p2/vector_length();
+  }
+
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   template< class FunctorType >
   static int team_size_recommended( const FunctorType & functor )
     { return team_size_max( functor ); }
@@ -143,11 +173,41 @@ public:
       if(max<1) max = 1;
       return max;
     }
+#endif
+
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType& f, const ParallelForTag& ) const {
+    typedef Impl::ParallelFor< FunctorType , TeamPolicy<Properties...> > closure_type;
+    int block_size = Kokkos::Impl::cuda_get_opt_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(),
+        (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double));
+    return block_size/vector_length();
+  }
+
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType& f, const ParallelReduceTag& ) const {
+    typedef Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,TeamPolicyInternal,FunctorType> functor_analysis_type;
+    typedef typename Impl::ParallelReduceReturnValue<void,typename functor_analysis_type::value_type,FunctorType>::reducer_type reducer_type;
+    typedef Impl::ParallelReduce< FunctorType , TeamPolicy<Properties...>, reducer_type > closure_type;
+    typedef Impl::FunctorValueTraits< FunctorType , typename traits::work_tag > functor_value_traits;
+
+    int block_size = Kokkos::Impl::cuda_get_opt_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(),
+        (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) +
+                                                          ((functor_value_traits::StaticValueSize!=0)?0:functor_value_traits::value_size( f )));
+    return block_size/vector_length();
+  }
+
 
   inline static
   int vector_length_max()
     { return Impl::CudaTraits::WarpSize; }
 
+  inline static
+  int scratch_size_max(int level)
+    { return (level==0?
+        1024*40:             // 48kB is the max for CUDA, but we need some for team_member.reduce etc.
+        20*1024*1024);   // arbitrarily setting this to 20MB, for a Volta V100 that would give us about 3.2GB for 2 teams per SM
+    }
+
   //----------------------------------------
 
   inline int vector_length()   const { return m_vector_length ; }
@@ -419,7 +479,7 @@ public:
   void execute() const
     {
       const typename Policy::index_type nwork = m_policy.end() - m_policy.begin();
-      const int block_size = Kokkos::Impl::cuda_get_opt_block_size< ParallelFor >( m_functor , 1, 0 , 0 );
+      const int block_size = Kokkos::Impl::cuda_get_opt_block_size< ParallelFor, LaunchBounds>( m_functor , 1, 0 , 0 );
       const dim3 block(  1 , block_size , 1);
       const dim3 grid( std::min( typename Policy::index_type(( nwork + block.y - 1 ) / block.y) , typename Policy::index_type(cuda_internal_maximum_grid_count()) ) , 1 , 1);
 
@@ -654,7 +714,7 @@ public:
     : m_functor( arg_functor )
     , m_league_size( arg_policy.league_size() )
     , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
-        Kokkos::Impl::cuda_get_opt_block_size< ParallelFor >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() )
+        Kokkos::Impl::cuda_get_opt_block_size< ParallelFor, LaunchBounds >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() )
     , m_vector_size( arg_policy.vector_length() )
     , m_shmem_begin( sizeof(double) * ( m_team_size + 2 ) )
     , m_shmem_size( arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( m_functor , m_team_size ) )
@@ -670,7 +730,7 @@ public:
       }
 
       if ( int(m_team_size) >
-           int(Kokkos::Impl::cuda_get_max_block_size< ParallelFor >
+           int(Kokkos::Impl::cuda_get_max_block_size< ParallelFor, LaunchBounds >
                  ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) {
         Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > requested too large team size."));
       }
@@ -725,12 +785,13 @@ public:
   const Policy        m_policy ;
   const ReducerType   m_reducer ;
   const pointer_type  m_result_ptr ;
+  const bool          m_result_ptr_device_accessible ;
   size_type *         m_scratch_space ;
   size_type *         m_scratch_flags ;
   size_type *         m_unified_space ;
 
-  // Shall we use the shfl based reduction or not (only use it for static sized types of more than 128bit
-  enum { UseShflReduction = ((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) };
+  // Shall we use the shfl based reduction or not (only use it for static sized types of more than 128bit)
+  enum { UseShflReduction = false };//((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) };
   // Some crutch to do function overloading
 private:
   typedef double DummyShflReductionType;
@@ -752,12 +813,12 @@ public:
 
   __device__ inline
   void operator() () const {
-    run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) );
+/*    run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) );
   }
 
   __device__ inline
   void run(const DummySHMEMReductionType& ) const
-  {
+  {*/
     const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
       word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) );
 
@@ -786,7 +847,8 @@ public:
       // This is the final block with the final result at the final threads' location
 
       size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ;
-      size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
+      size_type * const global = m_result_ptr_device_accessible? reinterpret_cast<size_type*>(m_result_ptr) : 
+                                 ( m_unified_space ? m_unified_space : m_scratch_space );
 
       if ( threadIdx.y == 0 ) {
         Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
@@ -798,10 +860,9 @@ public:
     }
   }
 
-  __device__ inline
+/*  __device__ inline
    void run(const DummyShflReductionType&) const
    {
-
      value_type value;
      ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &value);
      // Number of blocks is bounded so that the reduction can be limited to two passes.
@@ -832,7 +893,7 @@ public:
          *result = value;
        }
      }
-   }
+   }*/
 
   // Determine block size constrained by shared memory:
   static inline
@@ -863,16 +924,18 @@ public:
 
       CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute
 
-      Cuda::fence();
+      if(!m_result_ptr_device_accessible) {
+        Cuda::fence();
 
-      if ( m_result_ptr ) {
-        if ( m_unified_space ) {
-          const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer)  );
-          for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
-        }
-        else {
-          const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer)  );
-          DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size );
+        if ( m_result_ptr ) {
+          if ( m_unified_space ) {
+            const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer)  );
+            for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
+          }
+          else {
+            const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer)  );
+            DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size );
+          }
         }
       }
     }
@@ -883,17 +946,18 @@ public:
     }
   }
 
-  template< class HostViewType >
+  template< class ViewType >
   ParallelReduce( const FunctorType  & arg_functor
                 , const Policy       & arg_policy
-                , const HostViewType & arg_result
+                , const ViewType & arg_result
                 , typename std::enable_if<
-                   Kokkos::is_view< HostViewType >::value
+                   Kokkos::is_view< ViewType >::value
                 ,void*>::type = NULL)
   : m_functor( arg_functor )
   , m_policy(  arg_policy )
   , m_reducer( InvalidType() )
   , m_result_ptr( arg_result.data() )
+  , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ViewType::memory_space>::accessible )
   , m_scratch_space( 0 )
   , m_scratch_flags( 0 )
   , m_unified_space( 0 )
@@ -906,6 +970,7 @@ public:
   , m_policy(  arg_policy )
   , m_reducer( reducer )
   , m_result_ptr( reducer.view().data() )
+  , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ReducerType::result_view_type::memory_space>::accessible )
   , m_scratch_space( 0 )
   , m_scratch_flags( 0 )
   , m_unified_space( 0 )
@@ -953,6 +1018,7 @@ public:
   const Policy        m_policy ; // used for workrange and nwork
   const ReducerType   m_reducer ;
   const pointer_type  m_result_ptr ;
+  const bool          m_result_ptr_device_accessible ;
   size_type *         m_scratch_space ;
   size_type *         m_scratch_flags ;
   size_type *         m_unified_space ;
@@ -960,7 +1026,7 @@ public:
   typedef typename Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank, Policy, FunctorType, typename Policy::work_tag, reference_type> DeviceIteratePattern;
 
   // Shall we use the shfl based reduction or not (only use it for static sized types of more than 128bit
-  enum { UseShflReduction = ((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) };
+  enum { UseShflReduction = ((sizeof(value_type)>2*sizeof(double)) && (ValueTraits::StaticValueSize!=0)) };
   // Some crutch to do function overloading
 private:
   typedef double DummyShflReductionType;
@@ -978,12 +1044,12 @@ public:
   inline
   __device__
   void operator() (void) const {
-    run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) );
+/*    run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) );
   }
 
   __device__ inline
   void run(const DummySHMEMReductionType& ) const
-  {
+  {*/
     const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
       word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) );
 
@@ -1007,7 +1073,8 @@ public:
 
       // This is the final block with the final result at the final threads' location
       size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ;
-      size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
+      size_type * const global = m_result_ptr_device_accessible? reinterpret_cast<size_type*>(m_result_ptr) :
+                                 ( m_unified_space ? m_unified_space : m_scratch_space );
 
       if ( threadIdx.y == 0 ) {
         Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
@@ -1019,7 +1086,7 @@ public:
     }
   }
 
-  __device__ inline
+/*  __device__ inline
    void run(const DummyShflReductionType&) const
    {
 
@@ -1051,7 +1118,7 @@ public:
        }
      }
    }
-
+*/
   // Determine block size constrained by shared memory:
   static inline
   unsigned local_block_size( const FunctorType & f )
@@ -1089,16 +1156,18 @@ public:
 
       CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute
 
-      Cuda::fence();
+      if(!m_result_ptr_device_accessible) {
+        Cuda::fence();
 
-      if ( m_result_ptr ) {
-        if ( m_unified_space ) {
-          const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer)  );
-          for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
-        }
-        else {
-          const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer)  );
-          DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size );
+        if ( m_result_ptr ) {
+          if ( m_unified_space ) {
+            const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer)  );
+            for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
+          }
+          else {
+            const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer)  );
+            DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size );
+          }
         }
       }
     }
@@ -1109,17 +1178,18 @@ public:
     }
   }
 
-  template< class HostViewType >
+  template< class ViewType >
   ParallelReduce( const FunctorType  & arg_functor
                 , const Policy       & arg_policy
-                , const HostViewType & arg_result
+                , const ViewType & arg_result
                 , typename std::enable_if<
-                   Kokkos::is_view< HostViewType >::value
+                   Kokkos::is_view< ViewType >::value
                 ,void*>::type = NULL)
   : m_functor( arg_functor )
   , m_policy(  arg_policy )
   , m_reducer( InvalidType() )
   , m_result_ptr( arg_result.data() )
+  , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ViewType::memory_space>::accessible )
   , m_scratch_space( 0 )
   , m_scratch_flags( 0 )
   , m_unified_space( 0 )
@@ -1132,6 +1202,7 @@ public:
   , m_policy(  arg_policy )
   , m_reducer( reducer )
   , m_result_ptr( reducer.view().data() )
+  , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ReducerType::result_view_type::memory_space>::accessible )
   , m_scratch_space( 0 )
   , m_scratch_flags( 0 )
   , m_unified_space( 0 )
@@ -1174,7 +1245,7 @@ public:
   typedef FunctorType      functor_type ;
   typedef Cuda::size_type  size_type ;
 
-  enum { UseShflReduction = (true && ValueTraits::StaticValueSize) };
+  enum { UseShflReduction = (true && (ValueTraits::StaticValueSize!=0)) };
 
 private:
   typedef double DummyShflReductionType;
@@ -1191,6 +1262,7 @@ private:
   const FunctorType   m_functor ;
   const ReducerType   m_reducer ;
   const pointer_type  m_result_ptr ;
+  const bool          m_result_ptr_device_accessible ;
   size_type *         m_scratch_space ;
   size_type *         m_scratch_flags ;
   size_type *         m_unified_space ;
@@ -1279,7 +1351,8 @@ public:
       // This is the final block with the final result at the final threads' location
 
       size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ;
-      size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
+      size_type * const global = m_result_ptr_device_accessible? reinterpret_cast<size_type*>(m_result_ptr) :
+                                 ( m_unified_space ? m_unified_space : m_scratch_space );
 
       if ( threadIdx.y == 0 ) {
         Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
@@ -1312,12 +1385,18 @@ public:
         , value );
     }
 
-    pointer_type const result = (pointer_type) (m_unified_space ? m_unified_space : m_scratch_space) ;
+    pointer_type const result = m_result_ptr_device_accessible? m_result_ptr :
+                                (pointer_type) ( m_unified_space ? m_unified_space : m_scratch_space );
 
     value_type init;
     ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init);
-    if(Impl::cuda_inter_block_reduction<FunctorType,ValueJoin,WorkTag>
-           (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,blockDim.y)) {
+    if(
+        Impl::cuda_inter_block_reduction<FunctorType,ValueJoin,WorkTag>
+           (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,blockDim.y)
+        //This breaks a test
+        //   Kokkos::Impl::CudaReductionsFunctor<FunctorType,WorkTag,false,true>::scalar_inter_block_reduction(ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x ,
+        //              kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags)
+    ) {
       const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
       if(id==0) {
         Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
@@ -1331,7 +1410,7 @@ public:
     {
       const int nwork = m_league_size * m_team_size ;
       if ( nwork ) {
-        const int block_count = UseShflReduction? std::min( m_league_size , size_type(1024) )
+        const int block_count = UseShflReduction? std::min( m_league_size , size_type(1024*32) )
           :std::min( m_league_size , m_team_size );
 
         m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count );
@@ -1344,16 +1423,18 @@ public:
 
         CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem_size_total ); // copy to device and execute
 
-        Cuda::fence();
-
-        if ( m_result_ptr ) {
-          if ( m_unified_space ) {
-            const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) );
-            for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
-          }
-          else {
-            const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) );
-            DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size );
+        if(!m_result_ptr_device_accessible) {
+          Cuda::fence();
+
+          if ( m_result_ptr ) {
+            if ( m_unified_space ) {
+              const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) );
+              for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
+            }
+            else {
+              const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) );
+              DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size );
+            }
           }
         }
       }
@@ -1364,16 +1445,17 @@ public:
       }
     }
 
-  template< class HostViewType >
+  template< class ViewType >
   ParallelReduce( const FunctorType  & arg_functor
                 , const Policy       & arg_policy
-                , const HostViewType & arg_result
+                , const ViewType & arg_result
                 , typename std::enable_if<
-                                   Kokkos::is_view< HostViewType >::value
+                                   Kokkos::is_view< ViewType >::value
                                 ,void*>::type = NULL)
   : m_functor( arg_functor )
   , m_reducer( InvalidType() )
   , m_result_ptr( arg_result.data() )
+  , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ViewType::memory_space>::accessible )
   , m_scratch_space( 0 )
   , m_scratch_flags( 0 )
   , m_unified_space( 0 )
@@ -1383,17 +1465,17 @@ public:
   , m_scratch_ptr{NULL,NULL}
   , m_scratch_size{
     arg_policy.scratch_size(0,( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
-        Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
+        Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(),
                                                                  arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
                                                                  arg_policy.vector_length() )
     ), arg_policy.scratch_size(1,( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
-        Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
+        Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(),
                                                                  arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
                                                                  arg_policy.vector_length() )
         )}
   , m_league_size( arg_policy.league_size() )
   , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
-      Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
+      Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(),
                                                                arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
                                                                arg_policy.vector_length() )
   , m_vector_size( arg_policy.vector_length() )
@@ -1430,9 +1512,7 @@ public:
       Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch memory"));
     }
 
-    if ( unsigned(m_team_size) >
-         unsigned(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce >
-               ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) {
+    if ( int(m_team_size) > arg_policy.team_size_max(m_functor,ParallelReduceTag()) ) {
       Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size."));
     }
 
@@ -1444,6 +1524,7 @@ public:
   : m_functor( arg_functor )
   , m_reducer( reducer )
   , m_result_ptr( reducer.view().data() )
+  , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ReducerType::result_view_type::memory_space>::accessible )
   , m_scratch_space( 0 )
   , m_scratch_flags( 0 )
   , m_unified_space( 0 )
@@ -1453,7 +1534,7 @@ public:
   , m_scratch_ptr{NULL,NULL}
   , m_league_size( arg_policy.league_size() )
   , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
-      Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
+      Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(),
                                                                arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
       arg_policy.vector_length() )
   , m_vector_size( arg_policy.vector_length() )
@@ -1486,10 +1567,7 @@ public:
          CudaTraits::SharedMemoryCapacity < shmem_size_total ) {
       Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size"));
     }
-
-    if ( int(m_team_size) >
-         int(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce >
-               ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) {
+    if ( int(m_team_size) > arg_policy.team_size_max(m_functor,ParallelReduceTag()) ) {
       Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size."));
     }
 
@@ -1753,7 +1831,7 @@ public:
       // Occupancy calculator assumes whole block.
 
       m_team_size =
-        Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >
+        Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >
           ( arg_functor
           , arg_policy.vector_length()
           , arg_policy.team_scratch_size(0)
@@ -1970,7 +2048,9 @@ private:
     const WorkRange range( m_policy , blockIdx.x , gridDim.x );
 
     for ( typename Policy::member_type iwork_base = range.begin(); iwork_base < range.end() ; iwork_base += blockDim.y ) {
-
+      #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      unsigned MASK=KOKKOS_IMPL_CUDA_ACTIVEMASK;
+      #endif
       const typename Policy::member_type iwork = iwork_base + threadIdx.y ;
 
       __syncthreads(); // Don't overwrite previous iteration values until they are used
@@ -1981,7 +2061,11 @@ private:
       for ( unsigned i = threadIdx.y ; i < word_count.value ; ++i ) {
         shared_data[i + word_count.value] = shared_data[i] = shared_accum[i] ;
       }
-
+      #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      KOKKOS_IMPL_CUDA_SYNCWARP_MASK(MASK);
+      #else
+      KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+      #endif
       if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } // Protect against large scan values.
 
       // Call functor to accumulate inclusive scan value for this work item
@@ -2189,6 +2273,9 @@ private:
     const WorkRange range( m_policy , blockIdx.x , gridDim.x );
 
     for ( typename Policy::member_type iwork_base = range.begin(); iwork_base < range.end() ; iwork_base += blockDim.y ) {
+      #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      unsigned MASK=KOKKOS_IMPL_CUDA_ACTIVEMASK;
+      #endif
 
       const typename Policy::member_type iwork = iwork_base + threadIdx.y ;
 
@@ -2201,6 +2288,11 @@ private:
         shared_data[i + word_count.value] = shared_data[i] = shared_accum[i] ;
       }
 
+      #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      KOKKOS_IMPL_CUDA_SYNCWARP_MASK(MASK);
+      #else
+      KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+      #endif
       if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } // Protect against large scan values.
 
       // Call functor to accumulate inclusive scan value for this work item
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
index 676ba38c6..82d691f7d 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -194,8 +194,9 @@ void cuda_shfl_up( T & out , T const & in , int delta ,
  */
 
 template< class ValueType , class JoinOp>
-__device__
-inline void cuda_intra_warp_reduction( ValueType& result,
+__device__ inline
+typename std::enable_if< !Kokkos::is_reducer<ValueType>::value >::type
+cuda_intra_warp_reduction( ValueType& result,
                                        const JoinOp& join,
                                        const uint32_t max_active_thread = blockDim.y) {
 
@@ -214,8 +215,9 @@ inline void cuda_intra_warp_reduction( ValueType& result,
 }
 
 template< class ValueType , class JoinOp>
-__device__
-inline void cuda_inter_warp_reduction( ValueType& value,
+__device__ inline
+typename std::enable_if< !Kokkos::is_reducer<ValueType>::value >::type
+cuda_inter_warp_reduction( ValueType& value,
                                        const JoinOp& join,
                                        const int max_active_thread = blockDim.y) {
 
@@ -247,8 +249,9 @@ inline void cuda_inter_warp_reduction( ValueType& value,
 }
 
 template< class ValueType , class JoinOp>
-__device__
-inline void cuda_intra_block_reduction( ValueType& value,
+__device__ inline
+typename std::enable_if< !Kokkos::is_reducer<ValueType>::value >::type
+cuda_intra_block_reduction( ValueType& value,
                                         const JoinOp& join,
                                         const int max_active_thread = blockDim.y) {
   cuda_intra_warp_reduction(value,join,max_active_thread);
@@ -314,31 +317,52 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
         if( id + 1 < int(gridDim.x) )
           join(value, tmp);
       }
-      int active = KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
+      int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
       if (int(blockDim.x*blockDim.y) > 2) {
         value_type tmp = Kokkos::shfl_down(value, 2,32);
         if( id + 2 < int(gridDim.x) )
           join(value, tmp);
       }
-      active += KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
       if (int(blockDim.x*blockDim.y) > 4) {
         value_type tmp = Kokkos::shfl_down(value, 4,32);
         if( id + 4 < int(gridDim.x) )
           join(value, tmp);
       }
-      active += KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
       if (int(blockDim.x*blockDim.y) > 8) {
         value_type tmp = Kokkos::shfl_down(value, 8,32);
         if( id + 8 < int(gridDim.x) )
           join(value, tmp);
       }
-      active += KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
       if (int(blockDim.x*blockDim.y) > 16) {
         value_type tmp = Kokkos::shfl_down(value, 16,32);
         if( id + 16 < int(gridDim.x) )
           join(value, tmp);
       }
-      active += KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
     }
   }
   //The last block has in its thread=0 the global reduction value through "value"
@@ -478,31 +502,52 @@ cuda_inter_block_reduction( const ReducerType& reducer,
         if( id + 1 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
-      int active = KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
+      int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
       if (int(blockDim.x*blockDim.y) > 2) {
         value_type tmp = Kokkos::shfl_down(value, 2,32);
         if( id + 2 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
-      active += KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
       if (int(blockDim.x*blockDim.y) > 4) {
         value_type tmp = Kokkos::shfl_down(value, 4,32);
         if( id + 4 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
-      active += KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
       if (int(blockDim.x*blockDim.y) > 8) {
         value_type tmp = Kokkos::shfl_down(value, 8,32);
         if( id + 8 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
-      active += KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
       if (int(blockDim.x*blockDim.y) > 16) {
         value_type tmp = Kokkos::shfl_down(value, 16,32);
         if( id + 16 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
-      active += KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+      active += KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
     }
   }
 
@@ -513,6 +558,213 @@ cuda_inter_block_reduction( const ReducerType& reducer,
 #endif
 }
 
+template<class FunctorType, class ArgTag, bool DoScan, bool UseShfl>
+struct CudaReductionsFunctor;
+
+template<class FunctorType, class ArgTag>
+struct CudaReductionsFunctor<FunctorType, ArgTag, false, true> {
+  typedef FunctorValueTraits< FunctorType , ArgTag >  ValueTraits ;
+  typedef FunctorValueJoin<   FunctorType , ArgTag >  ValueJoin ;
+  typedef FunctorValueInit<   FunctorType , ArgTag >  ValueInit ;
+  typedef FunctorValueOps<    FunctorType , ArgTag >  ValueOps ;
+  typedef typename ValueTraits::pointer_type  pointer_type ;
+  typedef typename ValueTraits::value_type Scalar;
+
+  __device__
+  static inline void scalar_intra_warp_reduction(
+      const FunctorType& functor,
+      Scalar value,                            // Contribution
+      const bool skip_vector,                  // Skip threads if Kokkos vector lanes are not part of the reduction
+      const int width,                         // How much of the warp participates
+      Scalar& result)
+  {
+    unsigned mask = width==32?0xffffffff:((1<<width)-1)<<((threadIdx.y*blockDim.x+threadIdx.x)%(32/width))*width;
+    for(int delta=skip_vector?blockDim.x:1; delta<width; delta*=2) {
+      Scalar tmp;
+      cuda_shfl_down(tmp,value,delta,width,mask);
+      ValueJoin::join( functor , &value, &tmp);
+    }
+
+    cuda_shfl(result,value,0,width,mask);
+  }
+
+
+  __device__
+  static inline void scalar_intra_block_reduction(
+      const FunctorType& functor,
+      Scalar value,
+      const bool skip,
+      Scalar* my_global_team_buffer_element,
+      const int shared_elements,
+      Scalar* shared_team_buffer_element) {
+
+    const int warp_id = (threadIdx.y*blockDim.x)/32;
+    Scalar* const my_shared_team_buffer_element =
+        shared_team_buffer_element + warp_id%shared_elements;
+
+    // Warp Level Reduction, ignoring Kokkos vector entries
+    scalar_intra_warp_reduction(functor,value,skip,32,value);
+
+    if(warp_id<shared_elements) {
+        *my_shared_team_buffer_element=value;
+    }
+    // Wait for every warp to be done before using one warp to do final cross warp reduction
+    __syncthreads();
+
+    const int num_warps = blockDim.x*blockDim.y/32;
+    for(int w = shared_elements; w<num_warps; w+=shared_elements) {
+      if(warp_id>=w && warp_id<w+shared_elements) {
+        if((threadIdx.y*blockDim.x + threadIdx.x)%32==0)
+          ValueJoin::join( functor , my_shared_team_buffer_element, &value);
+      }
+      __syncthreads();
+    }
+
+
+    if( warp_id == 0) {
+      ValueInit::init( functor , &value );
+      for(unsigned int i=threadIdx.y*blockDim.x+threadIdx.x; i<blockDim.y*blockDim.x/32; i+=32)
+        ValueJoin::join( functor , &value,&shared_team_buffer_element[i]);
+      scalar_intra_warp_reduction(functor,value,false,32,*my_global_team_buffer_element);
+    }
+  }
+
+  __device__
+  static inline bool scalar_inter_block_reduction(
+      const FunctorType     & functor ,
+      const Cuda::size_type   block_id ,
+      const Cuda::size_type   block_count ,
+      Cuda::size_type * const shared_data ,
+      Cuda::size_type * const global_data ,
+      Cuda::size_type * const global_flags )  {
+    Scalar* const global_team_buffer_element = ((Scalar*) global_data);
+    Scalar* const my_global_team_buffer_element = global_team_buffer_element + blockIdx.x;
+    Scalar* shared_team_buffer_elements = ((Scalar*) shared_data);
+    Scalar value = shared_team_buffer_elements[threadIdx.y];
+    int shared_elements=blockDim.x*blockDim.y/32;
+    int global_elements=block_count;
+    __syncthreads();
+
+    scalar_intra_block_reduction(functor,value,true,my_global_team_buffer_element,shared_elements,shared_team_buffer_elements);
+    __syncthreads();
+    unsigned int num_teams_done = 0;
+    if(threadIdx.x + threadIdx.y == 0) {
+      __threadfence();
+      num_teams_done = Kokkos::atomic_fetch_add(global_flags,1)+1;
+    }
+    bool is_last_block = false;
+    if(__syncthreads_or(num_teams_done == gridDim.x)) {
+      is_last_block=true;
+      *global_flags = 0;
+      ValueInit::init( functor, &value);
+      for(int i=threadIdx.y*blockDim.x+threadIdx.x; i<global_elements; i+=blockDim.x*blockDim.y) {
+        ValueJoin::join( functor , &value,&global_team_buffer_element[i]);
+      }
+      scalar_intra_block_reduction(functor,value,false,shared_team_buffer_elements+(blockDim.y-1),shared_elements,shared_team_buffer_elements);
+    }
+    return is_last_block;
+  }
+};
+
+template<class FunctorType, class ArgTag>
+struct CudaReductionsFunctor<FunctorType, ArgTag, false, false> {
+  typedef FunctorValueTraits< FunctorType , ArgTag >  ValueTraits ;
+  typedef FunctorValueJoin<   FunctorType , ArgTag >  ValueJoin ;
+  typedef FunctorValueInit<   FunctorType , ArgTag >  ValueInit ;
+  typedef FunctorValueOps<    FunctorType , ArgTag >  ValueOps ;
+  typedef typename ValueTraits::pointer_type  pointer_type ;
+  typedef typename ValueTraits::value_type Scalar;
+
+  __device__
+  static inline void scalar_intra_warp_reduction(
+      const FunctorType& functor,
+      Scalar* value,                           // Contribution
+      const bool skip_vector,                  // Skip threads if Kokkos vector lanes are not part of the reduction
+      const int width)                         // How much of the warp participates
+  {
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+    unsigned mask = width==32?0xffffffff:((1<<width)-1)<<((threadIdx.y*blockDim.x+threadIdx.x)%(32/width))*width;
+#endif
+    const int lane_id = (threadIdx.y*blockDim.x+threadIdx.x)%32;
+    for(int delta=skip_vector?blockDim.x:1; delta<width; delta*=2) {
+      if(lane_id + delta<32) {
+        ValueJoin::join( functor , value, value+delta);
+      }
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+      KOKKOS_IMPL_CUDA_SYNCWARP_MASK(mask);
+#else
+      KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+#endif
+    }
+    *value=*(value-lane_id);
+  }
+
+
+  __device__
+  static inline void scalar_intra_block_reduction(
+      const FunctorType& functor,
+      Scalar value,
+      const bool skip,
+      Scalar* result,
+      const int shared_elements,
+      Scalar* shared_team_buffer_element) {
+
+    const int warp_id = (threadIdx.y*blockDim.x)/32;
+    Scalar* const my_shared_team_buffer_element =
+        shared_team_buffer_element + threadIdx.y*blockDim.x+threadIdx.x;
+    *my_shared_team_buffer_element = value;
+    // Warp Level Reduction, ignoring Kokkos vector entries
+    scalar_intra_warp_reduction(functor,my_shared_team_buffer_element,skip,32);
+    // Wait for every warp to be done before using one warp to do final cross warp reduction
+    __syncthreads();
+
+    if( warp_id == 0) {
+      const unsigned int delta = (threadIdx.y*blockDim.x+threadIdx.x)*32;
+      if(delta<blockDim.x*blockDim.y)
+        *my_shared_team_buffer_element = shared_team_buffer_element[delta];
+      KOKKOS_IMPL_CUDA_SYNCWARP;   
+      scalar_intra_warp_reduction(functor,my_shared_team_buffer_element,false,blockDim.x*blockDim.y/32);
+      if(threadIdx.x + threadIdx.y == 0) *result = *shared_team_buffer_element;
+    }
+  }
+
+  __device__
+  static inline bool scalar_inter_block_reduction(
+      const FunctorType     & functor ,
+      const Cuda::size_type   block_id ,
+      const Cuda::size_type   block_count ,
+      Cuda::size_type * const shared_data ,
+      Cuda::size_type * const global_data ,
+      Cuda::size_type * const global_flags )  {
+    Scalar* const global_team_buffer_element = ((Scalar*) global_data);
+    Scalar* const my_global_team_buffer_element = global_team_buffer_element + blockIdx.x;
+    Scalar* shared_team_buffer_elements = ((Scalar*) shared_data);
+    Scalar value = shared_team_buffer_elements[threadIdx.y];
+    int shared_elements=blockDim.x*blockDim.y/32;
+    int global_elements=block_count;
+    __syncthreads();
+
+    scalar_intra_block_reduction(functor,value,true,my_global_team_buffer_element,shared_elements,shared_team_buffer_elements);
+    __syncthreads();
+
+    unsigned int num_teams_done = 0;
+    if(threadIdx.x + threadIdx.y == 0) {
+      __threadfence();
+      num_teams_done = Kokkos::atomic_fetch_add(global_flags,1)+1;
+    }
+    bool is_last_block = false;
+    if(__syncthreads_or(num_teams_done == gridDim.x)) {
+      is_last_block=true;
+      *global_flags = 0;
+      ValueInit::init( functor, &value);
+      for(int i=threadIdx.y*blockDim.x+threadIdx.x; i<global_elements; i+=blockDim.x*blockDim.y) {
+        ValueJoin::join( functor , &value,&global_team_buffer_element[i]);
+      }
+      scalar_intra_block_reduction(functor,value,false,shared_team_buffer_elements+(blockDim.y-1),shared_elements,shared_team_buffer_elements);
+    }
+    return is_last_block;
+  }
+};
 //----------------------------------------------------------------------------
 // See section B.17 of Cuda C Programming Guide Version 3.2
 // for discussion of
@@ -639,14 +891,15 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
  *
  *  Global reduce result is in the last threads' 'shared_data' location.
  */
+
 template< bool DoScan , class FunctorType , class ArgTag >
 __device__
-bool cuda_single_inter_block_reduce_scan( const FunctorType     & functor ,
-                                          const Cuda::size_type   block_id ,
-                                          const Cuda::size_type   block_count ,
-                                          Cuda::size_type * const shared_data ,
-                                          Cuda::size_type * const global_data ,
-                                          Cuda::size_type * const global_flags )
+bool cuda_single_inter_block_reduce_scan2( const FunctorType     & functor ,
+                                    const Cuda::size_type   block_id ,
+                                    const Cuda::size_type   block_count ,
+                                    Cuda::size_type * const shared_data ,
+                                    Cuda::size_type * const global_data ,
+                                    Cuda::size_type * const global_flags )
 {
   typedef Cuda::size_type                  size_type ;
   typedef FunctorValueTraits< FunctorType , ArgTag >  ValueTraits ;
@@ -655,7 +908,6 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType     & functor ,
   typedef FunctorValueOps<    FunctorType , ArgTag >  ValueOps ;
 
   typedef typename ValueTraits::pointer_type    pointer_type ;
-  //typedef typename ValueTraits::reference_type  reference_type ;
 
   // '__ffs' = position of the least significant bit set to 1.
   // 'blockDim.y' is guaranteed to be a power of two so this
@@ -678,12 +930,7 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType     & functor ,
     size_type * const shared = shared_data + word_count.value * BlockSizeMask ;
     size_type * const global = global_data + word_count.value * block_id ;
 
-//#if (__CUDA_ARCH__ < 500)
     for ( int i = int(threadIdx.y) ; i < int(word_count.value) ; i += int(blockDim.y) ) { global[i] = shared[i] ; }
-//#else
-//    for ( size_type i = 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; }
-//#endif
-
   }
 
   // Contributing blocks note that their contribution has been completed via an atomic-increment flag
@@ -725,6 +972,22 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType     & functor ,
   return is_last_block ;
 }
 
+template< bool DoScan , class FunctorType , class ArgTag >
+__device__
+bool cuda_single_inter_block_reduce_scan( const FunctorType     & functor ,
+                                          const Cuda::size_type   block_id ,
+                                          const Cuda::size_type   block_count ,
+                                          Cuda::size_type * const shared_data ,
+                                          Cuda::size_type * const global_data ,
+                                          Cuda::size_type * const global_flags )
+{
+  typedef FunctorValueTraits< FunctorType , ArgTag >  ValueTraits ;
+  if(!DoScan && ValueTraits::StaticValueSize)
+    return Kokkos::Impl::CudaReductionsFunctor<FunctorType,ArgTag,false,(ValueTraits::StaticValueSize>16)>::scalar_inter_block_reduction(functor,block_id,block_count,shared_data,global_data,global_flags);
+  else
+    return cuda_single_inter_block_reduce_scan2<DoScan, FunctorType, ArgTag>(functor, block_id, block_count, shared_data, global_data, global_flags);
+}
+
 // Size in bytes required for inter block reduce or scan
 template< bool DoScan , class FunctorType , class ArgTag >
 inline
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
index 73ec409b2..9eb32f07c 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
@@ -160,7 +160,7 @@ public:
 
   template<class ValueType>
   KOKKOS_INLINE_FUNCTION
-  void team_broadcast( ValueType & val, const int& thread_id) const
+  void team_broadcast( ValueType & val, const int& thread_id ) const
     {
       #ifdef __CUDA_ARCH__
       if ( 1 == blockDim.z ) { // team == block
@@ -178,6 +178,29 @@ public:
       }
       #endif
     }
+	
+  template<class Closure, class ValueType>
+  KOKKOS_INLINE_FUNCTION
+  void team_broadcast( Closure const & f, ValueType & val, const int& thread_id ) const
+    {
+      #ifdef __CUDA_ARCH__
+      f( val );
+
+      if ( 1 == blockDim.z ) { // team == block
+        __syncthreads();
+        // Wait for shared data write until all threads arrive here
+        if ( threadIdx.x == 0u && threadIdx.y == (uint32_t)thread_id ) {
+          *((ValueType*) m_team_reduce) = val ;
+        }
+        __syncthreads(); // Wait for shared data read until root thread writes
+        val = *((ValueType*) m_team_reduce);
+      }
+      else { // team <= warp
+        ValueType tmp( val ); // input might not be a register variable
+        cuda_shfl( val, tmp, blockDim.x * thread_id, blockDim.x * blockDim.y );
+      }
+      #endif
+    }
 
   //--------------------------------------------------------------------------
   /**\brief  Reduction across a team
@@ -200,92 +223,7 @@ public:
   team_reduce( ReducerType const & reducer ) const noexcept
     {
       #ifdef __CUDA_ARCH__
-
-      typedef typename ReducerType::value_type value_type ;
-
-      value_type tmp( reducer.reference() );
-
-      // reduce within the warp using shuffle
-
-      const int wx =
-        ( threadIdx.x + blockDim.x * threadIdx.y ) & CudaTraits::WarpIndexMask ;
-
-      for ( int i = CudaTraits::WarpSize ; (int)blockDim.x <= ( i >>= 1 ) ; ) {
-
-        cuda_shfl_down( reducer.reference() , tmp , i , CudaTraits::WarpSize );
-
-        // Root of each vector lane reduces:
-        if ( 0 == threadIdx.x && wx < i ) {
-          reducer.join( tmp , reducer.reference() );
-        }
-      }
-
-      if ( 1 < blockDim.z ) { // team <= warp
-        // broadcast result from root vector lange of root thread
-
-        cuda_shfl( reducer.reference() , tmp
-                 , blockDim.x * threadIdx.y , CudaTraits::WarpSize );
-
-      }
-      else { // team == block
-        // Reduce across warps using shared memory
-        // Broadcast result within block
-
-        // Number of warps, blockDim.y may not be power of two:
-        const int nw  = ( blockDim.x * blockDim.y + CudaTraits::WarpIndexMask ) >> CudaTraits::WarpIndexShift ;
-
-        // Warp index:
-        const int wy = ( blockDim.x * threadIdx.y ) >> CudaTraits::WarpIndexShift ;
-
-        // Number of shared memory entries for the reduction:
-        int nsh = m_team_reduce_size / sizeof(value_type);
-
-        // Using at most one entry per warp:
-        if ( nw < nsh ) nsh = nw ;
-
-        __syncthreads(); // Wait before shared data write
-
-        if ( 0 == wx && wy < nsh ) {
-          ((value_type*) m_team_reduce)[wy] = tmp ;
-        }
-
-        // When more warps than shared entries:
-        for ( int i = nsh ; i < nw ; i += nsh ) {
-
-          __syncthreads();
-
-          if ( 0 == wx && i <= wy ) {
-            const int k = wy - i ;
-            if ( k < nsh ) {
-              reducer.join( *((value_type*) m_team_reduce + k) , tmp );
-            }
-          }
-        }
-
-        __syncthreads();
-
-        // One warp performs the inter-warp reduction:
-
-        if ( 0 == wy ) {
-
-          // Start at power of two covering nsh
-
-          for ( int i = 1 << ( 32 - __clz(nsh-1) ) ; ( i >>= 1 ) ; ) {
-            const int k = wx + i ;
-            if ( wx < i && k < nsh ) {
-              reducer.join( ((value_type*)m_team_reduce)[wx]
-                          , ((value_type*)m_team_reduce)[k] );
-              __threadfence_block();
-            }
-          }
-        }
-
-        __syncthreads(); // Wait for reduction
-
-        // Broadcast result to all threads
-        reducer.reference() = *((value_type*)m_team_reduce);
-      }
-
+      cuda_intra_block_reduction(reducer,blockDim.y);
       #endif /* #ifdef __CUDA_ARCH__ */
     }
 
@@ -801,7 +739,11 @@ void parallel_for
       ; i += blockDim.x ) {
     closure(i);
   }
+  #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
   KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x);
+  #else
+  KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+  #endif
 #endif
 }
 
@@ -970,7 +912,11 @@ KOKKOS_INLINE_FUNCTION
 void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) {
 #ifdef __CUDA_ARCH__
   if(threadIdx.x == 0) lambda();
+  #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
   KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x);
+  #else
+  KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+  #endif
 #endif
 }
 
@@ -979,7 +925,11 @@ KOKKOS_INLINE_FUNCTION
 void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) {
 #ifdef __CUDA_ARCH__
   if(threadIdx.x == 0 && threadIdx.y == 0) lambda();
+  #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
   KOKKOS_IMPL_CUDA_SYNCWARP_MASK(blockDim.x==32?0xffffffff:((1<<blockDim.x)-1)<<(threadIdx.y%(32/blockDim.x))*blockDim.x);
+  #else
+  KOKKOS_IMPL_CUDA_SYNCWARP_MASK;
+  #endif
 #endif
 }
 
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp
index 9a9448f67..25951b81b 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp
@@ -2,9 +2,11 @@
 
 #if defined( __CUDA_ARCH__ )
 #if ( CUDA_VERSION < 9000 )
+#define KOKKOS_IMPL_CUDA_ACTIVEMASK 0
 #define KOKKOS_IMPL_CUDA_SYNCWARP __threadfence_block()
-#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(x) __threadfence_block()
+#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK __threadfence_block()
 #define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot(x)
+#define KOKKOS_IMPL_CUDA_BALLOT_MASK(x) __ballot(x)
 #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl(x,y,z)
 #define KOKKOS_IMPL_CUDA_SHFL_MASK(m,x,y,z) __shfl(x,y,z)
 #define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up(x,y,z)
@@ -12,9 +14,11 @@
 #define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) __shfl_down(x,y,z)
 #define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m,x,y,z) __shfl_down(x,y,z)
 #else
+#define KOKKOS_IMPL_CUDA_ACTIVEMASK __activemask()
 #define KOKKOS_IMPL_CUDA_SYNCWARP __syncwarp(0xffffffff)
-#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) __syncwarp(m)
+#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) __syncwarp(m);
 #define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot_sync(__activemask(),x)
+#define KOKKOS_IMPL_CUDA_BALLOT_MASK(m,x) __ballot_sync(m,x)
 #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl_sync(0xffffffff,x,y,z)
 #define KOKKOS_IMPL_CUDA_SHFL_MASK(m,x,y,z) __shfl_sync(m,x,y,z)
 #define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up_sync(0xffffffff,x,y,z)
@@ -23,11 +27,16 @@
 #define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m,x,y,z) __shfl_down_sync(m,x,y,z)
 #endif 
 #else
+#define KOKKOS_IMPL_CUDA_ACTIVEMASK 0
 #define KOKKOS_IMPL_CUDA_SYNCWARP 
+#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK
 #define KOKKOS_IMPL_CUDA_BALLOT(x) 0
+#define KOKKOS_IMPL_CUDA_BALLOT_MASK(x) 0
 #define KOKKOS_IMPL_CUDA_SHFL(x,y,z) 0
+#define KOKKOS_IMPL_CUDA_SHFL_MASK(m,x,y,z) 0
 #define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) 0
 #define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) 0
+#define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m,x,y,z) 0
 #endif 
 
 #if ( CUDA_VERSION >= 9000 ) && (!defined(KOKKOS_COMPILER_CLANG))
diff --git a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
index 49b11f3ae..af2aff8b3 100644
--- a/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
+++ b/packages/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
@@ -279,6 +279,8 @@ public:
   KOKKOS_INLINE_FUNCTION
   static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker )
     {
+      if(arg_data_ptr == NULL) return handle_type();
+
 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
       // Assignment of texture = non-texture requires creation of a texture object
       // which can only occur on the host.  In addition, 'get_record' is only valid
diff --git a/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
index 976bdf05d..fb0d6cde8 100644
--- a/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
+++ b/packages/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
@@ -46,6 +46,8 @@
 
 #include <initializer_list>
 
+#include <Kokkos_Layout.hpp>
+
 #include<impl/KokkosExp_Host_IterateTile.hpp>
 #include <Kokkos_ExecPolicy.hpp>
 #include <Kokkos_Parallel.hpp>
@@ -63,13 +65,15 @@
 namespace Kokkos {
 
 // ------------------------------------------------------------------ //
-
+// Moved to Kokkos_Layout.hpp for more general accessibility
+/*
 enum class Iterate
 {
   Default, // Default for the device
   Left,    // Left indices stride fastest
   Right,   // Right indices stride fastest
 };
+*/
 
 template <typename ExecSpace>
 struct default_outer_direction
diff --git a/packages/kokkos/core/src/Kokkos_Array.hpp b/packages/kokkos/core/src/Kokkos_Array.hpp
index c602b0353..8e5862fe9 100644
--- a/packages/kokkos/core/src/Kokkos_Array.hpp
+++ b/packages/kokkos/core/src/Kokkos_Array.hpp
@@ -45,11 +45,13 @@
 #define KOKKOS_ARRAY_HPP
 
 #include <Kokkos_Macros.hpp>
+#include <impl/Kokkos_Error.hpp>
 
 #include <type_traits>
 #include <algorithm>
 #include <limits>
 #include <cstddef>
+#include <string>
 
 namespace Kokkos {
 
@@ -132,6 +134,7 @@ public:
 
   KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return N ; }
   KOKKOS_INLINE_FUNCTION static constexpr bool      empty(){ return false ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return N ; }
 
   template< typename iType >
   KOKKOS_INLINE_FUNCTION
@@ -160,7 +163,7 @@ public:
       return & m_internal_implementation_private_member_data[0];
     }
 
-  #ifdef KOKKOS_ROCM_CLANG_WORKAROUND
+  #ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
   // Do not default unless move and move-assignment are also defined
   KOKKOS_INLINE_FUNCTION
   ~Array() = default ;
@@ -197,6 +200,7 @@ public:
 
   KOKKOS_INLINE_FUNCTION static constexpr size_type size()  { return 0 ; }
   KOKKOS_INLINE_FUNCTION static constexpr bool      empty() { return true ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return 0 ; }
 
   template< typename iType >
   KOKKOS_INLINE_FUNCTION
@@ -261,6 +265,7 @@ public:
 
   KOKKOS_INLINE_FUNCTION constexpr size_type size()  const { return m_size ; }
   KOKKOS_INLINE_FUNCTION constexpr bool      empty() const { return 0 != m_size ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return m_size ; }
 
   template< typename iType >
   KOKKOS_INLINE_FUNCTION
@@ -336,6 +341,7 @@ public:
 
   KOKKOS_INLINE_FUNCTION constexpr size_type size()  const { return m_size ; }
   KOKKOS_INLINE_FUNCTION constexpr bool      empty() const { return 0 != m_size ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type max_size() const { return m_size ; }
 
   template< typename iType >
   KOKKOS_INLINE_FUNCTION
diff --git a/packages/kokkos/core/src/Kokkos_Concepts.hpp b/packages/kokkos/core/src/Kokkos_Concepts.hpp
index 2e2507b27..117469b0a 100644
--- a/packages/kokkos/core/src/Kokkos_Concepts.hpp
+++ b/packages/kokkos/core/src/Kokkos_Concepts.hpp
@@ -105,7 +105,10 @@ namespace Kokkos {
   template< typename T > struct is_ ## CONCEPT { \
   private: \
     template< typename , typename = std::true_type > struct have : std::false_type {}; \
-    template< typename U > struct have<U,typename std::is_same<U,typename U:: CONCEPT >::type> : std::true_type {}; \
+    template< typename U > struct have<U,typename std::is_same< \
+     typename std::remove_cv<U>::type, \
+     typename std::remove_cv<typename U:: CONCEPT>::type \
+   >::type> : std::true_type {}; \
   public: \
     enum { value = is_ ## CONCEPT::template have<T>::value }; \
   };
diff --git a/packages/kokkos/core/src/Kokkos_CopyViews.hpp b/packages/kokkos/core/src/Kokkos_CopyViews.hpp
index 31281bc31..86547420e 100644
--- a/packages/kokkos/core/src/Kokkos_CopyViews.hpp
+++ b/packages/kokkos/core/src/Kokkos_CopyViews.hpp
@@ -453,8 +453,9 @@ template<class ViewTypeA,class ViewTypeB, class Layout, class ExecSpace,typename
 struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,2,iType,KOKKOS_IMPL_COMPILING_LIBRARY> {
   ViewTypeA a;
   ViewTypeB b;
-
-  typedef Kokkos::Rank<2,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type;
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
+  typedef Kokkos::Rank<2,outer_iteration_pattern,inner_iteration_pattern> iterate_type;
   typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type;
 
   ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) {
@@ -475,7 +476,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,3,iType,KOKKOS_IMPL_COMPILI
   ViewTypeA a;
   ViewTypeB b;
 
-  typedef Kokkos::Rank<3,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type;
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
+  typedef Kokkos::Rank<3,outer_iteration_pattern,inner_iteration_pattern> iterate_type;
   typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type;
 
   ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) {
@@ -496,7 +499,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,4,iType,KOKKOS_IMPL_COMPILI
   ViewTypeA a;
   ViewTypeB b;
 
-  typedef Kokkos::Rank<4,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type;
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
+  typedef Kokkos::Rank<4,outer_iteration_pattern,inner_iteration_pattern> iterate_type;
   typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type;
 
   ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) {
@@ -519,7 +524,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,5,iType,KOKKOS_IMPL_COMPILI
   ViewTypeA a;
   ViewTypeB b;
 
-  typedef Kokkos::Rank<5,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type;
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
+  typedef Kokkos::Rank<5,outer_iteration_pattern,inner_iteration_pattern> iterate_type;
   typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type;
 
   ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) {
@@ -542,7 +549,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,6,iType,KOKKOS_IMPL_COMPILI
   ViewTypeA a;
   ViewTypeB b;
 
-  typedef Kokkos::Rank<6,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type;
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
+  typedef Kokkos::Rank<6,outer_iteration_pattern,inner_iteration_pattern> iterate_type;
   typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type;
 
   ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) {
@@ -566,7 +575,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,7,iType,KOKKOS_IMPL_COMPILI
   ViewTypeA a;
   ViewTypeB b;
 
-  typedef Kokkos::Rank<6,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type;
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
+  typedef Kokkos::Rank<6,outer_iteration_pattern,inner_iteration_pattern> iterate_type;
   typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type;
 
   ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) {
@@ -590,7 +601,9 @@ struct ViewCopy<ViewTypeA,ViewTypeB,Layout,ExecSpace,8,iType,KOKKOS_IMPL_COMPILI
   ViewTypeA a;
   ViewTypeB b;
 
-  typedef Kokkos::Rank<6,ViewFillLayoutSelector<Layout>::iterate,ViewFillLayoutSelector<Layout>::iterate> iterate_type;
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
+  typedef Kokkos::Rank<6,outer_iteration_pattern,inner_iteration_pattern> iterate_type;
   typedef Kokkos::MDRangePolicy<ExecSpace,iterate_type,Kokkos::IndexType<iType>> policy_type;
 
   ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) {
@@ -642,7 +655,9 @@ void view_copy(const DstType& dst, const SrcType& src) {
   int64_t strides[DstType::Rank+1];
   dst.stride(strides);
   Kokkos::Iterate iterate;
-  if        ( std::is_same<typename DstType::array_layout,Kokkos::LayoutRight>::value ) {
+  if        ( Kokkos::is_layouttiled<typename DstType::array_layout>::value ) {
+    iterate = Kokkos::layout_iterate_type_selector<typename DstType::array_layout>::outer_iteration_pattern;
+  } else if        ( std::is_same<typename DstType::array_layout,Kokkos::LayoutRight>::value ) {
     iterate = Kokkos::Iterate::Right;
   } else if ( std::is_same<typename DstType::array_layout,Kokkos::LayoutLeft>::value ) {
     iterate = Kokkos::Iterate::Left;
@@ -1243,9 +1258,9 @@ void deep_copy
      ViewTypeFlat;
 
     ViewTypeFlat dst_flat(dst.data(),dst.size());
-    if(dst.span() < std::numeric_limits<int>::max())
+    if(dst.span() < std::numeric_limits<int>::max()) {
       Kokkos::Impl::ViewFill< ViewTypeFlat , Kokkos::LayoutRight, typename ViewType::execution_space, ViewTypeFlat::Rank, int >( dst_flat , value );
-    else
+    } else
       Kokkos::Impl::ViewFill< ViewTypeFlat , Kokkos::LayoutRight, typename ViewType::execution_space, ViewTypeFlat::Rank, int64_t >( dst_flat , value );
     Kokkos::fence();
     return;
@@ -1397,7 +1412,6 @@ void deep_copy
   enum { SrcExecCanAccessDst =
    Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible };
 
-
   // Checking for Overlapping Views.
   dst_value_type* dst_start = dst.data();
   dst_value_type* dst_end   = dst.data() + dst.span();
@@ -1493,7 +1507,7 @@ void deep_copy
     Kokkos::fence();
   } else {
     Kokkos::fence();
-    Impl::view_copy(typename dst_type::uniform_runtime_nomemspace_type(dst),typename src_type::uniform_runtime_const_nomemspace_type(src));
+    Impl::view_copy(dst, src);
     Kokkos::fence();
   }
 }
@@ -1739,8 +1753,7 @@ void deep_copy
     exec_space.fence();
   } else {
     exec_space.fence();
-    Impl::view_copy(typename dst_type::uniform_runtime_nomemspace_type(dst),
-                    typename src_type::uniform_runtime_const_nomemspace_type(src));
+    Impl::view_copy(dst, src);
     exec_space.fence();
   }
 }
@@ -1917,4 +1930,213 @@ void realloc(      Kokkos::View<T,P...> & v ,
 }
 } /* namespace Kokkos */
 
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
+// Deduce Mirror Types
+template<class Space, class T, class ... P>
+struct MirrorViewType {
+  // The incoming view_type
+  typedef typename Kokkos::View<T,P...> src_view_type;
+  // The memory space for the mirror view
+  typedef typename Space::memory_space memory_space;
+  // Check whether it is the same memory space
+  enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value };
+  // The array_layout
+  typedef typename src_view_type::array_layout array_layout;
+  // The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
+  typedef typename src_view_type::non_const_data_type data_type;
+  // The destination view type if it is not the same memory space
+  typedef Kokkos::View<data_type,array_layout,Space> dest_view_type;
+  // If it is the same memory_space return the existsing view_type
+  // This will also keep the unmanaged trait if necessary
+  typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type;
+};
+
+template<class Space, class T, class ... P>
+struct MirrorType {
+  // The incoming view_type
+  typedef typename Kokkos::View<T,P...> src_view_type;
+  // The memory space for the mirror view
+  typedef typename Space::memory_space memory_space;
+  // Check whether it is the same memory space
+  enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value };
+  // The array_layout
+  typedef typename src_view_type::array_layout array_layout;
+  // The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
+  typedef typename src_view_type::non_const_data_type data_type;
+  // The destination view type if it is not the same memory space
+  typedef Kokkos::View<data_type,array_layout,Space> view_type;
+};
+
+}
+
+template< class T , class ... P >
+inline
+typename Kokkos::View<T,P...>::HostMirror
+create_mirror( const Kokkos::View<T,P...> & src
+             , typename std::enable_if<
+                 std::is_same< typename ViewTraits<T,P...>::specialize , void >::value &&
+                 ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
+                               , Kokkos::LayoutStride >::value
+               >::type * = 0
+             )
+{
+  typedef View<T,P...>                   src_type ;
+  typedef typename src_type::HostMirror  dst_type ;
+
+  return dst_type( std::string( src.label() ).append("_mirror")
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+                   , src.extent(0)
+                   , src.extent(1)
+                   , src.extent(2)
+                   , src.extent(3)
+                   , src.extent(4)
+                   , src.extent(5)
+                   , src.extent(6)
+                   , src.extent(7) );
+#else
+                 , src.rank_dynamic > 0 ? src.extent(0): KOKKOS_IMPL_CTOR_DEFAULT_ARG
+                 , src.rank_dynamic > 1 ? src.extent(1): KOKKOS_IMPL_CTOR_DEFAULT_ARG
+                 , src.rank_dynamic > 2 ? src.extent(2): KOKKOS_IMPL_CTOR_DEFAULT_ARG
+                 , src.rank_dynamic > 3 ? src.extent(3): KOKKOS_IMPL_CTOR_DEFAULT_ARG
+                 , src.rank_dynamic > 4 ? src.extent(4): KOKKOS_IMPL_CTOR_DEFAULT_ARG
+                 , src.rank_dynamic > 5 ? src.extent(5): KOKKOS_IMPL_CTOR_DEFAULT_ARG
+                 , src.rank_dynamic > 6 ? src.extent(6): KOKKOS_IMPL_CTOR_DEFAULT_ARG
+                 , src.rank_dynamic > 7 ? src.extent(7): KOKKOS_IMPL_CTOR_DEFAULT_ARG );
+#endif
+}
+
+template< class T , class ... P >
+inline
+typename Kokkos::View<T,P...>::HostMirror
+create_mirror( const Kokkos::View<T,P...> & src
+             , typename std::enable_if<
+                 std::is_same< typename ViewTraits<T,P...>::specialize , void >::value &&
+                 std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
+                             , Kokkos::LayoutStride >::value
+               >::type * = 0
+             )
+{
+  typedef View<T,P...>                   src_type ;
+  typedef typename src_type::HostMirror  dst_type ;
+
+  Kokkos::LayoutStride layout ;
+
+  layout.dimension[0] = src.extent(0);
+  layout.dimension[1] = src.extent(1);
+  layout.dimension[2] = src.extent(2);
+  layout.dimension[3] = src.extent(3);
+  layout.dimension[4] = src.extent(4);
+  layout.dimension[5] = src.extent(5);
+  layout.dimension[6] = src.extent(6);
+  layout.dimension[7] = src.extent(7);
+
+  layout.stride[0] = src.stride_0();
+  layout.stride[1] = src.stride_1();
+  layout.stride[2] = src.stride_2();
+  layout.stride[3] = src.stride_3();
+  layout.stride[4] = src.stride_4();
+  layout.stride[5] = src.stride_5();
+  layout.stride[6] = src.stride_6();
+  layout.stride[7] = src.stride_7();
+
+  return dst_type( std::string( src.label() ).append("_mirror") , layout );
+}
+
+
+// Create a mirror in a new space (specialization for different space)
+template<class Space, class T, class ... P>
+typename Impl::MirrorType<Space,T,P ...>::view_type
+create_mirror(const Space& , const Kokkos::View<T,P...> & src
+             , typename std::enable_if<
+                 std::is_same< typename ViewTraits<T,P...>::specialize , void >::value
+               >::type * = 0) {
+  return typename Impl::MirrorType<Space,T,P ...>::view_type(src.label(),src.layout());
+}
+
+template< class T , class ... P >
+inline
+typename Kokkos::View<T,P...>::HostMirror
+create_mirror_view( const Kokkos::View<T,P...> & src
+                  , typename std::enable_if<(
+                      std::is_same< typename Kokkos::View<T,P...>::memory_space
+                                  , typename Kokkos::View<T,P...>::HostMirror::memory_space
+                                  >::value
+                      &&
+                      std::is_same< typename Kokkos::View<T,P...>::data_type
+                                  , typename Kokkos::View<T,P...>::HostMirror::data_type
+                                  >::value
+                    )>::type * = 0
+                  )
+{
+  return src ;
+}
+
+template< class T , class ... P >
+inline
+typename Kokkos::View<T,P...>::HostMirror
+create_mirror_view( const Kokkos::View<T,P...> & src
+                  , typename std::enable_if< ! (
+                      std::is_same< typename Kokkos::View<T,P...>::memory_space
+                                  , typename Kokkos::View<T,P...>::HostMirror::memory_space
+                                  >::value
+                      &&
+                      std::is_same< typename Kokkos::View<T,P...>::data_type
+                                  , typename Kokkos::View<T,P...>::HostMirror::data_type
+                                  >::value
+                    )>::type * = 0
+                  )
+{
+  return Kokkos::create_mirror( src );
+}
+
+// Create a mirror view in a new space (specialization for same space)
+template<class Space, class T, class ... P>
+typename Impl::MirrorViewType<Space,T,P ...>::view_type
+create_mirror_view(const Space& , const Kokkos::View<T,P...> & src
+  , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+  return src;
+}
+
+// Create a mirror view in a new space (specialization for different space)
+template<class Space, class T, class ... P>
+typename Impl::MirrorViewType<Space,T,P ...>::view_type
+create_mirror_view(const Space& , const Kokkos::View<T,P...> & src
+  , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+  return typename Impl::MirrorViewType<Space,T,P ...>::view_type(src.label(),src.layout());
+}
+
+// Create a mirror view and deep_copy in a new space (specialization for same space)
+template<class Space, class T, class ... P>
+typename Impl::MirrorViewType<Space,T,P ...>::view_type
+create_mirror_view_and_copy(const Space& , const Kokkos::View<T,P...> & src
+  , std::string const& name = ""
+  , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+  (void)name;
+  return src;
+}
+
+// Create a mirror view and deep_copy in a new space (specialization for different space)
+template<class Space, class T, class ... P>
+typename Impl::MirrorViewType<Space,T,P ...>::view_type
+create_mirror_view_and_copy(const Space& , const Kokkos::View<T,P...> & src
+  , std::string const& name = ""
+  , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
+  using Mirror = typename Impl::MirrorViewType<Space,T,P ...>::view_type;
+  std::string label = name.empty() ? src.label() : name;
+  auto mirror = Mirror(ViewAllocateWithoutInitializing(label), src.layout());
+  deep_copy(mirror, src);
+  return mirror;
+}
+
+} /* namespace Kokkos */
+
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
 #endif
diff --git a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp
index a33e28fcd..d4693b43c 100644
--- a/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp
+++ b/packages/kokkos/core/src/Kokkos_ExecPolicy.hpp
@@ -57,6 +57,10 @@
 
 namespace Kokkos {
 
+struct ParallelForTag {};
+struct ParallelScanTag {};
+struct ParallelReduceTag {};
+
 struct ChunkSize {
   int value;
   ChunkSize(int value_):value(value_) {}
@@ -320,6 +324,10 @@ public:
 
   template< class FunctorType >
   static int team_size_recommended( const FunctorType & , const int&);
+
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType & functor , const int vector_length);
+
   //----------------------------------------
   /** \brief  Construct policy with the given instance of the execution space */
   TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 );
diff --git a/packages/kokkos/core/src/Kokkos_Layout.hpp b/packages/kokkos/core/src/Kokkos_Layout.hpp
index b0f92d8cf..43e117783 100644
--- a/packages/kokkos/core/src/Kokkos_Layout.hpp
+++ b/packages/kokkos/core/src/Kokkos_Layout.hpp
@@ -76,6 +76,8 @@ struct LayoutLeft {
 
   size_t dimension[ ARRAY_LAYOUT_MAX_RANK ];
 
+  enum { is_extent_constructible = true };
+
   LayoutLeft( LayoutLeft const & ) = default ;
   LayoutLeft( LayoutLeft && ) = default ;
   LayoutLeft & operator = ( LayoutLeft const & ) = default ;
@@ -108,6 +110,8 @@ struct LayoutRight {
 
   size_t dimension[ ARRAY_LAYOUT_MAX_RANK ];
 
+  enum { is_extent_constructible = true };
+
   LayoutRight( LayoutRight const & ) = default ;
   LayoutRight( LayoutRight && ) = default ;
   LayoutRight & operator = ( LayoutRight const & ) = default ;
@@ -132,6 +136,8 @@ struct LayoutStride {
   size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ;
   size_t stride[ ARRAY_LAYOUT_MAX_RANK ] ;
 
+  enum { is_extent_constructible = false };
+
   LayoutStride( LayoutStride const & ) = default ;
   LayoutStride( LayoutStride && ) = default ;
   LayoutStride & operator = ( LayoutStride const & ) = default ;
@@ -222,6 +228,8 @@ struct LayoutTileLeft {
 
   size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ;
 
+  enum { is_extent_constructible = true };
+
   LayoutTileLeft( LayoutTileLeft const & ) = default ;
   LayoutTileLeft( LayoutTileLeft && ) = default ;
   LayoutTileLeft & operator = ( LayoutTileLeft const & ) = default ;
@@ -235,6 +243,144 @@ struct LayoutTileLeft {
     : dimension { argN0 , argN1 , argN2 , argN3 , argN4 , argN5 , argN6 , argN7 } {}
 };
 
+
+//////////////////////////////////////////////////////////////////////////////////////
+
+enum class Iterate
+{
+  Default,
+  Left,    // Left indices stride fastest
+  Right   // Right indices stride fastest
+};
+
+// To check for LayoutTiled
+// This is to hide extra compile-time 'identifier' info within the LayoutTiled class by not relying on template specialization to include the ArgN*'s
+template < typename LayoutTiledCheck, class Enable = void >
+struct is_layouttiled : std::false_type {};
+
+#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+template < typename LayoutTiledCheck >
+struct is_layouttiled< LayoutTiledCheck, typename std::enable_if<LayoutTiledCheck::is_array_layout_tiled>::type > : std::true_type {};
+
+namespace Experimental {
+
+/// LayoutTiled
+// Must have Rank >= 2
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP,
+           unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 = 0,  unsigned ArgN3 = 0,  unsigned ArgN4 = 0,  unsigned ArgN5 = 0,  unsigned ArgN6 = 0,  unsigned ArgN7 = 0, 
+           bool IsPowerOfTwo = 
+           ( Impl::is_integral_power_of_two(ArgN0) &&
+             Impl::is_integral_power_of_two(ArgN1) &&
+             (Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0) ) &&
+             (Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0) ) &&
+             (Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0) ) &&
+             (Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0) ) &&
+             (Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0) ) &&
+             (Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0) )
+           )
+         >
+struct LayoutTiled {
+
+  static_assert( IsPowerOfTwo
+               , "LayoutTiled must be given power-of-two tile dimensions" );
+
+#if 0
+  static_assert( (Impl::is_integral_power_of_two(ArgN0) ) &&
+                 (Impl::is_integral_power_of_two(ArgN1) ) &&
+                 (Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0) ) &&
+                 (Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0) ) &&
+                 (Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0) ) &&
+                 (Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0) ) &&
+                 (Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0) ) &&
+                 (Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0) )
+               , "LayoutTiled must be given power-of-two tile dimensions" );
+#endif
+
+  typedef LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, IsPowerOfTwo> array_layout ;
+  static constexpr Iterate outer_pattern = OuterP;
+  static constexpr Iterate inner_pattern = InnerP;
+
+  enum { N0 = ArgN0 };
+  enum { N1 = ArgN1 };
+  enum { N2 = ArgN2 };
+  enum { N3 = ArgN3 };
+  enum { N4 = ArgN4 };
+  enum { N5 = ArgN5 };
+  enum { N6 = ArgN6 };
+  enum { N7 = ArgN7 };
+
+  size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ;
+
+  enum { is_extent_constructible = true };
+
+  LayoutTiled( LayoutTiled const & ) = default ;
+  LayoutTiled( LayoutTiled && ) = default ;
+  LayoutTiled & operator = ( LayoutTiled const & ) = default ;
+  LayoutTiled & operator = ( LayoutTiled && ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  explicit constexpr
+  LayoutTiled( size_t argN0 = 0 , size_t argN1 = 0 , size_t argN2 = 0 , size_t argN3 = 0
+                , size_t argN4 = 0 , size_t argN5 = 0 , size_t argN6 = 0 , size_t argN7 = 0
+                )
+    : dimension { argN0 , argN1 , argN2 , argN3 , argN4 , argN5 , argN6 , argN7 } {}
+};
+
+} // namespace Experimental
+#endif
+
+
+// For use with view_copy
+template < typename ... Layout >
+struct layout_iterate_type_selector {
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Default ;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Default ;
+};
+
+template <>
+struct layout_iterate_type_selector< Kokkos::LayoutRight > {
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right ;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right ;
+};
+
+template <>
+struct layout_iterate_type_selector< Kokkos::LayoutLeft > {
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left ;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left ;
+};
+
+template <>
+struct layout_iterate_type_selector< Kokkos::LayoutStride > {
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Default ;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Default ;
+};
+
+#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+template < unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 ,  unsigned ArgN5 ,  unsigned ArgN6 ,  unsigned ArgN7 >
+struct layout_iterate_type_selector< Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > {
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left ;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left ;
+};
+
+template < unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 ,  unsigned ArgN5 ,  unsigned ArgN6 ,  unsigned ArgN7 >
+struct layout_iterate_type_selector< Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > {
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right ;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left ;
+};
+
+template < unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 ,  unsigned ArgN5 ,  unsigned ArgN6 ,  unsigned ArgN7 >
+struct layout_iterate_type_selector< Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > {
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left ;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right ;
+};
+
+template < unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 ,  unsigned ArgN5 ,  unsigned ArgN6 ,  unsigned ArgN7 >
+struct layout_iterate_type_selector< Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > {
+  static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right ;
+  static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right ;
+};
+#endif
+
 } // namespace Kokkos
 
 #endif // #ifndef KOKKOS_LAYOUT_HPP
diff --git a/packages/kokkos/core/src/Kokkos_Macros.hpp b/packages/kokkos/core/src/Kokkos_Macros.hpp
index 0cd19d223..96bd23e22 100644
--- a/packages/kokkos/core/src/Kokkos_Macros.hpp
+++ b/packages/kokkos/core/src/Kokkos_Macros.hpp
@@ -153,7 +153,7 @@
     #else
       #define KOKKOS_LAMBDA [=]__host__ __device__
 
-      #if defined( KOKKOS_ENABLE_CXX1Z )
+      #if defined( KOKKOS_ENABLE_CXX17 ) || defined( KOKKOS_ENABLE_CXX20 )
         #define KOKKOS_CLASS_LAMBDA        [=,*this] __host__ __device__
       #endif
     #endif
@@ -213,7 +213,7 @@
   #define KOKKOS_LAMBDA [=]
 #endif
 
-#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined( KOKKOS_CLASS_LAMBDA )
+#if (defined( KOKKOS_ENABLE_CXX17 ) || defined( KOKKOS_ENABLE_CXX20) )&& !defined( KOKKOS_CLASS_LAMBDA )
   #define KOKKOS_CLASS_LAMBDA [=,*this]
 #endif
 
@@ -521,6 +521,9 @@
 #if defined ( KOKKOS_ENABLE_CUDA )
   #if ( 9000 <= CUDA_VERSION )
   #define KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND
+  #if ( __CUDA_ARCH__ )
+    #define KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+  #endif
   #endif
 #endif
 
diff --git a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
index 7bed7aa3d..190079451 100644
--- a/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
+++ b/packages/kokkos/core/src/Kokkos_Parallel_Reduce.hpp
@@ -793,7 +793,7 @@ struct ParallelReduceReturnValue<typename std::enable_if<
 
   static return_type return_value(ReturnType& return_val,
                                   const FunctorType& functor) {
-#ifdef KOKOOS_ENABLE_DEPRECATED_CODE
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
     return return_type(return_val,functor.value_count);
 #else
     if ( is_array<ReturnType>::value )
@@ -1002,7 +1002,8 @@ void parallel_reduce(const std::string& label,
                      typename Impl::enable_if<
                        Kokkos::Impl::is_execution_policy<PolicyType>::value
                      >::type * = 0) {
-  Impl::ParallelReduceAdaptor<PolicyType,FunctorType,const ReturnType>::execute(label,policy,functor,return_value);
+  ReturnType return_value_impl = return_value;
+  Impl::ParallelReduceAdaptor<PolicyType,FunctorType,ReturnType>::execute(label,policy,functor,return_value_impl);
 }
 
 template< class PolicyType, class FunctorType, class ReturnType >
@@ -1054,6 +1055,9 @@ void parallel_reduce(const std::string& label,
                                      , typename ValueTraits::pointer_type
                                      >::type value_type ;
 
+  static_assert(Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,PolicyType,FunctorType>::
+                 has_final_member_function,"Calling parallel_reduce without either return value or final function.");
+
   typedef Kokkos::View< value_type
               , Kokkos::HostSpace
               , Kokkos::MemoryUnmanaged
@@ -1076,6 +1080,9 @@ void parallel_reduce(const PolicyType& policy,
                                      , typename ValueTraits::pointer_type
                                      >::type value_type ;
 
+  static_assert(Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,PolicyType,FunctorType>::
+                 has_final_member_function,"Calling parallel_reduce without either return value or final function.");
+
   typedef Kokkos::View< value_type
               , Kokkos::HostSpace
               , Kokkos::MemoryUnmanaged
@@ -1096,6 +1103,9 @@ void parallel_reduce(const size_t& policy,
                                      , typename ValueTraits::pointer_type
                                      >::type value_type ;
 
+  static_assert(Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,RangePolicy<>,FunctorType>::
+                 has_final_member_function,"Calling parallel_reduce without either return value or final function.");
+
   typedef Kokkos::View< value_type
               , Kokkos::HostSpace
               , Kokkos::MemoryUnmanaged
@@ -1117,6 +1127,9 @@ void parallel_reduce(const std::string& label,
                                      , typename ValueTraits::pointer_type
                                      >::type value_type ;
 
+  static_assert(Impl::FunctorAnalysis<Impl::FunctorPatternInterface::REDUCE,RangePolicy<>,FunctorType>::
+                 has_final_member_function,"Calling parallel_reduce without either return value or final function.");
+
   typedef Kokkos::View< value_type
               , Kokkos::HostSpace
               , Kokkos::MemoryUnmanaged
diff --git a/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp b/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp
index 4527dd4c1..86d803ccc 100644
--- a/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp
+++ b/packages/kokkos/core/src/Kokkos_ScratchSpace.hpp
@@ -136,6 +136,55 @@ public:
     }
   }
 
+
+  KOKKOS_INLINE_FUNCTION
+  void* get_shmem_aligned (const ptrdiff_t size, const ptrdiff_t alignment, int level = -1) const {
+    if(level == -1)
+      level = m_default_level;
+    if(level == 0) {
+
+      char* previous = m_iter_L0;
+      const ptrdiff_t missalign = size_t(m_iter_L0)%alignment;
+      if(missalign) m_iter_L0 += alignment-missalign;
+
+      void* tmp = m_iter_L0 + m_offset * size;
+      if (m_end_L0 < (m_iter_L0 += size * m_multiplier)) {
+        m_iter_L0 = previous; // put it back like it was
+        #ifdef KOKKOS_DEBUG
+        // mfh 23 Jun 2015: printf call consumes 25 registers
+        // in a CUDA build, so only print in debug mode.  The
+        // function still returns NULL if not enough memory.
+        printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
+                "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
+                long(m_end_L0-m_iter_L0));
+        #endif // KOKKOS_DEBUG
+        tmp = 0;
+      }
+      return tmp;
+    } else {
+
+      char* previous = m_iter_L1;
+      const ptrdiff_t missalign =  size_t(m_iter_L1)%alignment;
+      if(missalign) m_iter_L1 += alignment-missalign;
+
+      void* tmp = m_iter_L1 + m_offset * size;
+      if (m_end_L1 < (m_iter_L1 += size * m_multiplier)) {
+        m_iter_L1 = previous; // put it back like it was
+        #ifdef KOKKOS_DEBUG
+        // mfh 23 Jun 2015: printf call consumes 25 registers
+        // in a CUDA build, so only print in debug mode.  The
+        // function still returns NULL if not enough memory.
+        printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
+                "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
+                long(m_end_L1-m_iter_L1));
+        #endif // KOKKOS_DEBUG
+        tmp = 0;
+      }
+      return tmp;
+
+    }
+  }
+
   template< typename IntType >
   KOKKOS_INLINE_FUNCTION
   ScratchMemorySpace( void * ptr_L0 , const IntType & size_L0 , void * ptr_L1 = NULL , const IntType & size_L1 = 0)
diff --git a/packages/kokkos/core/src/Kokkos_Serial.hpp b/packages/kokkos/core/src/Kokkos_Serial.hpp
index 911aba892..01701e53a 100644
--- a/packages/kokkos/core/src/Kokkos_Serial.hpp
+++ b/packages/kokkos/core/src/Kokkos_Serial.hpp
@@ -262,7 +262,7 @@ public:
   }
 
   //----------------------------------------
-
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   template< class FunctorType >
   static
   int team_size_max( const FunctorType & ) { return 1 ; }
@@ -274,6 +274,16 @@ public:
   template< class FunctorType >
   static
   int team_size_recommended( const FunctorType & , const int& ) { return 1 ; }
+#endif
+
+  template<class FunctorType>
+  int team_size_max( const FunctorType&, const ParallelForTag& ) const { return 1 ; }
+  template<class FunctorType>
+  int team_size_max( const FunctorType&, const ParallelReduceTag& ) const { return 1 ; }
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType&, const ParallelForTag& ) const { return 1 ; }
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType&, const ParallelReduceTag& ) const { return 1 ; }
 
   //----------------------------------------
 
@@ -281,6 +291,16 @@ public:
   inline int league_size() const { return m_league_size ; }
   inline size_t scratch_size(const int& level, int = 0) const { return m_team_scratch_size[level] + m_thread_scratch_size[level]; }
 
+  inline static
+  int vector_length_max()
+    { return 1024; } // Use arbitrary large number, is meant as a vectorizable length
+
+  inline static
+  int scratch_size_max(int level)
+  { return (level==0?
+        1024*32:
+        20*1024*1024);
+  }
   /** \brief  Specify league size, request team size */
   TeamPolicyInternal( execution_space &
             , int league_size_request
diff --git a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp
index c3185853d..5045e9cbb 100644
--- a/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp
+++ b/packages/kokkos/core/src/Kokkos_TaskScheduler.hpp
@@ -624,7 +624,6 @@ public:
   when_all( Future< A1 , A2 > const arg[] , int narg )
     {
       using future_type = Future< execution_space > ;
-      using task_base   = Kokkos::Impl::TaskBase< void , void , void > ;
 
       future_type f ;
 
@@ -692,7 +691,6 @@ public:
     {
       using input_type  = decltype( func(0) );
       using future_type = Future< execution_space > ;
-      using task_base   = Kokkos::Impl::TaskBase< void , void , void > ;
 
       static_assert( is_future< input_type >::value
                    , "Functor must return a Kokkos::Future" );
diff --git a/packages/kokkos/core/src/Kokkos_View.hpp b/packages/kokkos/core/src/Kokkos_View.hpp
index 70301884c..da49aff22 100644
--- a/packages/kokkos/core/src/Kokkos_View.hpp
+++ b/packages/kokkos/core/src/Kokkos_View.hpp
@@ -707,10 +707,17 @@ public:
   //----------------------------------------
   // Allow specializations to query their specialized map
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   KOKKOS_INLINE_FUNCTION
   const Kokkos::Impl::ViewMapping< traits , void > &
   implementation_map() const { return m_map ; }
-
+#endif
+  KOKKOS_INLINE_FUNCTION
+  const Kokkos::Impl::ViewMapping< traits , void > &
+  impl_map() const { return m_map ; }
+  KOKKOS_INLINE_FUNCTION
+  const Kokkos::Impl::SharedAllocationTracker &
+  impl_track() const { return m_track ; }
   //----------------------------------------
 
 private:
@@ -752,423 +759,421 @@ private:
 #endif
 
 public:
-
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
-  template< class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<( Kokkos::Impl::are_integral<Args...>::value
-                            && ( 0 == Rank )
-                          ), reference_type >::type
-  operator()( Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,args...) )
-      return m_map.reference();
-    }
-
-  template< typename I0
-             , class ... Args>
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,Args...>::value
-      && ( 1 == Rank )
-      && ! is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0,
-              Args ... args) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) )
-      return m_map.reference(i0);
-    }
-
-  template< typename I0
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,Args...>::value
-      && ( 1 == Rank )
-      && is_default_map
-      && ! is_layout_stride
-    ), reference_type >::type
-  operator()( const I0 & i0
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) )
-      return m_map.m_handle[ i0 ];
-    }
+   template< class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<( Kokkos::Impl::are_integral<Args...>::value
+                             && ( 0 == Rank )
+                           ), reference_type >::type
+   operator()( Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,args...) )
+       return m_map.reference();
+     }
 
-  template< typename I0
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,Args...>::value
-      && ( 1 == Rank )
-      && is_default_map
-      && is_layout_stride
-    ), reference_type >::type
-  operator()( const I0 & i0
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) )
-      return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
-    }
+   template< typename I0
+              , class ... Args>
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,Args...>::value
+       && ( 1 == Rank )
+       && ! is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0,
+               Args ... args) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) )
+       return m_map.reference(i0);
+     }
 
-  //------------------------------
-    // Rank 1 operator[]
+   template< typename I0
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,Args...>::value
+       && ( 1 == Rank )
+       && is_default_map
+       && ! is_layout_stride
+     ), reference_type >::type
+   operator()( const I0 & i0
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) )
+       return m_map.m_impl_handle[ i0 ];
+     }
 
-    template< typename I0 >
-    KOKKOS_FORCEINLINE_FUNCTION
-    typename std::enable_if<
-      ( Kokkos::Impl::are_integral<I0>::value
-        && ( 1 == Rank )
-        && ! is_default_map
-      ), reference_type >::type
-    operator[]( const I0 & i0 ) const
-      {
-        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
-        return m_map.reference(i0);
-      }
+   template< typename I0
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,Args...>::value
+       && ( 1 == Rank )
+       && is_default_map
+       && is_layout_stride
+     ), reference_type >::type
+   operator()( const I0 & i0
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) )
+       return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ];
+     }
 
-    template< typename I0 >
-    KOKKOS_FORCEINLINE_FUNCTION
-    typename std::enable_if<
-      ( Kokkos::Impl::are_integral<I0>::value
-        && ( 1 == Rank )
-        && is_default_map
-        && ! is_layout_stride
-      ), reference_type >::type
-    operator[]( const I0 & i0 ) const
-      {
-        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
-        return m_map.m_handle[ i0 ];
-      }
+   //------------------------------
+     // Rank 1 operator[]
+
+     template< typename I0 >
+     KOKKOS_FORCEINLINE_FUNCTION
+     typename std::enable_if<
+       ( Kokkos::Impl::are_integral<I0>::value
+         && ( 1 == Rank )
+         && ! is_default_map
+       ), reference_type >::type
+     operator[]( const I0 & i0 ) const
+       {
+         KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
+         return m_map.reference(i0);
+       }
 
     template< typename I0 >
-    KOKKOS_FORCEINLINE_FUNCTION
-    typename std::enable_if<
-      ( Kokkos::Impl::are_integral<I0>::value
-        && ( 1 == Rank )
-        && is_default_map
-        && is_layout_stride
-      ), reference_type >::type
-    operator[]( const I0 & i0 ) const
-      {
-        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
-        return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
-      }
-
-
-  template< typename I0 , typename I1
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
-      && ( 2 == Rank )
-      && ! is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-      return m_map.reference(i0,i1);
-    }
+     KOKKOS_FORCEINLINE_FUNCTION
+     typename std::enable_if<
+       ( Kokkos::Impl::are_integral<I0>::value
+         && ( 1 == Rank )
+         && is_default_map
+         && ! is_layout_stride
+       ), reference_type >::type
+     operator[]( const I0 & i0 ) const
+       {
+         KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
+         return m_map.m_impl_handle[ i0 ];
+       }
+
+     template< typename I0 >
+     KOKKOS_FORCEINLINE_FUNCTION
+     typename std::enable_if<
+       ( Kokkos::Impl::are_integral<I0>::value
+         && ( 1 == Rank )
+         && is_default_map
+         && is_layout_stride
+       ), reference_type >::type
+     operator[]( const I0 & i0 ) const
+       {
+         KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
+         return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ];
+       }
 
-  template< typename I0 , typename I1
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
-      && ( 2 == Rank )
-      && is_default_map
-      && is_layout_left && ( traits::rank_dynamic == 0 )
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-      return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
-    }
-
-  template< typename I0 , typename I1
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
-      && ( 2 == Rank )
-      && is_default_map
-      && is_layout_left && ( traits::rank_dynamic != 0 )
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-      return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
-    }
+   template< typename I0 , typename I1
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
+       && ( 2 == Rank )
+       && ! is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
+       return m_map.reference(i0,i1);
+     }
 
-  template< typename I0 , typename I1
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
-      && ( 2 == Rank )
-      && is_default_map
-      && is_layout_right && ( traits::rank_dynamic == 0 )
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-      return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
-    }
+   template< typename I0 , typename I1
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
+       && ( 2 == Rank )
+       && is_default_map
+       && is_layout_left && ( traits::rank_dynamic == 0 )
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
+       return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_dim.N0 * i1 ];
+     }
 
-  template< typename I0 , typename I1
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
-      && ( 2 == Rank )
-      && is_default_map
-      && is_layout_right && ( traits::rank_dynamic != 0 )
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-      return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
-    }
+   template< typename I0 , typename I1
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
+       && ( 2 == Rank )
+       && is_default_map
+       && is_layout_left && ( traits::rank_dynamic != 0 )
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
+       return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_stride * i1 ];
+     }
 
-  template< typename I0 , typename I1
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
-      && ( 2 == Rank )
-      && is_default_map
-      && is_layout_stride
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-      return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
-                             i1 * m_map.m_offset.m_stride.S1 ];
-    }
+   template< typename I0 , typename I1
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
+       && ( 2 == Rank )
+       && is_default_map
+       && is_layout_right && ( traits::rank_dynamic == 0 )
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
+       return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_dim.N1 * i0 ];
+     }
 
-  //------------------------------
-  // Rank 3
+   template< typename I0 , typename I1
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
+       && ( 2 == Rank )
+       && is_default_map
+       && is_layout_right && ( traits::rank_dynamic != 0 )
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
+       return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_stride * i0 ];
+     }
 
-  template< typename I0 , typename I1 , typename I2
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value
-      && ( 3 == Rank )
-      && is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
-    }
+   template< typename I0 , typename I1
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,Args...>::value
+       && ( 2 == Rank )
+       && is_default_map
+       && is_layout_stride
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
+       return m_map.m_impl_handle[ i0 * m_map.m_impl_offset.m_stride.S0 +
+                              i1 * m_map.m_impl_offset.m_stride.S1 ];
+     }
 
-  template< typename I0 , typename I1 , typename I2
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value
-      && ( 3 == Rank )
-      && ! is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) )
-      return m_map.reference(i0,i1,i2);
-    }
+   //------------------------------
+   // Rank 3
 
-  //------------------------------
-  // Rank 4
+   template< typename I0 , typename I1 , typename I2
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value
+       && ( 3 == Rank )
+       && is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) )
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2) ];
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value
-      && ( 4 == Rank )
-      && is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
-    }
+   template< typename I0 , typename I1 , typename I2
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,Args...>::value
+       && ( 3 == Rank )
+       && ! is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) )
+       return m_map.reference(i0,i1,i2);
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value
-      && ( 4 == Rank )
-      && ! is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) )
-      return m_map.reference(i0,i1,i2,i3);
-    }
+   //------------------------------
+   // Rank 4
 
-  //------------------------------
-  // Rank 5
+ template< typename I0 , typename I1 , typename I2 , typename I3
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value
+       && ( 4 == Rank )
+       && is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) )
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3) ];
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value
-      && ( 5 == Rank )
-      && is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , const I4 & i4
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
-    }
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,Args...>::value
+       && ( 4 == Rank )
+       && ! is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) )
+       return m_map.reference(i0,i1,i2,i3);
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value
-      && ( 5 == Rank )
-      && ! is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , const I4 & i4
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) )
-      return m_map.reference(i0,i1,i2,i3,i4);
-    }
+   //------------------------------
+   // Rank 5
 
-  //------------------------------
-  // Rank 6
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , typename I4
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value
+       && ( 5 == Rank )
+       && is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , const I4 & i4
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) )
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4) ];
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value
-      && ( 6 == Rank )
-      && is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , const I4 & i4 , const I5 & i5
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
-    }
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , typename I4
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,Args...>::value
+       && ( 5 == Rank )
+       && ! is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , const I4 & i4
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) )
+       return m_map.reference(i0,i1,i2,i3,i4);
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value
-      && ( 6 == Rank )
-      && ! is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , const I4 & i4 , const I5 & i5
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) )
-      return m_map.reference(i0,i1,i2,i3,i4,i5);
-    }
+   //------------------------------
+   // Rank 6
 
-  //------------------------------
-  // Rank 7
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , typename I4 , typename I5
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value
+       && ( 6 == Rank )
+       && is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , const I4 & i4 , const I5 & i5
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) )
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5) ];
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5 , typename I6
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value
-      && ( 7 == Rank )
-      && is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , const I4 & i4 , const I5 & i5 , const I6 & i6
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
-    }
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , typename I4 , typename I5
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,Args...>::value
+       && ( 6 == Rank )
+       && ! is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , const I4 & i4 , const I5 & i5
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) )
+       return m_map.reference(i0,i1,i2,i3,i4,i5);
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5 , typename I6
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value
-      && ( 7 == Rank )
-      && ! is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , const I4 & i4 , const I5 & i5 , const I6 & i6
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
-      return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
-    }
+   //------------------------------
+   // Rank 7
 
-  //------------------------------
-  // Rank 8
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , typename I4 , typename I5 , typename I6
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value
+       && ( 7 == Rank )
+       && is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , const I4 & i4 , const I5 & i5 , const I6 & i6
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ];
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5 , typename I6 , typename I7
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value
-      && ( 8 == Rank )
-      && is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
-    }
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , typename I4 , typename I5 , typename I6
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,Args...>::value
+       && ( 7 == Rank )
+       && ! is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , const I4 & i4 , const I5 & i5 , const I6 & i6
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
+       return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
+     }
 
-  template< typename I0 , typename I1 , typename I2 , typename I3
-          , typename I4 , typename I5 , typename I6 , typename I7
-          , class ... Args >
-  KOKKOS_FORCEINLINE_FUNCTION
-  typename std::enable_if<
-    ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value
-      && ( 8 == Rank )
-      && ! is_default_map
-    ), reference_type >::type
-  operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
-            , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7
-            , Args ... args ) const
-    {
-      KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
-      return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
-    }
+   //------------------------------
+   // Rank 8
+
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , typename I4 , typename I5 , typename I6 , typename I7
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value
+       && ( 8 == Rank )
+       && is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
+     }
+
+   template< typename I0 , typename I1 , typename I2 , typename I3
+           , typename I4 , typename I5 , typename I6 , typename I7
+           , class ... Args >
+   KOKKOS_FORCEINLINE_FUNCTION
+   typename std::enable_if<
+     ( Kokkos::Impl::are_integral<I0,I1,I2,I3,I4,I5,I6,I7,Args...>::value
+       && ( 8 == Rank )
+       && ! is_default_map
+     ), reference_type >::type
+   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+             , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7
+             , Args ... args ) const
+     {
+       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
+       return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
+     }
 
 
- #else
+  #else
   //------------------------------
   // Rank 0 operator()
 
@@ -1206,7 +1211,7 @@ public:
   operator()( const I0 & i0 ) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
-      return m_map.m_handle[ i0 ];
+      return m_map.m_impl_handle[ i0 ];
     }
 
   template< typename I0 >
@@ -1220,7 +1225,7 @@ public:
   operator()( const I0 & i0) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
-      return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
+      return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ];
     }
   //------------------------------
     // Rank 1 operator[]
@@ -1249,7 +1254,7 @@ public:
     operator[]( const I0 & i0 ) const
       {
         KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
-        return m_map.m_handle[ i0 ];
+        return m_map.m_impl_handle[ i0 ];
       }
 
     template< typename I0 >
@@ -1263,7 +1268,7 @@ public:
     operator[]( const I0 & i0 ) const
       {
         KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) )
-        return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
+        return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ];
       }
 
 
@@ -1294,7 +1299,7 @@ public:
   operator()( const I0 & i0 , const I1 & i1) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) )
-      return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
+      return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_dim.N0 * i1 ];
     }
 
   template< typename I0 , typename I1>
@@ -1308,7 +1313,7 @@ public:
   operator()( const I0 & i0 , const I1 & i1) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) )
-      return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
+      return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_stride * i1 ];
     }
 
   template< typename I0 , typename I1 >
@@ -1322,7 +1327,7 @@ public:
   operator()( const I0 & i0 , const I1 & i1 ) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) )
-      return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
+      return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_dim.N1 * i0 ];
     }
 
   template< typename I0 , typename I1 >
@@ -1336,7 +1341,7 @@ public:
   operator()( const I0 & i0 , const I1 & i1 ) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) )
-      return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
+      return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_stride * i0 ];
     }
 
   template< typename I0 , typename I1>
@@ -1350,8 +1355,8 @@ public:
   operator()( const I0 & i0 , const I1 & i1 ) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1) )
-      return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
-                             i1 * m_map.m_offset.m_stride.S1 ];
+      return m_map.m_impl_handle[ i0 * m_map.m_impl_offset.m_stride.S0 +
+                             i1 * m_map.m_impl_offset.m_stride.S1 ];
     }
 
   //------------------------------
@@ -1367,7 +1372,7 @@ public:
   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
+      return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2) ];
     }
 
   template< typename I0 , typename I1 , typename I2>
@@ -1396,7 +1401,7 @@ public:
   operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
+      return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3) ];
     }
 
   template< typename I0 , typename I1 , typename I2 , typename I3 >
@@ -1427,7 +1432,7 @@ public:
             , const I4 & i4 ) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
+      return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4) ];
     }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1460,7 +1465,7 @@ public:
             , const I4 & i4 , const I5 & i5 ) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
+      return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5) ];
     }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1493,7 +1498,7 @@ public:
             , const I4 & i4 , const I5 & i5 , const I6 & i6) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
+      return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ];
     }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1526,7 +1531,7 @@ public:
             , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7) const
     {
       KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7) )
-      return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
+      return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
     }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1545,7 +1550,6 @@ public:
     }
 
 #endif
-
   template< class ... Args >
   KOKKOS_FORCEINLINE_FUNCTION
   typename std::enable_if<( Kokkos::Impl::are_integral<Args...>::value
@@ -1585,7 +1589,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) )
-       return m_map.m_handle[ i0 ];
+       return m_map.m_impl_handle[ i0 ];
      }
 
    template< typename I0
@@ -1601,7 +1605,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) )
-       return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
+       return m_map.m_impl_handle[ m_map.m_impl_offset.m_stride.S0 * i0 ];
      }
 
    template< typename I0 , typename I1
@@ -1632,7 +1636,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-       return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
+       return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_dim.N0 * i1 ];
      }
 
    template< typename I0 , typename I1
@@ -1648,7 +1652,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-       return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
+       return m_map.m_impl_handle[ i0 + m_map.m_impl_offset.m_stride * i1 ];
      }
 
    template< typename I0 , typename I1
@@ -1664,7 +1668,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-       return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
+       return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_dim.N1 * i0 ];
      }
 
    template< typename I0 , typename I1
@@ -1680,7 +1684,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-       return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
+       return m_map.m_impl_handle[ i1 + m_map.m_impl_offset.m_stride * i0 ];
      }
 
    template< typename I0 , typename I1
@@ -1696,8 +1700,8 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) )
-       return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
-                              i1 * m_map.m_offset.m_stride.S1 ];
+       return m_map.m_impl_handle[ i0 * m_map.m_impl_offset.m_stride.S0 +
+                              i1 * m_map.m_impl_offset.m_stride.S1 ];
      }
 
    //------------------------------
@@ -1715,7 +1719,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) )
-       return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2) ];
      }
 
    template< typename I0 , typename I1 , typename I2
@@ -1748,7 +1752,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) )
-       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3) ];
      }
 
    template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1783,7 +1787,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) )
-       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4) ];
      }
 
    template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1820,7 +1824,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) )
-       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5) ];
      }
 
    template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1857,7 +1861,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
-       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ];
      }
 
    template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1894,7 +1898,7 @@ public:
              , Args ... args ) const
      {
        KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
-       return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
+       return m_map.m_impl_handle[ m_map.m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
      }
 
    template< typename I0 , typename I1 , typename I2 , typename I3
@@ -1938,6 +1942,8 @@ public:
   KOKKOS_INLINE_FUNCTION
   View & operator = ( View && rhs ) { m_track = std::move(rhs.m_track) ; m_map = std::move(rhs.m_map) ; return *this ; }
 
+
+
   //----------------------------------------
   // Compatible view copy constructor and assignment
   // may assign unmanaged from managed.
@@ -2206,7 +2212,8 @@ public:
               , arg_N4 , arg_N5 , arg_N6 , arg_N7 )
           )
     {
-
+      static_assert ( traits::array_layout::is_extent_constructible , "Layout is not extent constructible. A layout object should be passed too.\n" );
+	  
 #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
     Impl::runtime_check_rank_host(traits::rank_dynamic, std::is_same<typename traits::specialize,void>::value, arg_N0, arg_N1, arg_N2, arg_N3,
                              arg_N4, arg_N5, arg_N6, arg_N7, label());
@@ -2257,6 +2264,15 @@ public:
 #endif
 
     }
+  template <class Traits>
+  KOKKOS_INLINE_FUNCTION
+  View( const track_type & track,  const Kokkos::Impl::ViewMapping< Traits , void >  &map ) :
+  m_track(track), m_map()
+  {
+    typedef Kokkos::Impl::ViewMapping< traits , Traits , void >  Mapping ;
+    static_assert( Mapping::is_assignable , "Incompatible View copy construction" );
+    Mapping::assign( m_map , map , track );
+  }
 
   //----------------------------------------
   // Memory span required to wrap these dimensions.
@@ -2346,7 +2362,7 @@ public:
   static inline
   size_t shmem_size( typename traits::array_layout const& arg_layout )
   {
-    return map_type::memory_span( arg_layout );
+    return map_type::memory_span( arg_layout )+sizeof(typename traits::value_type);
   }
 
   explicit KOKKOS_INLINE_FUNCTION
@@ -2354,7 +2370,7 @@ public:
       , const typename traits::array_layout & arg_layout )
     : View( Impl::ViewCtorProp<pointer_type>(
               reinterpret_cast<pointer_type>(
-                arg_space.get_shmem( map_type::memory_span( arg_layout ) ) ) )
+                arg_space.get_shmem_aligned( map_type::memory_span( arg_layout ), sizeof(typename traits::value_type) ) ) )
          , arg_layout )
     {}
 
@@ -2370,11 +2386,11 @@ public:
       , const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG )
     : View( Impl::ViewCtorProp<pointer_type>(
               reinterpret_cast<pointer_type>(
-                arg_space.get_shmem(
+                arg_space.get_shmem_aligned(
                   map_type::memory_span(
                     typename traits::array_layout
                      ( arg_N0 , arg_N1 , arg_N2 , arg_N3
-                     , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ) ) ) )
+                     , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ), sizeof(typename traits::value_type) ) ) )
           , typename traits::array_layout
              ( arg_N0 , arg_N1 , arg_N2 , arg_N3
              , arg_N4 , arg_N5 , arg_N6 , arg_N7 )
@@ -2515,209 +2531,6 @@ void shared_allocation_tracking_enable()
 } /* namespace Impl */
 } /* namespace Kokkos */
 
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-// Deduce Mirror Types
-template<class Space, class T, class ... P>
-struct MirrorViewType {
-  // The incoming view_type
-  typedef typename Kokkos::View<T,P...> src_view_type;
-  // The memory space for the mirror view
-  typedef typename Space::memory_space memory_space;
-  // Check whether it is the same memory space
-  enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value };
-  // The array_layout
-  typedef typename src_view_type::array_layout array_layout;
-  // The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
-  typedef typename src_view_type::non_const_data_type data_type;
-  // The destination view type if it is not the same memory space
-  typedef Kokkos::View<data_type,array_layout,Space> dest_view_type;
-  // If it is the same memory_space return the existsing view_type
-  // This will also keep the unmanaged trait if necessary
-  typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type;
-};
-
-template<class Space, class T, class ... P>
-struct MirrorType {
-  // The incoming view_type
-  typedef typename Kokkos::View<T,P...> src_view_type;
-  // The memory space for the mirror view
-  typedef typename Space::memory_space memory_space;
-  // Check whether it is the same memory space
-  enum { is_same_memspace = std::is_same<memory_space,typename src_view_type::memory_space>::value };
-  // The array_layout
-  typedef typename src_view_type::array_layout array_layout;
-  // The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
-  typedef typename src_view_type::non_const_data_type data_type;
-  // The destination view type if it is not the same memory space
-  typedef Kokkos::View<data_type,array_layout,Space> view_type;
-};
-
-}
-
-template< class T , class ... P >
-inline
-typename Kokkos::View<T,P...>::HostMirror
-create_mirror( const Kokkos::View<T,P...> & src
-             , typename std::enable_if<
-                 ! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
-                               , Kokkos::LayoutStride >::value
-               >::type * = 0
-             )
-{
-  typedef View<T,P...>                   src_type ;
-  typedef typename src_type::HostMirror  dst_type ;
-
-  return dst_type( std::string( src.label() ).append("_mirror")
-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
-                   , src.extent(0)
-                   , src.extent(1)
-                   , src.extent(2)
-                   , src.extent(3)
-                   , src.extent(4)
-                   , src.extent(5)
-                   , src.extent(6)
-                   , src.extent(7) );
-#else
-                 , src.rank_dynamic > 0 ? src.extent(0): KOKKOS_IMPL_CTOR_DEFAULT_ARG
-                 , src.rank_dynamic > 1 ? src.extent(1): KOKKOS_IMPL_CTOR_DEFAULT_ARG
-                 , src.rank_dynamic > 2 ? src.extent(2): KOKKOS_IMPL_CTOR_DEFAULT_ARG
-                 , src.rank_dynamic > 3 ? src.extent(3): KOKKOS_IMPL_CTOR_DEFAULT_ARG
-                 , src.rank_dynamic > 4 ? src.extent(4): KOKKOS_IMPL_CTOR_DEFAULT_ARG
-                 , src.rank_dynamic > 5 ? src.extent(5): KOKKOS_IMPL_CTOR_DEFAULT_ARG
-                 , src.rank_dynamic > 6 ? src.extent(6): KOKKOS_IMPL_CTOR_DEFAULT_ARG
-                 , src.rank_dynamic > 7 ? src.extent(7): KOKKOS_IMPL_CTOR_DEFAULT_ARG );
-#endif
-}
-
-template< class T , class ... P >
-inline
-typename Kokkos::View<T,P...>::HostMirror
-create_mirror( const Kokkos::View<T,P...> & src
-             , typename std::enable_if<
-                 std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
-                             , Kokkos::LayoutStride >::value
-               >::type * = 0
-             )
-{
-  typedef View<T,P...>                   src_type ;
-  typedef typename src_type::HostMirror  dst_type ;
-
-  Kokkos::LayoutStride layout ;
-
-  layout.dimension[0] = src.extent(0);
-  layout.dimension[1] = src.extent(1);
-  layout.dimension[2] = src.extent(2);
-  layout.dimension[3] = src.extent(3);
-  layout.dimension[4] = src.extent(4);
-  layout.dimension[5] = src.extent(5);
-  layout.dimension[6] = src.extent(6);
-  layout.dimension[7] = src.extent(7);
-
-  layout.stride[0] = src.stride_0();
-  layout.stride[1] = src.stride_1();
-  layout.stride[2] = src.stride_2();
-  layout.stride[3] = src.stride_3();
-  layout.stride[4] = src.stride_4();
-  layout.stride[5] = src.stride_5();
-  layout.stride[6] = src.stride_6();
-  layout.stride[7] = src.stride_7();
-
-  return dst_type( std::string( src.label() ).append("_mirror") , layout );
-}
-
-
-// Create a mirror in a new space (specialization for different space)
-template<class Space, class T, class ... P>
-typename Impl::MirrorType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::View<T,P...> & src) {
-  return typename Impl::MirrorType<Space,T,P ...>::view_type(src.label(),src.layout());
-}
-
-template< class T , class ... P >
-inline
-typename Kokkos::View<T,P...>::HostMirror
-create_mirror_view( const Kokkos::View<T,P...> & src
-                  , typename std::enable_if<(
-                      std::is_same< typename Kokkos::View<T,P...>::memory_space
-                                  , typename Kokkos::View<T,P...>::HostMirror::memory_space
-                                  >::value
-                      &&
-                      std::is_same< typename Kokkos::View<T,P...>::data_type
-                                  , typename Kokkos::View<T,P...>::HostMirror::data_type
-                                  >::value
-                    )>::type * = 0
-                  )
-{
-  return src ;
-}
-
-template< class T , class ... P >
-inline
-typename Kokkos::View<T,P...>::HostMirror
-create_mirror_view( const Kokkos::View<T,P...> & src
-                  , typename std::enable_if< ! (
-                      std::is_same< typename Kokkos::View<T,P...>::memory_space
-                                  , typename Kokkos::View<T,P...>::HostMirror::memory_space
-                                  >::value
-                      &&
-                      std::is_same< typename Kokkos::View<T,P...>::data_type
-                                  , typename Kokkos::View<T,P...>::HostMirror::data_type
-                                  >::value
-                    )>::type * = 0
-                  )
-{
-  return Kokkos::create_mirror( src );
-}
-
-// Create a mirror view in a new space (specialization for same space)
-template<class Space, class T, class ... P>
-typename Impl::MirrorViewType<Space,T,P ...>::view_type
-create_mirror_view(const Space& , const Kokkos::View<T,P...> & src
-  , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
-  return src;
-}
-
-// Create a mirror view in a new space (specialization for different space)
-template<class Space, class T, class ... P>
-typename Impl::MirrorViewType<Space,T,P ...>::view_type
-create_mirror_view(const Space& , const Kokkos::View<T,P...> & src
-  , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
-  return typename Impl::MirrorViewType<Space,T,P ...>::view_type(src.label(),src.layout());
-}
-
-// Create a mirror view and deep_copy in a new space (specialization for same space)
-template<class Space, class T, class ... P>
-typename Impl::MirrorViewType<Space,T,P ...>::view_type
-create_mirror_view_and_copy(const Space& , const Kokkos::View<T,P...> & src
-  , std::string const& name = ""
-  , typename std::enable_if<Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
-  (void)name;
-  return src;
-}
-
-// Create a mirror view and deep_copy in a new space (specialization for different space)
-template<class Space, class T, class ... P>
-typename Impl::MirrorViewType<Space,T,P ...>::view_type
-create_mirror_view_and_copy(const Space& , const Kokkos::View<T,P...> & src
-  , std::string const& name = ""
-  , typename std::enable_if<!Impl::MirrorViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
-  using Mirror = typename Impl::MirrorViewType<Space,T,P ...>::view_type;
-  std::string label = name.empty() ? src.label() : name;
-  auto mirror = Mirror(ViewAllocateWithoutInitializing(label), src.layout());
-  deep_copy(mirror, src);
-  return mirror;
-}
-
-} /* namespace Kokkos */
-
-
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
diff --git a/packages/kokkos/core/src/Makefile b/packages/kokkos/core/src/Makefile
index 6ee5fec71..c2dbddf45 100644
--- a/packages/kokkos/core/src/Makefile
+++ b/packages/kokkos/core/src/Makefile
@@ -16,6 +16,7 @@ endif
 CXXFLAGS ?= -O3
 LINK ?= $(CXX)
 LDFLAGS ?=
+CP = cp
 
 include $(KOKKOS_PATH)/Makefile.kokkos
 include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists
@@ -50,7 +51,12 @@ ifeq ($(KOKKOS_OS),Linux)
   COPY_FLAG = -u
 endif
 ifeq ($(KOKKOS_OS),Darwin)
-  COPY_FLAG =
+  COPY_FLAG = 
+  # If Homebrew coreutils is installed, its cp will have the -u option
+  ifneq ("$(wildcard /usr/local/opt/coreutils/libexec/gnubin/cp)","")
+    CP = /usr/local/opt/coreutils/libexec/gnubin/cp
+    COPY_FLAG = -u
+  endif
 endif
 
 ifeq ($(KOKKOS_DEBUG),"no")
@@ -66,36 +72,38 @@ mkdir:
 	mkdir -p $(PREFIX)/bin
 	mkdir -p $(PREFIX)/include
 	mkdir -p $(PREFIX)/lib
+	mkdir -p $(PREFIX)/lib/pkgconfig
 	mkdir -p $(PREFIX)/include/impl
 
 copy-cuda: mkdir
 	mkdir -p $(PREFIX)/include/Cuda
-	cp $(COPY_FLAG) $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda
+	$(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda
 
 copy-threads: mkdir
 	mkdir -p $(PREFIX)/include/Threads
-	cp $(COPY_FLAG) $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads
+	$(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads
 
 copy-qthreads: mkdir
 	mkdir -p $(PREFIX)/include/Qthreads
-	cp $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREADS) $(PREFIX)/include/Qthreads
+	$(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREADS) $(PREFIX)/include/Qthreads
 
 copy-openmp: mkdir
 	mkdir -p $(PREFIX)/include/OpenMP
-	cp $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP
+	$(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP
 
 copy-rocm: mkdir
 	mkdir -p $(PREFIX)/include/ROCm
-	cp $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm
+	$(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm
 
 install: mkdir $(CONDITIONAL_COPIES) build-lib generate_build_settings
-	cp $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin
-	cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
-	cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
-	cp $(COPY_FLAG) $(KOKKOS_MAKEFILE)  $(PREFIX)
-	cp $(COPY_FLAG) $(KOKKOS_CMAKEFILE)  $(PREFIX)
-	cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib
-	cp $(COPY_FLAG) $(KOKKOS_CONFIG_HEADER) $(PREFIX)/include
+	$(CP) $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin
+	$(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
+	$(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
+	$(CP) $(COPY_FLAG) $(KOKKOS_MAKEFILE)  $(PREFIX)
+	$(CP) $(COPY_FLAG) $(KOKKOS_CMAKEFILE)  $(PREFIX)
+	$(CP) $(COPY_FLAG) $(KOKKOS_PKGCONFIG)  $(PREFIX)/lib/pkgconfig
+	$(CP) $(COPY_FLAG) libkokkos.a $(PREFIX)/lib
+	$(CP) $(COPY_FLAG) $(KOKKOS_CONFIG_HEADER) $(PREFIX)/include
 
 clean: kokkos-clean
-	rm -f $(KOKKOS_MAKEFILE) $(KOKKOS_CMAKEFILE) 
+	rm -f $(KOKKOS_MAKEFILE) $(KOKKOS_CMAKEFILE) $(KOKKOS_PKGCONFIG) 
diff --git a/packages/kokkos/core/src/Makefile.generate_build_files b/packages/kokkos/core/src/Makefile.generate_build_files
index 7e0c6351f..cc856ee9a 100644
--- a/packages/kokkos/core/src/Makefile.generate_build_files
+++ b/packages/kokkos/core/src/Makefile.generate_build_files
@@ -5,6 +5,7 @@
 # These files are generated by this makefile
 KOKKOS_MAKEFILE=Makefile.kokkos
 KOKKOS_CMAKEFILE=kokkos_generated_settings.cmake
+KOKKOS_PKGCONFIG=kokkos.pc
 
 ifeq ($(KOKKOS_DEBUG),"no")
   KOKKOS_DEBUG_CMAKE = OFF
@@ -33,11 +34,29 @@ kokkos_append_var = $(call kokkos_appendvar_makefile,$1); $(call kokkos_appendva
 kokkos_append_var2 = $(call kokkos_appendvar2_makefile,$1); $(call kokkos_appendvar_cmakefile,$1,$2)
 kokkos_append_varval = $(call kokkos_appendval_makefile,$1,$2); $(call kokkos_appendval_cmakefile,$1,$2,$3)
 
+kokkos_fixup_sed_impl = sed \
+		-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
+		-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
+		-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
+		-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
+		-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
+		-e 's|= $(KOKKOS_CONFIG_HEADER)|= $(PREFIX)/include/$(KOKKOS_CONFIG_HEADER)|g' $1 \
+		> $1.tmp && mv -f $1.tmp $1
+
+$(KOKKOS_PKGCONFIG): $(KOKKOS_PATH)/core/src/$(KOKKOS_PKGCONFIG).in
+	@sed -e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|g' \
+	    -e 's|@KOKKOS_CXXFLAGS@|$(patsubst -I%,,$(KOKKOS_CXXFLAGS))|g' \
+	    -e 's|@KOKKOS_EXTRA_LIBS_LIST@|$(KOKKOS_EXTRA_LIBS)|g' \
+	    -e 's|@KOKKOS_LINK_FLAGS@|$(KOKKOS_LINK_FLAGS)|g' \
+	     $< > $@
+
+kokkos_fixup_sed = $(call kokkos_fixup_sed_impl,$(KOKKOS_MAKEFILE)); $(call kokkos_fixup_sed_impl,$(KOKKOS_CMAKEFILE))
+
 #This function should be used for variables whose values are different in GNU Make versus CMake,
 #especially lists which are delimited by commas in one case and semicolons in another
 kokkos_append_gmakevar = $(call kokkos_appendvar_makefile,$1); $(call kokkos_append_gmakevar_cmakefile,$1,$2)
 
-generate_build_settings: $(KOKKOS_CONFIG_HEADER)
+generate_build_settings: $(KOKKOS_CONFIG_HEADER) $(KOKKOS_PKGCONFIG)
 	@rm -f $(KOKKOS_MAKEFILE)
 	@rm -f $(KOKKOS_CMAKEFILE)
 	@$(call kokkos_append_string, "#Global Settings used to generate this library")
@@ -68,7 +87,6 @@ generate_build_settings: $(KOKKOS_CONFIG_HEADER)
 	@$(call kokkos_append_var,KOKKOS_HEADERS_ROCM,'STRING "Kokkos headers ROCm list"')
 	@$(call kokkos_append_var,KOKKOS_HEADERS_THREADS,'STRING "Kokkos headers Threads list"')
 	@$(call kokkos_append_var,KOKKOS_HEADERS_QTHREADS,'STRING "Kokkos headers QThreads list"')
-	@$(call kokkos_append_var,KOKKOS_SRC,'STRING "Kokkos source list"')
 	@$(call kokkos_append_string,"")
 	@$(call kokkos_append_string,"#Variables used in application Makefiles")
 	@$(call kokkos_append_var,KOKKOS_OS,'STRING ""')  # This was not in original cmake gen
@@ -94,19 +112,11 @@ generate_build_settings: $(KOKKOS_CONFIG_HEADER)
 	@$(call kokkos_append_makefile,"#Fake kokkos-clean target")
 	@$(call kokkos_append_makefile,"kokkos-clean:")
 	@$(call kokkos_append_makefile,"")
-	@sed \
-		-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
-		-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
-		-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
-		-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
-		-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
-		-e 's|= $(KOKKOS_CONFIG_HEADER)|= $(PREFIX)/include/$(KOKKOS_CONFIG_HEADER)|g' $(KOKKOS_MAKEFILE) \
-		> $(KOKKOS_MAKEFILE).tmp
-	@mv -f $(KOKKOS_MAKEFILE).tmp $(KOKKOS_MAKEFILE)
+	@$(call kokkos_fixup_sed)
+	@$(call kokkos_append_var,KOKKOS_SRC,'STRING "Kokkos source list"')
 	@$(call kokkos_setvar_cmakefile,KOKKOS_CXX_FLAGS,$(KOKKOS_CXXFLAGS))
 	@$(call kokkos_setvar_cmakefile,KOKKOS_CPP_FLAGS,$(KOKKOS_CPPFLAGS))
 	@$(call kokkos_setvar_cmakefile,KOKKOS_LD_FLAGS,$(KOKKOS_LDFLAGS))
 	@$(call kokkos_setlist_cmakefile,KOKKOS_LIBS_LIST,$(KOKKOS_LIBS))
 	@$(call kokkos_setlist_cmakefile,KOKKOS_EXTRA_LIBS_LIST,$(KOKKOS_EXTRA_LIBS))
 	@$(call kokkos_setvar_cmakefile,KOKKOS_LINK_FLAGS,$(KOKKOS_LINK_FLAGS))
-
diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
index 351f5f1ec..2f2c76846 100644
--- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
+++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
@@ -103,8 +103,6 @@ public:
 void TaskQueueSpecialization< Kokkos::OpenMP >::execute
   ( TaskQueue< Kokkos::OpenMP > * const queue )
 {
-  using execution_space = Kokkos::OpenMP ;
-  using queue_type      = TaskQueue< execution_space > ;
   using task_root_type  = TaskBase< void , void , void > ;
   using Member          = Impl::HostThreadTeamMember< execution_space > ;
 
@@ -213,8 +211,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::
   iff_single_thread_recursive_execute
     ( TaskQueue< Kokkos::OpenMP > * const queue )
 {
-  using execution_space = Kokkos::OpenMP ;
-  using queue_type      = TaskQueue< execution_space > ;
   using task_root_type  = TaskBase< void , void , void > ;
   using Member          = Impl::HostThreadTeamMember< execution_space > ;
 
diff --git a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp
index b530dca10..e8fbc467e 100644
--- a/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp
+++ b/packages/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp
@@ -76,14 +76,11 @@ public:
 
   //----------------------------------------
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   template< class FunctorType >
   inline static
   int team_size_max( const FunctorType & ) {
-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
     int pool_size = traits::execution_space::thread_pool_size(1);
-#else
-    int pool_size = traits::execution_space::impl_thread_pool_size(1);
-#endif
     int max_host_team_size =  Impl::HostThreadTeamData::max_team_members;
     return pool_size<max_host_team_size?pool_size:max_host_team_size;
   }
@@ -92,17 +89,47 @@ public:
   inline static
   int team_size_recommended( const FunctorType & )
   {
-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
     return traits::execution_space::thread_pool_size(2);
-#else
-    return traits::execution_space::impl_thread_pool_size(2);
-#endif
   }
 
   template< class FunctorType >
   inline static
   int team_size_recommended( const FunctorType &, const int& )
   {
+    return traits::execution_space::thread_pool_size(2);
+  }
+#endif
+
+  template<class FunctorType>
+  int team_size_max( const FunctorType&, const ParallelForTag& ) const {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    int pool_size = traits::execution_space::thread_pool_size(1);
+#else
+    int pool_size = traits::execution_space::impl_thread_pool_size(1);
+#endif
+    int max_host_team_size =  Impl::HostThreadTeamData::max_team_members;
+    return pool_size<max_host_team_size?pool_size:max_host_team_size;
+  }
+  template<class FunctorType>
+  int team_size_max( const FunctorType&, const ParallelReduceTag& ) const {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    int pool_size = traits::execution_space::thread_pool_size(1);
+#else
+    int pool_size = traits::execution_space::impl_thread_pool_size(1);
+#endif
+    int max_host_team_size =  Impl::HostThreadTeamData::max_team_members;
+    return pool_size<max_host_team_size?pool_size:max_host_team_size;
+  }
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType&, const ParallelForTag& ) const {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    return traits::execution_space::thread_pool_size(2);
+#else
+    return traits::execution_space::impl_thread_pool_size(2);
+#endif
+  }
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType&, const ParallelReduceTag& ) const {
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
     return traits::execution_space::thread_pool_size(2);
 #else
@@ -110,6 +137,18 @@ public:
 #endif
   }
 
+
+  inline static
+  int vector_length_max()
+    { return 1024; } // Use arbitrary large number, is meant as a vectorizable length
+
+  inline static
+  int scratch_size_max(int level)
+    { return (level==0?
+        1024*32: // Roughly L1 size
+        20*1024*1024); // Limit to keep compatibility with CUDA
+    }
+
   //----------------------------------------
 
 private:
diff --git a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
index 2d53670c8..c7d4defad 100644
--- a/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
+++ b/packages/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
@@ -160,7 +160,8 @@ SharedAllocationRecord( const Kokkos::Experimental::OpenMPTargetSpace & arg_spac
           , arg_label.c_str()
           , SharedAllocationHeader::maximum_label_length
           );
-  
+  // Set last element zero, in case c_str is too long
+  header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0; 
   //TODO DeepCopy
   // DeepCopy
 
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp
index 71643458b..87840bb37 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Config.hpp
@@ -44,8 +44,8 @@
 #ifndef GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP
 #define GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP
 
-#ifndef KOKKOS_ROCM_HAS_WORKAROUNDS
-#define KOKKOS_ROCM_HAS_WORKAROUNDS 1
+#ifndef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
+#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1
 #endif
 
 #endif
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp
index 1c2bf303c..205e6a295 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp
@@ -55,14 +55,14 @@ namespace Impl {
 
 struct ROCmTraits {
 // TODO: determine if needed
-  enum { WavefrontSize       = 64 /* 64  */ };
-  enum { WorkgroupSize       = 64 /* 64  */ };
-  enum { WavefrontIndexMask  = 0x001f  /* Mask for warpindex */ };
-  enum { WavefrontIndexShift = 5       /* WarpSize == 1 << WarpShift */ };
+  enum { WavefrontSize       = 64  /* 64  */ };
+  enum { WorkgroupSize       = 256 /* 256  */ };
+  enum { WavefrontIndexMask  = 0x003f  /* Mask for wavefrontindex */ };
+  enum { WavefrontIndexShift = 6   /* WavefrontSize == 1 << WavefrontShift */ };
 
-  enum { SharedMemoryBanks    = 32      /* Compute device 2.0 */ };
-  enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ };
-  enum { SharedMemoryUsage    = 0x04000 /* 16k shared / 48k L1 Cache */ };
+  enum { SharedMemoryBanks    = 64      /* GCN */ };
+  enum { SharedMemoryCapacity = 0x10000 /* 64k shared / 16k L1 Cache */ };
+  enum { SharedMemoryUsage    = 0x04000 /* 64k shared / 16k L1 Cache */ };
 
   enum { UpperBoundExtentCount    = 4294967295 /* Hard upper bound */ };
 #if 0
@@ -84,6 +84,16 @@ size_t rocm_internal_maximum_workgroup_count();
 size_t * rocm_internal_scratch_flags( const size_t size );
 size_t * rocm_internal_scratch_space( const size_t size );
 
+// This pointer is the start of dynamic shared memory (LDS).
+// Dynamic is at the end of LDS and it's size must be specified
+// in a tile_block specification at kernel launch time.
+template< typename T >
+KOKKOS_INLINE_FUNCTION
+T * kokkos_impl_rocm_shared_memory()
+//{ return (T*) hc::get_group_segment_base_pointer() ; }
+{ return (T*) hc::get_dynamic_group_segment_base_pointer() ; }
+
+
 }
 } // namespace Kokkos
 #define ROCM_SPACE_ATOMIC_MASK      0x1FFFF
@@ -249,7 +259,6 @@ struct ROCmParallelLaunch< DriverType
       size_t bx = (grid.x > block.x)? block.x : grid.x;
       size_t by = (grid.y > block.y)? block.y : grid.y;
       size_t bz = (grid.z > block.z)? block.z : grid.z;
-
       hc::parallel_for_each(ext.tile_with_dynamic(bz,by,bx,shmem), [=](const hc::index<3> & idx) [[hc]]
  
  
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp
index 3ae312647..236042ccc 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp
@@ -543,20 +543,13 @@ enum { sizeScratchGrain = sizeof(ScratchGrain) };
 void rocmMemset(  Kokkos::Experimental::ROCm::size_type * ptr ,  Kokkos::Experimental::ROCm::size_type value , Kokkos::Experimental::ROCm::size_type size)
 {
 char * mptr = (char * ) ptr;
-#if 0
-   parallel_for_each(hc::extent<1>(size),
+/*   parallel_for_each(hc::extent<1>(size),
                     [=, &ptr]
                     (hc::index<1> idx) __HC__
    {
       int i = idx[0];
       ptr[i] = value;
-   }).wait();
-#else
-   for (int i= 0; i<size ; i++)
-   {
-     mptr[i] = (char) value;
-   }
-#endif
+   }).wait();*/
 }
 
 Kokkos::Experimental::ROCm::size_type *
@@ -567,9 +560,9 @@ ROCmInternal::scratch_flags( const Kokkos::Experimental::ROCm::size_type size )
 
     m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
 
-    typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;
+    typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > Record ;
 
-    Record * const r = Record::allocate( Kokkos::HostSpace()
+    Record * const r = Record::allocate( Kokkos::Experimental::ROCmSpace()
                                        , "InternalScratchFlags"
                                        , ( sizeScratchGrain  * m_scratchFlagsCount ) );
 
@@ -590,9 +583,9 @@ ROCmInternal::scratch_space( const Kokkos::Experimental::ROCm::size_type size )
 
     m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
 
-     typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;
+     typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > Record ;
 
-     Record * const r = Record::allocate( Kokkos::HostSpace()
+     static Record * const r = Record::allocate( Kokkos::Experimental::ROCmSpace()
                                         , "InternalScratchSpace"
                                         , ( sizeScratchGrain  * m_scratchSpaceCount ) );
 
@@ -616,7 +609,7 @@ void ROCmInternal::finalize()
 //    scratch_lock_array_rocm_space_ptr(false);
 //    threadid_lock_array_rocm_space_ptr(false);
 
-    typedef Kokkos::Impl::SharedAllocationRecord< HostSpace > RecordROCm ;
+    typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace > RecordROCm ;
     typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace > RecordHost ;
 
     RecordROCm::decrement( RecordROCm::get_record( m_scratchFlags ) );
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp
index 2978ae8f5..edd1c12e4 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp
@@ -243,6 +243,15 @@ public:
    return(max);
  }
 
+  template< class FunctorType , class PatternTypeTag>
+  int team_size_max( const FunctorType& functor, PatternTypeTag) {
+    return 256/vector_length();
+  }
+  template< class FunctorType , class PatternTypeTag>
+  int team_size_recommended( const FunctorType& functor, PatternTypeTag) {
+    return 128/vector_length();
+  }
+
   template<class F>
   KOKKOS_INLINE_FUNCTION int team_size(const F& f) const { return (m_team_size > 0) ? m_team_size : team_size_recommended(f); }
   KOKKOS_INLINE_FUNCTION int team_size() const { return (m_team_size > 0) ? m_team_size : Impl::get_max_tile_thread(); ; }
@@ -261,6 +270,11 @@ public:
     return m_thread_scratch_size[level];
   }
 
+  static int scratch_size_max(int level) {
+    return level==0 ? 
+      1024*40 : 1024*1204*20;
+  }
+
   typedef Impl::ROCmTeamMember member_type;
 };
 
@@ -487,6 +501,7 @@ public:
 #endif
       }
       m_idx.barrier.wait();
+      reducer.reference() = buffer[0];
     }
 
     /** \brief  Intra-team vector reduce 
@@ -541,19 +556,19 @@ public:
     }
 
   template< typename ReducerType >
-  KOKKOS_INLINE_FUNCTION static
+  KOKKOS_INLINE_FUNCTION
   typename std::enable_if< is_reducer< ReducerType >::value >::type
-  vector_reduce( ReducerType const & reducer )
+  vector_reduce( ReducerType const & reducer ) const
     {
       #ifdef __HCC_ACCELERATOR__
-      if(blockDim_x == 1) return;
+      if(m_vector_length == 1) return;
 
       // Intra vector lane shuffle reduction:
       typename ReducerType::value_type tmp ( reducer.reference() );
 
-      for ( int i = blockDim_x ; ( i >>= 1 ) ; ) {
-        shfl_down( reducer.reference() , i , blockDim_x );
-        if ( (int)threadIdx_x < i ) { reducer.join( tmp , reducer.reference() ); }
+      for ( int i = m_vector_length ; ( i >>= 1 ) ; ) {
+        reducer.reference() = shfl_down( tmp , i , m_vector_length );
+        if ( (int)vector_rank() < i ) { reducer.join( tmp , reducer.reference() ); }
       }
 
       // Broadcast from root lane to all other lanes.
@@ -561,7 +576,7 @@ public:
       // because floating point summation is not associative
       // and thus different threads could have different results.
 
-      shfl( reducer.reference() , 0 , blockDim_x );
+      reducer.reference() = shfl( tmp , 0 , m_vector_length );
       #endif
     }
 
@@ -847,7 +862,7 @@ public:
 
       hc::extent< 1 > flat_extent( total_size );
 
-      hc::tiled_extent< 1 > team_extent = flat_extent.tile(team_size*vector_length);
+      hc::tiled_extent< 1 > team_extent = flat_extent.tile(vector_length*team_size);
       hc::parallel_for_each( team_extent , [=](hc::tiled_index<1> idx) [[hc]]
       {
         rocm_invoke<typename Policy::work_tag>(f, typename Policy::member_type(idx, league_size, team_size, shared, shared_size, scratch_size0, scratch, scratch_size1,vector_length));
@@ -958,6 +973,176 @@ public:
 
 };
 
+//----------------------------------------------------------------------------
+
+template< class FunctorType , class ReducerType, class... Traits >
+class ParallelReduce<
+  FunctorType , Kokkos::MDRangePolicy< Traits... >, ReducerType, Kokkos::Experimental::ROCm >
+{
+private:
+  typedef Kokkos::MDRangePolicy< Traits ...  > Policy ;
+  using RP = Policy;
+  typedef typename Policy::array_index_type array_index_type;
+  typedef typename Policy::index_type index_type;
+  typedef typename Policy::work_tag     WorkTag ;
+  typedef typename Policy::member_type  Member ;
+  typedef typename Policy::launch_bounds LaunchBounds;
+
+  typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
+  typedef typename ReducerConditional::type ReducerTypeFwd;
+  typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
+
+  typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ;
+  typedef Kokkos::Impl::FunctorValueInit<   ReducerTypeFwd, WorkTagFwd > ValueInit ;
+  typedef Kokkos::Impl::FunctorValueJoin<   ReducerTypeFwd, WorkTagFwd > ValueJoin ;
+
+
+public:
+
+  typedef typename ValueTraits::pointer_type    pointer_type ;
+  typedef typename ValueTraits::value_type      value_type ;
+  typedef typename ValueTraits::reference_type  reference_type ;
+  typedef FunctorType                           functor_type ;
+  typedef Kokkos::Experimental::ROCm::size_type size_type ;
+
+  // Algorithmic constraints: blockSize is a power of two AND blockDim.y == blockDim.z == 1
+
+  const FunctorType   m_functor ;
+  const Policy        m_policy ; // used for workrange and nwork
+  const ReducerType   m_reducer ;
+  const pointer_type  m_result_ptr ;
+  value_type *         m_scratch_space ;
+  size_type *         m_scratch_flags ;
+
+  typedef typename Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank, Policy, FunctorType, typename Policy::work_tag, reference_type> DeviceIteratePattern;
+
+  KOKKOS_INLINE_FUNCTION
+  void exec_range( reference_type update ) const
+  {
+    Kokkos::Impl::Reduce::DeviceIterateTile<Policy::rank,Policy,FunctorType,typename Policy::work_tag, reference_type>(m_policy, m_functor, update).exec_range();
+  }
+
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(void) const
+    {
+       run();
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  void run( ) const
+  {
+    const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(value_type) >
+      word_count( (ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) )) / sizeof(value_type) );
+      // pointer to shared data accounts for the reserved space at the start
+      value_type * const shared = kokkos_impl_rocm_shared_memory<value_type>()
+                                 + 2*sizeof(uint64_t); 
+
+    {
+      reference_type value =
+        ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , shared + threadIdx_y * word_count.value );
+      // Number of blocks is bounded so that the reduction can be limited to two passes.
+      // Each thread block is given an approximately equal amount of work to perform.
+      // Accumulate the values for this block.
+      // The accumulation ordering does not match the final pass, but is arithmatically equivalent.
+
+      this-> exec_range( value );
+    }
+
+    // Reduce with final value at blockDim.y - 1 location.
+    // Problem: non power-of-two blockDim
+
+    if ( rocm_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTagFwd>(
+           ReducerConditional::select(m_functor , m_reducer) , blockIdx_x ,
+           gridDim_x , shared , m_scratch_space , m_scratch_flags ) ) {
+
+      // This is the final block with the final result at the final threads' location
+      value_type * const tshared = shared + ( blockDim_y - 1 ) * word_count.value ;
+      value_type * const global =  m_scratch_space ;
+
+      if ( threadIdx_y == 0 ) {
+        Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , tshared );
+//        for ( unsigned i = 0 ; i < word_count.value ; i+=blockDim_y ) { global[i] = tshared[i]; }
+        for ( unsigned i = 0 ; i < word_count.value ; i++ ) { global[i] = tshared[i]; }
+      }
+    }
+  }
+
+
+
+  // Determine block size constrained by shared memory:
+  static inline
+  unsigned local_block_size( const FunctorType & f )
+    {
+      unsigned n = ROCmTraits::WavefrontSize * 8 ;
+      while ( n && ROCmTraits::SharedMemoryCapacity < rocm_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( f , n ) ) { n >>= 1 ; }
+      return n ;
+    }
+
+  inline
+  void execute()
+    {
+      const int nwork = m_policy.m_num_tiles;
+      if ( nwork ) {
+        int block_size = m_policy.m_prod_tile_dims;
+        // CONSTRAINT: Algorithm requires block_size >= product of tile dimensions
+        // Nearest power of two
+        int exponent_pow_two = std::ceil( std::log2((float)block_size) );
+        block_size = 1<<(exponent_pow_two);
+
+        m_scratch_space = (value_type*)rocm_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size*nwork /* block_size == max block_count */ );
+        m_scratch_flags = rocm_internal_scratch_flags( sizeof(size_type) );
+        const dim3 block( 1 , block_size , 1 );
+        // Required grid.x <= block.y
+        const dim3 grid( nwork, block_size ,  1 );
+      const int shmem = rocm_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( m_functor , block.y );
+
+      ROCmParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute
+
+      ROCM::fence();
+
+      if ( m_result_ptr ) {
+          const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer)  );
+          DeepCopy<HostSpace,Kokkos::Experimental::ROCmSpace>( m_result_ptr , m_scratch_space , size );
+      }
+    }
+    else {
+      if (m_result_ptr) {
+        ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr );
+      }
+    }
+  }
+
+
+  template< class HostViewType >
+  ParallelReduce( const FunctorType  & arg_functor
+                , const Policy       & arg_policy
+                , const HostViewType & arg_result
+                , typename std::enable_if<
+                   Kokkos::is_view< HostViewType >::value
+                ,void*>::type = NULL)
+  : m_functor( arg_functor )
+  , m_policy(  arg_policy )
+  , m_reducer( InvalidType() )
+  , m_result_ptr( arg_result.data() )
+  , m_scratch_space( 0 )
+  , m_scratch_flags( 0 )
+  {}
+
+  ParallelReduce( const FunctorType  & arg_functor
+                , const Policy       & arg_policy
+                , const ReducerType & reducer)
+  : m_functor( arg_functor )
+  , m_policy(  arg_policy )
+  , m_reducer( reducer )
+  , m_result_ptr( reducer.view().data() )
+  , m_scratch_space( 0 )
+  , m_scratch_flags( 0 )
+  {}
+
+};
+//----------------------------------------------------------------------------
+
 template< class FunctorType, class ReducerType, class... Traits >
 class ParallelReduce<
    FunctorType , Kokkos::TeamPolicy< Traits... >, ReducerType, Kokkos::Experimental::ROCm >
@@ -992,8 +1177,14 @@ public:
       const int scratch_size0 = policy.scratch_size(0,team_size);
       const int scratch_size1 = policy.scratch_size(1,team_size);
       const int total_size = league_size * team_size ;
-
-      if(total_size == 0) return;
+      
+      typedef Kokkos::Impl::FunctorValueInit< FunctorType, typename Policy::work_tag > ValueInit ;
+      if(total_size==0) {
+        if (result_view.data()) {
+           ValueInit::init( f , result_view.data() );
+        }
+        return;
+      }
 
       const int reduce_size = ValueTraits::value_size( f );
       const int shared_size = FunctorTeamShmemSize< FunctorType >::value( f , team_size );
@@ -1042,7 +1233,16 @@ public:
       const int vector_length = policy.vector_length();
       const int total_size = league_size * team_size;
 
-      if(total_size == 0) return;
+      typedef Kokkos::Impl::FunctorValueInit< ReducerType, typename Policy::work_tag > ValueInit ;
+      typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value,
+                                   FunctorType, ReducerType> ReducerConditional;
+      if(total_size==0) {
+        if (reducer.view().data()) {
+           ValueInit::init( ReducerConditional::select(f,reducer), 
+                            reducer.view().data() );
+        }
+        return;
+      }
 
       const int reduce_size = ValueTraits::value_size( f );
       const int shared_size = FunctorTeamShmemSize< FunctorType >::value( f , team_size );
@@ -1113,6 +1313,39 @@ public:
   //----------------------------------------
 };
 
+template< class FunctorType , class ReturnType , class... Traits >
+class ParallelScanWithTotal< FunctorType , Kokkos::RangePolicy< Traits... >,
+                             ReturnType, Kokkos::Experimental::ROCm >
+{
+private:
+
+  typedef Kokkos::RangePolicy< Traits... > Policy;
+  typedef typename Policy::work_tag Tag;
+  typedef Kokkos::Impl::FunctorValueTraits< FunctorType, Tag>  ValueTraits;
+
+public:
+
+  //----------------------------------------
+
+  inline
+  ParallelScanWithTotal( const FunctorType & f
+              , const Policy      & policy 
+              , ReturnType        & arg_returnvalue)
+  {
+    const auto len = policy.end()-policy.begin();
+
+
+    if(len==0) return;
+
+    scan_enqueue<Tag,ReturnType>(len, f, arg_returnvalue, [](hc::tiled_index<1> idx, int, int) { return idx.global[0]; });
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void execute() const {}
+
+  //----------------------------------------
+};
+
 template< class FunctorType , class... Traits>
 class ParallelScan< FunctorType , Kokkos::TeamPolicy< Traits... >, Kokkos::Experimental::ROCm >
 {
@@ -1350,22 +1583,17 @@ void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTe
  * val is performed and put into result. This functionality requires C++11 support.*/
 template< typename iType, class Lambda, typename ValueType >
 KOKKOS_INLINE_FUNCTION
-void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries,
+typename std::enable_if< ! Kokkos::is_reducer< ValueType >::value >::type
+parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries,
                      const Lambda & lambda, ValueType& result) {
 
-  result = ValueType();
+  Kokkos::Sum<ValueType> reducer(result);
+  reducer.init( reducer.reference() );
 
   for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
-    ValueType tmp = ValueType();
-    lambda(i,tmp);
-    result+=tmp;
+    lambda(i,reducer.reference());
   }
-  result = loop_boundaries.thread.team_reduce(result,
-                                              Impl::JoinAdd<ValueType>());
-//  Impl::rocm_intra_workgroup_reduction( loop_boundaries.thread, result,
-//               Impl::JoinAdd<ValueType>());
-//  Impl::rocm_inter_workgroup_reduction( loop_boundaries.thread, result,
-//               Impl::JoinAdd<ValueType>());
+  loop_boundaries.thread.team_reduce(reducer);
 }
 
 /** \brief  Inter-thread thread range parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
@@ -1374,7 +1602,8 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROC
  * val is performed and put into result. This functionality requires C++11 support.*/
 template< typename iType, class Lambda, typename ReducerType >
 KOKKOS_INLINE_FUNCTION
-void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries,
+typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type
+parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ROCmTeamMember>& loop_boundaries,
                      const Lambda & lambda, ReducerType const & reducer) {
   reducer.init( reducer.reference() );
 
@@ -1439,7 +1668,8 @@ void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCm
  * val is performed and put into result. This functionality requires C++11 support.*/
 template< typename iType, class Lambda, typename ValueType >
 KOKKOS_INLINE_FUNCTION
-void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >&
+typename std::enable_if< !Kokkos::is_reducer< ValueType >::value >::type 
+parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >&
       loop_boundaries, const Lambda & lambda, ValueType& result) {
   result = ValueType();
 
@@ -1477,7 +1707,8 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::R
  * val is performed and put into result. This functionality requires C++11 support.*/
 template< typename iType, class Lambda, typename ReducerType >
 KOKKOS_INLINE_FUNCTION
-void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >&
+typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type
+parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROCmTeamMember >&
       loop_boundaries, const Lambda & lambda, ReducerType const & reducer) {
   reducer.init( reducer.reference() );
 
@@ -1523,86 +1754,46 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ROC
   typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
   typedef typename ValueTraits::value_type value_type ;
 
-  value_type scan_val = value_type();
-#if (__ROCM_ARCH__ >= 800)
-// adopt the cuda vector shuffle method
-  const int VectorLength = loop_boundaries.increment;
-  int lid = loop_boundaries.thread.lindex();
-  int vector_rank = lid%VectorLength;
-
-  iType loop_bound = ((loop_boundaries.end+VectorLength-1)/VectorLength) * VectorLength;
-  value_type val ;
-  for(int _i = vector_rank; _i < loop_bound; _i += VectorLength) {
-    val = value_type();
-    if(_i<loop_boundaries.end)
-      lambda(_i , val , false);
-
-    value_type tmp = val;
-    value_type result_i;
-
-    if(vector_rank == 0)
-      result_i = tmp;
-    if (VectorLength > 1) {
-      const value_type tmp2 = shfl_up(tmp, 1,VectorLength);
-      if(vector_rank > 0)
-        tmp+=tmp2;
-    }
-    if(vector_rank == 1)
-      result_i = tmp;
-    if (VectorLength > 3) {
-      const value_type tmp2 = shfl_up(tmp, 2,VectorLength);
-      if(vector_rank > 1)
-        tmp+=tmp2;
-    }
-    if ((vector_rank >= 2) &&
-        (vector_rank < 4))
-      result_i = tmp;
-    if (VectorLength > 7) {
-      const value_type tmp2 = shfl_up(tmp, 4,VectorLength);
-      if(vector_rank > 3)
-        tmp+=tmp2;
-    }
-    if ((vector_rank >= 4) &&
-        (vector_rank < 8))
-      result_i = tmp;
-    if (VectorLength > 15) {
-      const value_type tmp2 = shfl_up(tmp, 8,VectorLength);
-      if(vector_rank > 7)
-        tmp+=tmp2;
-    }
-    if ((vector_rank >= 8) &&
-        (vector_rank < 16))
-      result_i = tmp;
-    if (VectorLength > 31) {
-      const value_type tmp2 = shfl_up(tmp, 16,VectorLength);
-      if(vector_rank > 15)
-        tmp+=tmp2;
-    }
-    if ((vector_rank >=16) &&
-        (vector_rank < 32))
-      result_i = tmp;
-    if (VectorLength > 63) {
-      const value_type tmp2 = shfl_up(tmp, 32,VectorLength);
-      if(vector_rank > 31)
-        tmp+=tmp2;
+  value_type val = value_type();
+  const int vector_length = loop_boundaries.thread.vector_length();
+  const int vector_rank = loop_boundaries.thread.vector_rank();
+
+  iType end = ((loop_boundaries.end+vector_length-1)/vector_length) * vector_length;
+  value_type accum = value_type();
+
+  for ( int i = vector_rank ; i < end ; i += vector_length ) {
+
+    value_type val = 0 ;
+
+    // First acquire per-lane contributions:
+    if ( i < loop_boundaries.end ) lambda( i , val , false );
+
+    value_type sval = val ;
+
+    // Bottom up inclusive scan in triangular pattern
+    // where each thread is the root of a reduction tree
+    // from the zeroth "lane" to itself.
+    //  [t] += [t-1] if t >= 1
+    //  [t] += [t-2] if t >= 2
+    //  [t] += [t-4] if t >= 4
+    //  ...
+
+    for ( int j = 1 ; j < vector_length ; j <<= 1 ) {
+      value_type tmp = 0 ;
+      tmp = shfl_up(sval , j , vector_length );
+      if ( j <= vector_rank ) { sval += tmp ; }
     }
 
-    if (vector_rank >= 32)
-      result_i = tmp;
+    // Include accumulation and remove value for exclusive scan:
+    val = accum + sval - val ;
 
-    val = scan_val + result_i - val;
-    scan_val += shfl(tmp,VectorLength-1,VectorLength);
-    if(_i<loop_boundaries.end)
-      lambda(_i , val , true);
-  }
-#else
-// for kaveri, call the LDS based thread_scan routine
-  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
-    lambda(i,scan_val,true);
-  }
-  scan_val = loop_boundaries.thread.team_scan(scan_val);
+    // Provide exclusive scan value:
+    if ( i < loop_boundaries.end ) lambda( i , val , true );
 
-#endif
+    // Accumulate the last value in the inclusive scan:
+    sval = shfl( sval , vector_length-1 , vector_length);
+    accum += sval ;
+  }
 }
 
 } // namespace Kokkos
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp
index 7dd69e757..0321f3d53 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp
@@ -57,7 +57,6 @@
 #include <ROCm/Kokkos_ROCm_Tile.hpp>
 #include <ROCm/Kokkos_ROCm_Invoke.hpp>
 #include <ROCm/Kokkos_ROCm_Join.hpp>
-
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 namespace Kokkos {
@@ -75,7 +74,7 @@ T& reduce_value(T* x, std::false_type) [[hc]]
   return *x;
 }
 
-#if KOKKOS_ROCM_HAS_WORKAROUNDS
+#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
 struct always_true
 {
     template<class... Ts>
@@ -149,7 +148,7 @@ void reduce_enqueue(
       // Store the tile result in the global memory.
       if (local == 0)
       {
-#if KOKKOS_ROCM_HAS_WORKAROUNDS
+#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
           // Workaround for assigning from LDS memory: std::copy should work
           // directly
           buffer.action_at(0, [&](T* x)
@@ -158,7 +157,7 @@ void reduce_enqueue(
 // new ROCM 15 address space changes aren't implemented in std algorithms yet
               auto * src = reinterpret_cast<char *>(x);
               auto * dest = reinterpret_cast<char *>(result.data()+tile*output_length);
-              for(int i=0; i<sizeof(T);i++) dest[i] = src[i];
+              for(int i=0; i<sizeof(T)*output_length;i++) dest[i] = src[i];
 #else
               // Workaround: copy_if used to avoid memmove
               std::copy_if(x, x+output_length, result.data()+tile*output_length, always_true{} );
@@ -169,12 +168,10 @@ void reduce_enqueue(
 
 #endif
       }
-      
   });
   if (output_result != nullptr)
      ValueInit::init(ReducerConditional::select(f, reducer), output_result);
   fut.wait();
-
   copy(result,result_cpu.data());
   if (output_result != nullptr) {
     for(std::size_t i=0;i<td.num_tiles;i++)
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp
index 3f67089b9..33efa0d6f 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp
@@ -62,6 +62,76 @@
 namespace Kokkos {
 namespace Impl {
 
+//#if __KALMAR_ACCELERATOR__ == 1
+KOKKOS_INLINE_FUNCTION
+void __syncthreads() [[hc]]
+{
+   amp_barrier(CLK_LOCAL_MEM_FENCE);
+}
+
+#define LT0 ((threadIdx_x+threadIdx_y+threadIdx_z)?0:1)
+
+
+// returns non-zero if and only if predicate is non-zero for all threads
+// note that syncthreads_or uses the first 64 bits of dynamic group memory.
+// this reserved memory must be accounted for everwhere 
+// that get_dynamic_group_segment_base_pointer is called.
+KOKKOS_INLINE_FUNCTION
+uint64_t __syncthreads_or(uint64_t  pred) 
+{
+  uint64_t *shared_var = (uint64_t *)hc::get_dynamic_group_segment_base_pointer();
+  if(LT0) *shared_var = 0;
+  amp_barrier(CLK_LOCAL_MEM_FENCE);
+#if __KALMAR_ACCELERATOR__ == 1
+  if (pred) hc::atomic_or_uint64(shared_var,1);
+#endif
+  amp_barrier(CLK_LOCAL_MEM_FENCE);
+  return (*shared_var);
+}
+
+KOKKOS_INLINE_FUNCTION
+void __threadfence() 
+{
+   amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
+}
+
+KOKKOS_INLINE_FUNCTION
+void __threadfence_block() 
+{
+   amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
+}
+//#endif
+struct ROCm_atomic_CAS {
+    template<class OP>
+    KOKKOS_INLINE_FUNCTION
+    unsigned long operator () (volatile unsigned long * dest, OP &&op){
+       unsigned long read,compare,val;
+       compare = *dest;
+       read = compare;
+       do {
+         compare = read;
+         val = op(compare);
+#if __KALMAR_ACCELERATOR__ == 1
+         hc::atomic_compare_exchange((uint64_t *)dest,&read,val);
+#endif
+       } while (read != compare);
+       return val;
+    }
+};
+
+  template<class OP>
+  KOKKOS_INLINE_FUNCTION
+  unsigned long atomic_cas_op (volatile unsigned long * dest, OP &&op) {
+    ROCm_atomic_CAS cas_op;
+    return cas_op(dest, std::forward<OP>(op));
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  unsigned long atomicInc (volatile unsigned long * dest, const unsigned long& val) {
+    return atomic_cas_op(dest, [=](unsigned long old){return ((old>=val)?0:(old+1));});
+  }
+
+
 //----------------------------------------------------------------------------
 
 template< typename T >
@@ -375,18 +445,7 @@ bool rocm_inter_block_reduction( ROCmTeamMember& team,
 #endif
 }
 #endif
-#if 0
 
-//----------------------------------------------------------------------------
-// See section B.17 of ROCm C Programming Guide Version 3.2
-// for discussion of
-//   __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
-// function qualifier which could be used to improve performance.
-//----------------------------------------------------------------------------
-// Maximize shared memory and minimize L1 cache:
-//   rocmFuncSetCacheConfig(MyKernel, rocmFuncCachePreferShared );
-// For 2.0 capability: 48 KB shared and 16 KB L1
-//----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 /*
  *  Algorithmic constraints:
@@ -406,87 +465,105 @@ void rocm_intra_block_reduce_scan( const FunctorType & functor ,
   typedef typename ValueTraits::pointer_type  pointer_type ;
 
   const unsigned value_count   = ValueTraits::value_count( functor );
-  const unsigned BlockSizeMask = team.team_size() - 1 ;
+  const unsigned BlockSizeMask = blockDim_y  - 1 ;
 
   // Must have power of two thread count
 
-  if ( BlockSizeMask & team.team_size() ) { Kokkos::abort("ROCm::rocm_intra_block_scan requires power-of-two blockDim"); }
+  if ( BlockSizeMask & blockDim_y ) { Kokkos::abort("ROCm::rocm_intra_block_scan requires power-of-two blockDim"); }
 
 #define BLOCK_REDUCE_STEP( R , TD , S )  \
-  if ( ! ( R & ((1<<(S+1))-1) ) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); }
+  if ( ! (( R & ((1<<(S+1))-1) )|(blockDim_y<(1<<(S+1)))) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); }
 
 #define BLOCK_SCAN_STEP( TD , N , S )  \
   if ( N == (1<<S) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S))); }
+#define KOKKOS_IMPL_ROCM_SYNCWF __threadfence_block()
 
-  const unsigned     rtid_intra = team.team_rank() ^ BlockSizeMask ;
-  const pointer_type tdata_intra = base_data + value_count * team.team_rank() ;
+  const unsigned     rtid_intra = threadIdx_y ^ BlockSizeMask ;
+  const pointer_type tdata_intra = base_data + value_count * threadIdx_y ;
 
-  { // Intra-workgroup reduction:
+  { // Intra-workgroup reduction: min blocksize of 64
+    KOKKOS_IMPL_ROCM_SYNCWF;
     BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,0)
+    KOKKOS_IMPL_ROCM_SYNCWF;
     BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,1)
+    KOKKOS_IMPL_ROCM_SYNCWF;
     BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,2)
+    KOKKOS_IMPL_ROCM_SYNCWF;
     BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,3)
+    KOKKOS_IMPL_ROCM_SYNCWF;
     BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,4)
+    KOKKOS_IMPL_ROCM_SYNCWF;
+    BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,5)
+    KOKKOS_IMPL_ROCM_SYNCWF;
   }
 
-  team.team_barrier(); // Wait for all workgroups to reduce
+  __syncthreads(); // Wait for all workgroups to reduce
 
   { // Inter-workgroup reduce-scan by a single workgroup to avoid extra synchronizations
-    const unsigned rtid_inter = ( team.team_rank() ^ BlockSizeMask ) << ROCmTraits::WarpIndexShift ;
+    if(threadIdx_y < value_count) {
+      for(int i=blockDim_y-65; i>0; i-= 64)
+        ValueJoin::join( functor , base_data + (blockDim_y-1)*value_count + threadIdx_y ,  base_data + i*value_count + threadIdx_y );
+    }
+    __syncthreads();
+#if 0
+    const unsigned rtid_inter = ( threadIdx_y ^ BlockSizeMask ) << ROCmTraits::WavefrontIndexShift ;
+
+    if ( rtid_inter < blockDim_y ) {
 
-    if ( rtid_inter < team.team_size() ) {
 
       const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask );
+//
+// remove these comments
+// for rocm, we start with a block size of 64, so the 5 step is already done.
+// The remaining steps are only done if block size is > 64, so we leave them
+// in place until we tune blocksize for performance, then remove the ones 
+// that will never be used.
+//      if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
+//      if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
+//      if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
+//      if ( (1<<9) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,9) }
 
-      if ( (1<<5) < BlockSizeMask ) {                        BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
-      if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
-      if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
-      if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
 
       if ( DoScan ) {
 
-        int n = ( rtid_inter &  32 ) ?  32 : (
-                ( rtid_inter &  64 ) ?  64 : (
+        int n = ( rtid_inter &  64 ) ?  64 : (
                 ( rtid_inter & 128 ) ? 128 : (
-                ( rtid_inter & 256 ) ? 256 : 0 )));
+                ( rtid_inter & 256 ) ? 256 : 0 ));
 
-        if ( ! ( rtid_inter + n < team.team_size() ) ) n = 0 ;
+        if ( ! ( rtid_inter + n < blockDim_y ) ) n = 0 ;
 
         __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,8)
         __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,7)
         __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,6)
-        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,5)
+//        __threadfence_block(); BLOCK_SCAN_STEP(tdata_inter,n,5)
       }
     }
+#endif
   }
 
-  team.team_barrier(); // Wait for inter-workgroup reduce-scan to complete
+  __syncthreads(); // Wait for inter-workgroup reduce-scan to complete
 
   if ( DoScan ) {
     int n = ( rtid_intra &  1 ) ?  1 : (
             ( rtid_intra &  2 ) ?  2 : (
             ( rtid_intra &  4 ) ?  4 : (
             ( rtid_intra &  8 ) ?  8 : (
-            ( rtid_intra & 16 ) ? 16 : 0 ))));
-
-    if ( ! ( rtid_intra + n < team.team_size() ) ) n = 0 ;
-    #ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
-    BLOCK_SCAN_STEP(tdata_intra,n,4) team.team_barrier();//__threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,3) team.team_barrier();//__threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,2) team.team_barrier();//__threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,1) team.team_barrier();//__threadfence_block();
-    BLOCK_SCAN_STEP(tdata_intra,n,0) team.team_barrier();
-    #else
-    BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
+            ( rtid_intra & 16 ) ? 16 : (
+            ( rtid_intra & 32 ) ? 32 : 0 )))));
+
+    if ( ! ( rtid_intra + n < blockDim_y ) ) n = 0 ;
+
+//    BLOCK_SCAN_STEP(tdata_intra,n,5) __threadfence_block();
+//    BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
     BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block();
     BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block();
     BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block();
     BLOCK_SCAN_STEP(tdata_intra,n,0) __threadfence_block();
-    #endif
   }
 
 #undef BLOCK_SCAN_STEP
 #undef BLOCK_REDUCE_STEP
+#undef KOKKOS_IMPL_ROCM_SYNCWF
 }
 
 //----------------------------------------------------------------------------
@@ -497,16 +574,18 @@ void rocm_intra_block_reduce_scan( const FunctorType & functor ,
  *
  *  Global reduce result is in the last threads' 'shared_data' location.
  */
+using ROCM  = Kokkos::Experimental::ROCm ;
+
 template< bool DoScan , class FunctorType , class ArgTag >
 KOKKOS_INLINE_FUNCTION
 bool rocm_single_inter_block_reduce_scan( const FunctorType     & functor ,
-                                          const ROCm::size_type   block_id ,
-                                          const ROCm::size_type   block_count ,
-                                          ROCm::size_type * const shared_data ,
-                                          ROCm::size_type * const global_data ,
-                                          ROCm::size_type * const global_flags )
+                                          const ROCM::size_type   block_id ,
+                                          const ROCM::size_type   block_count ,
+                                          typename FunctorValueTraits<FunctorType, ArgTag>::value_type * const shared_data ,
+                                          typename FunctorValueTraits<FunctorType, ArgTag>::value_type * const global_data ,
+                                          ROCM::size_type * const global_flags )
 {
-  typedef ROCm::size_type                  size_type ;
+  typedef ROCM::size_type                  size_type ;
   typedef FunctorValueTraits< FunctorType , ArgTag >  ValueTraits ;
   typedef FunctorValueJoin<   FunctorType , ArgTag >  ValueJoin ;
   typedef FunctorValueInit<   FunctorType , ArgTag >  ValueInit ;
@@ -517,16 +596,17 @@ bool rocm_single_inter_block_reduce_scan( const FunctorType     & functor ,
   typedef typename ValueTraits::value_type      value_type ;
 
   // '__ffs' = position of the least significant bit set to 1.
-  // 'team.team_size()' is guaranteed to be a power of two so this
+  // blockDim_y is guaranteed to be a power of two so this
   // is the integral shift value that can replace an integral divide.
-  const unsigned BlockSizeShift = __ffs( team.team_size() ) - 1 ;
-  const unsigned BlockSizeMask  = team.team_size() - 1 ;
+  //  const unsigned long BlockSizeShift = __ffs( blockDim_y ) - 1 ;
+  const unsigned long BlockSizeShift = __lastbit_u32_u32( blockDim_y )  ;
+  const unsigned long BlockSizeMask  = blockDim_y - 1 ;
 
   // Must have power of two thread count
-  if ( BlockSizeMask & team.team_size() ) { Kokkos::abort("ROCm::rocm_single_inter_block_reduce_scan requires power-of-two blockDim"); }
+  if ( BlockSizeMask & blockDim_y ) { Kokkos::abort("ROCm::rocm_single_inter_block_reduce_scan requires power-of-two blockDim"); }
 
-  const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
-    word_count( ValueTraits::value_size( functor ) / sizeof(size_type) );
+  const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(value_type) >
+    word_count( ValueTraits::value_size( functor )/ sizeof(value_type) );
 
   // Reduce the accumulation for the entire block.
   rocm_intra_block_reduce_scan<false,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
@@ -534,54 +614,47 @@ bool rocm_single_inter_block_reduce_scan( const FunctorType     & functor ,
   {
     // Write accumulation total to global scratch space.
     // Accumulation total is the last thread's data.
-    size_type * const shared = shared_data + word_count.value * BlockSizeMask ;
-    size_type * const global = global_data + word_count.value * block_id ;
-
-#if (__ROCM_ARCH__ < 500)
-    for ( size_type i = team.team_rank() ; i < word_count.value ; i += team.team_size() ) { global[i] = shared[i] ; }
-#else
-    for ( size_type i = 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; }
-#endif
+    value_type * const shared = shared_data +  
+                                   word_count.value * BlockSizeMask ;
+    value_type * const global = global_data + word_count.value * block_id ;
 
+    for ( int i = int(threadIdx_y) ; i < word_count.value ; i += blockDim_y ) { global[i] = shared[i] ; }
   }
 
   // Contributing blocks note that their contribution has been completed via an atomic-increment flag
   // If this block is not the last block to contribute to this group then the block is done.
-    team.team_barrier();
+    
   const bool is_last_block =
-    ! team.team_reduce( team.team_rank() ? 0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) ,Impl::JoinAdd<ValueType>());
-
+    !  __syncthreads_or( threadIdx_y ? 0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) );
   if ( is_last_block ) {
 
-    const size_type b = ( long(block_count) * long(team.team_rank()) ) >> BlockSizeShift ;
-    const size_type e = ( long(block_count) * long( team.team_rank() + 1 ) ) >> BlockSizeShift ;
+    const size_type b = ( long(block_count) * long(threadIdx_y )) >> BlockSizeShift ;
+    const size_type e = ( long(block_count) * long(threadIdx_y + 1 ) ) >> BlockSizeShift ;
 
     {
-      void * const shared_ptr = shared_data + word_count.value * team.team_rank() ;
-      reference_type shared_value = ValueInit::init( functor , shared_ptr );
+      value_type * const shared_ptr = shared_data + word_count.value * threadIdx_y ;
+      ValueInit::init( functor , shared_ptr );
+
 
       for ( size_type i = b ; i < e ; ++i ) {
         ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i );
       }
     }
-
     rocm_intra_block_reduce_scan<DoScan,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
 
     if ( DoScan ) {
+      value_type * const shared_value = shared_data + word_count.value * ( threadIdx_y ? threadIdx_y - 1 : blockDim_y );
 
-      size_type * const shared_value = shared_data + word_count.value * ( team.team_rank() ? team.team_rank() - 1 : team.team_size() );
-
-      if ( ! team.team_rank() ) { ValueInit::init( functor , shared_value ); }
+      if ( ! threadIdx_y ) { ValueInit::init( functor , shared_value ); }
 
       // Join previous inclusive scan value to each member
       for ( size_type i = b ; i < e ; ++i ) {
-        size_type * const global_value = global_data + word_count.value * i ;
+        value_type * const global_value = global_data + word_count.value * i ;
         ValueJoin::join( functor , shared_value , global_value );
         ValueOps ::copy( functor , global_value , shared_value );
       }
     }
   }
-
   return is_last_block ;
 }
 
@@ -592,7 +665,6 @@ unsigned rocm_single_inter_block_reduce_scan_shmem( const FunctorType & functor
 {
   return ( BlockSize + 2 ) * Impl::FunctorValueTraits< FunctorType , ArgTag >::value_size( functor );
 }
-#endif 
 
 } // namespace Impl
 } // namespace Kokkos
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp
index 9890598bc..f24db42ce 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp
@@ -98,7 +98,109 @@ void scan_enqueue(
             {
                auto j = i + d - 1;
                auto k = i + d2 - 1;
-//               join(k, j);  // no longer needed with ROCm 1.6
+
+               ValueJoin::join(f, &buffer[k], &buffer[j]);
+            }
+        }
+        t_idx.barrier.wait();
+
+        result[tile] = buffer[buffer.size()-1];
+        buffer[buffer.size()-1] = 0;
+        // Down sweep phase
+        for(std::size_t d=buffer.size()/2;d>0;d/=2)
+        {
+            auto d2 = 2*d;
+            auto i = local*d2;
+            if(i<len)
+            {
+               auto j = i + d - 1;
+               auto k = i + d2 - 1;
+               auto t = buffer[k];
+
+               ValueJoin::join(f, &buffer[k], &buffer[j]);
+               buffer[j] = t;
+            }
+            t_idx.barrier.wait();
+        }
+        // Copy tiles into global memory
+        if (global < len) scratch[global] = buffer[local];
+    }).wait();
+    copy(result,result_cpu.data());
+
+   for(int i=1; i<td.num_tiles; i++)
+      ValueJoin::join(f, &result_cpu[i], &result_cpu[i-1]);
+
+    copy(result_cpu.data(),result);
+    size_t launch_len = (((len - 1) / td.tile_size) + 1) * td.tile_size;
+    hc::parallel_for_each(hc::extent<1>(launch_len).tile(td.tile_size), [&,f,len,td](hc::tiled_index<1> t_idx) [[hc]] 
+    {
+        const auto global = t_idx.global[0];
+        const auto tile = t_idx.tile[0];
+
+        if (global < len) 
+        {
+            auto final_state = scratch[global];
+
+            if (tile != 0) ValueJoin::join(f, &final_state, &result[tile-1]);
+            rocm_invoke<Tag>(f, transform_index(t_idx, td.tile_size, td.num_tiles), final_state, true);
+        }
+    }).wait();
+}
+
+template< class Tag, class ReturnType, class F, class TransformIndex>
+void scan_enqueue(
+  const int len,
+  const F & f,
+  ReturnType & return_val,
+  TransformIndex transform_index)
+{
+    typedef Kokkos::Impl::FunctorValueTraits< F, Tag>  ValueTraits;
+    typedef Kokkos::Impl::FunctorValueInit<   F, Tag>  ValueInit;
+    typedef Kokkos::Impl::FunctorValueJoin<   F, Tag>  ValueJoin;
+    typedef Kokkos::Impl::FunctorValueOps<    F, Tag>  ValueOps;
+
+    typedef typename ValueTraits::value_type    value_type;
+    typedef typename ValueTraits::pointer_type    pointer_type;
+    typedef typename ValueTraits::reference_type  reference_type;
+
+    const auto td = get_tile_desc<value_type>(len);
+    std::vector<value_type> result_cpu(td.num_tiles);
+    hc::array<value_type> result(td.num_tiles);
+    hc::array<value_type> scratch(len);
+    std::vector<ReturnType> total_cpu(1);
+    hc::array<ReturnType> total(1);
+
+    tile_for<value_type>(td, [&,f,len,td](hc::tiled_index<1> t_idx, tile_buffer<value_type> buffer) [[hc]] 
+    {
+        const auto local = t_idx.local[0];
+        const auto global = t_idx.global[0];
+        const auto tile = t_idx.tile[0];
+
+        // Join tile buffer elements
+        const auto join = [&](std::size_t i, std::size_t j)
+        {
+            buffer.action_at(i, j, [&](value_type& x, const value_type& y)
+            {
+                ValueJoin::join(f, &x, &y);
+            });
+        };
+
+        // Copy into tile
+        buffer.action_at(local, [&](value_type& state)
+        {
+            ValueInit::init(f, &state);
+            if (global < len) rocm_invoke<Tag>(f, transform_index(t_idx, td.tile_size, td.num_tiles), state, false);
+        });
+        t_idx.barrier.wait();
+        // Up sweep phase
+        for(std::size_t d=1;d<buffer.size();d*=2)
+        {
+            auto d2 = 2*d;
+            auto i = local*d2;
+            if(i<len)
+            {
+               auto j = i + d - 1;
+               auto k = i + d2 - 1;
                ValueJoin::join(f, &buffer[k], &buffer[j]);
             }
         }
@@ -116,7 +218,6 @@ void scan_enqueue(
                auto j = i + d - 1;
                auto k = i + d2 - 1;
                auto t = buffer[k];
-//               join(k, j);  // no longer needed with ROCm 1.6
                ValueJoin::join(f, &buffer[k], &buffer[j]);
                buffer[j] = t;
             }
@@ -127,17 +228,13 @@ void scan_enqueue(
     }).wait();
     copy(result,result_cpu.data());
 
-//  The std::partial_sum was segfaulting, despite that this is cpu code.
-//   if(td.num_tiles>1)
-//      std::partial_sum(result_cpu.data(), result_cpu.data()+(td.num_tiles-1)*sizeof(value_type), result_cpu.data(), make_join_operator<ValueJoin>(f));
-// use this implementation instead.
    for(int i=1; i<td.num_tiles; i++)
       ValueJoin::join(f, &result_cpu[i], &result_cpu[i-1]);
 
     copy(result_cpu.data(),result);
-    hc::parallel_for_each(hc::extent<1>(len).tile(td.tile_size), [&,f,len,td](hc::tiled_index<1> t_idx) [[hc]] 
+    size_t launch_len = (((len - 1) / td.tile_size) + 1) * td.tile_size;
+    hc::parallel_for_each(hc::extent<1>(launch_len).tile(td.tile_size), [&,f,len,td](hc::tiled_index<1> t_idx) [[hc]] 
     {
-//        const auto local = t_idx.local[0];
         const auto global = t_idx.global[0];
         const auto tile = t_idx.tile[0];
 
@@ -145,12 +242,13 @@ void scan_enqueue(
         {
             auto final_state = scratch[global];
 
-// the join is locking up, at least with 1.6
-            if (tile != 0) final_state += result[tile-1];
-//            if (tile != 0) ValueJoin::join(f, &final_state, &result[tile-1]);
+            if (tile != 0) ValueJoin::join(f, &final_state, &result[tile-1]);
             rocm_invoke<Tag>(f, transform_index(t_idx, td.tile_size, td.num_tiles), final_state, true);
+            if(global==(len-1))  total[0] = final_state;
         }
     }).wait();
+    copy(total,total_cpu.data());
+    return_val = total_cpu[0];
 }
 
 } // namespace Impl
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp
index 12f34373c..2fe0c4192 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Space.cpp
@@ -362,6 +362,8 @@ SharedAllocationRecord( const Kokkos::Experimental::ROCmSpace & arg_space
           , arg_label.c_str()
           , SharedAllocationHeader::maximum_label_length
           );
+  // Set last element zero, in case c_str is too long
+  header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
 
   // Copy to device memory
   Kokkos::Impl::DeepCopy<Kokkos::Experimental::ROCmSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
@@ -399,6 +401,8 @@ SharedAllocationRecord( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_sp
           , arg_label.c_str()
           , SharedAllocationHeader::maximum_label_length
           );
+  // Set last element zero, in case c_str is too long
+  RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
 }
 
 //----------------------------------------------------------------------------
diff --git a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp
index db16db3f9..b4436ae15 100644
--- a/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp
+++ b/packages/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp
@@ -278,7 +278,7 @@ struct single_action
     void action_at(std::size_t i, Action a) [[hc]]
     {
         auto& value = static_cast<Derived&>(*this)[i];
-#if KOKKOS_ROCM_HAS_WORKAROUNDS
+#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
         T state = value;
         a(state);
         value = state;
@@ -347,7 +347,7 @@ struct tile_buffer<T[]>
 #if defined (ROCM15)
         a(value);
 #else
-#if KOKKOS_ROCM_HAS_WORKAROUNDS
+#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
         if (m > get_max_tile_array_size()) return;
         T state[get_max_tile_array_size()];
         // std::copy(value, value+m, state);
@@ -372,7 +372,6 @@ struct tile_buffer<T[]>
 #if defined (ROCM15)
         a(value);
 #else
-//#if KOKKOS_ROCM_HAS_WORKAROUNDS
         if (m > get_max_tile_array_size()) return;
         T state[get_max_tile_array_size()];
         // std::copy(value, value+m, state);
diff --git a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
index e63f868c5..e88abdba5 100644
--- a/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
+++ b/packages/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
@@ -175,6 +175,27 @@ public:
 #endif
   }
 
+  template<class Closure, class ValueType>
+  KOKKOS_INLINE_FUNCTION
+  void team_broadcast(Closure const & f, ValueType& value, const int& thread_id) const
+  {
+#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+    { }
+#else
+    // Make sure there is enough scratch space:
+    typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE
+                         , ValueType , void >::type type ;
+    f( value );
+    if ( m_team_base ) {
+      type * const local_value = ((type*) m_team_base[0]->scratch_memory());
+      if(team_rank() == thread_id) *local_value = value;
+      memory_fence();
+      team_barrier();
+      value = *local_value;
+    }
+#endif
+  }
+  
   template< typename Type >
   KOKKOS_INLINE_FUNCTION
   typename std::enable_if< !Kokkos::is_reducer< Type >::value , Type>::type
@@ -626,39 +647,77 @@ public:
 
   //----------------------------------------
 
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
   template< class FunctorType >
   inline static
   int team_size_max( const FunctorType & ) {
-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
-      int pool_size = traits::execution_space::thread_pool_size(1);
-#else
-      int pool_size = traits::execution_space::impl_thread_pool_size(1);
-#endif
-      int max_host_team_size =  Impl::HostThreadTeamData::max_team_members;
-      return pool_size<max_host_team_size?pool_size:max_host_team_size;
-    }
-
+    int pool_size = traits::execution_space::thread_pool_size(1);
+    int max_host_team_size =  Impl::HostThreadTeamData::max_team_members;
+    return pool_size<max_host_team_size?pool_size:max_host_team_size;
+  }
 
   template< class FunctorType >
-  static int team_size_recommended( const FunctorType & )
-    {
-#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
-      return traits::execution_space::thread_pool_size(2);
-#else
-      return traits::execution_space::impl_thread_pool_size(2);
-#endif
-    }
-
+  inline static
+  int team_size_recommended( const FunctorType & )
+  {
+    return traits::execution_space::thread_pool_size(2);
+  }
 
   template< class FunctorType >
   inline static
   int team_size_recommended( const FunctorType &, const int& )
-    {
+  {
+    return traits::execution_space::thread_pool_size(2);
+  }
+#endif
+
+  template<class FunctorType>
+  int team_size_max( const FunctorType&, const ParallelForTag& ) const {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    int pool_size = traits::execution_space::thread_pool_size(1);
+#else
+    int pool_size = traits::execution_space::impl_thread_pool_size(1);
+#endif
+    int max_host_team_size =  Impl::HostThreadTeamData::max_team_members;
+    return pool_size<max_host_team_size?pool_size:max_host_team_size;
+  }
+  template<class FunctorType>
+  int team_size_max( const FunctorType&, const ParallelReduceTag& ) const {
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
-      return traits::execution_space::thread_pool_size(2);
+    int pool_size = traits::execution_space::thread_pool_size(1);
 #else
-      return traits::execution_space::impl_thread_pool_size(2);
+    int pool_size = traits::execution_space::impl_thread_pool_size(1);
 #endif
+    int max_host_team_size =  Impl::HostThreadTeamData::max_team_members;
+    return pool_size<max_host_team_size?pool_size:max_host_team_size;
+  }
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType&, const ParallelForTag& ) const {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    return traits::execution_space::thread_pool_size(2);
+#else
+    return traits::execution_space::impl_thread_pool_size(2);
+#endif
+  }
+  template<class FunctorType>
+  int team_size_recommended( const FunctorType&, const ParallelReduceTag& ) const {
+#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
+    return traits::execution_space::thread_pool_size(2);
+#else
+    return traits::execution_space::impl_thread_pool_size(2);
+#endif
+  }
+
+
+  inline static
+  int vector_length_max()
+    { return 1024; } // Use arbitrary large number, is meant as a vectorizable length
+
+  inline static
+  int scratch_size_max(int level)
+    { return (level==0?
+        1024*32: // Roughly L1 size
+        20*1024*1024); // Limit to keep compatibility with CUDA
     }
 
   //----------------------------------------
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp b/packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp
similarity index 100%
rename from packages/kokkos/core/src/eti/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp
rename to packages/kokkos/core/src/eti/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Makefile.eti_Experimental::ROCm b/packages/kokkos/core/src/eti/ROCm/Makefile.eti_Experimental::ROCm
deleted file mode 100644
index 054360fd1..000000000
--- a/packages/kokkos/core/src/eti/ROCm/Makefile.eti_Experimental::ROCm
+++ /dev/null
@@ -1,288 +0,0 @@
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp
-Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_Experimental::ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/eti/ROCm/Makefile.eti_ROCm b/packages/kokkos/core/src/eti/ROCm/Makefile.eti_ROCm
new file mode 100644
index 000000000..0423c6feb
--- /dev/null
+++ b/packages/kokkos/core/src/eti/ROCm/Makefile.eti_ROCm
@@ -0,0 +1,288 @@
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp
+Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/ROCm/Kokkos_ROCm_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
index ad115dd8f..e2028db8c 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp
@@ -107,7 +107,12 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
   int done = 0;
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+  unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
   unsigned int done_active = 0;
   while (active!=done_active) {
     if(!done) {
@@ -119,7 +124,11 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
         done = 1;
       }
     }
-    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
+#else
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+#endif
   }
   return return_val;
 }
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
index 801a8091d..4e41cb125 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp
@@ -130,7 +130,12 @@ T atomic_exchange( volatile T * const dest ,
 #endif
 
   int done = 0;
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+  unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
   unsigned int done_active = 0;
   while (active!=done_active) {
     if(!done) {
@@ -141,7 +146,11 @@ T atomic_exchange( volatile T * const dest ,
         done = 1;
       }
     }
-    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
+#else
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+#endif
   }
   return return_val;
 }
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
index 8249e709d..e2e23bb5f 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp
@@ -143,7 +143,12 @@ T atomic_fetch_add( volatile T * const dest ,
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
   int done = 0;
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+  unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
   unsigned int done_active = 0;
   while (active!=done_active) {
     if(!done) {
@@ -155,7 +160,12 @@ T atomic_fetch_add( volatile T * const dest ,
         done = 1;
       }
     }
-    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
+
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
+#else
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+#endif
   }
   return return_val;
 }
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
index 3f58c5539..dd69c967c 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp
@@ -135,7 +135,12 @@ T atomic_fetch_sub( volatile T * const dest ,
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
   int done = 0;
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+  unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
   unsigned int done_active = 0;
   while (active!=done_active) {
     if(!done) {
@@ -146,7 +151,11 @@ T atomic_fetch_sub( volatile T * const dest ,
         done = 1;
       }
     }
-    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
+#else
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+#endif
   }
   return return_val;
 }
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
index 6140d4589..74e9db303 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp
@@ -246,7 +246,12 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
   // This is a way to (hopefully) avoid dead lock in a warp
   T return_val;
   int done = 0;
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+  unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
   unsigned int done_active = 0;
   while (active!=done_active) {
     if(!done) {
@@ -257,7 +262,11 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
         done=1;
       }
     }
-    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
+#else
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+#endif
   }
   return return_val;
 #endif
@@ -285,7 +294,12 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
   T return_val;
   // This is a way to (hopefully) avoid dead lock in a warp
   int done = 0;
-  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+  unsigned int mask = KOKKOS_IMPL_CUDA_ACTIVEMASK;
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,1);
+#else
+  unsigned int active = KOKKOS_IMPL_CUDA_BALLOT_MASK(1);
+#endif
   unsigned int done_active = 0;
   while (active!=done_active) {
     if(!done) {
@@ -296,7 +310,11 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
         done=1;
       }
     }
-    done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
+#ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(mask,done);
+#else
+    done_active = KOKKOS_IMPL_CUDA_BALLOT_MASK(done);
+#endif
   }
   return return_val;
 #endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
index eeec2d1f4..b18134f40 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
@@ -45,13 +45,17 @@
 
 #ifdef _WIN32
 
+#ifndef NOMINMAX
 #define NOMINMAX
+#endif
 #include <winsock2.h>
-#include <Windows.h>
+#include <windows.h>
 
 namespace Kokkos {
   namespace Impl {
+#ifdef _MSC_VER
     _declspec(align(16))
+#endif
     struct cas128_t
     {
       LONGLONG lower;
@@ -60,7 +64,11 @@ namespace Kokkos {
         bool operator != (const cas128_t& a) const {
         return (lower != a.lower) || upper != a.upper;
       }
-    };
+    }
+#ifdef __GNUC__
+    __attribute__ ((aligned (16)))
+#endif
+    ;
   }
 
   template < typename T >
diff --git a/packages/kokkos/core/src/impl/Kokkos_Core.cpp b/packages/kokkos/core/src/impl/Kokkos_Core.cpp
index 8184dad3e..628e070a0 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Core.cpp
@@ -780,8 +780,20 @@ void print_configuration( std::ostream & out , const bool detail )
 #else
   msg << "no" << std::endl;
 #endif
-  msg << "  KOKKOS_ENABLE_CXX1Z: ";
-#ifdef KOKKOS_ENABLE_CXX1Z
+  msg << "  KOKKOS_ENABLE_CXX14: ";
+#ifdef KOKKOS_ENABLE_CXX14
+  msg << "yes" << std::endl;
+#else
+  msg << "no" << std::endl;
+#endif
+  msg << "  KOKKOS_ENABLE_CXX17: ";
+#ifdef KOKKOS_ENABLE_CXX17
+  msg << "yes" << std::endl;
+#else
+  msg << "no" << std::endl;
+#endif
+  msg << "  KOKKOS_ENABLE_CXX20: ";
+#ifdef KOKKOS_ENABLE_CXX20
   msg << "yes" << std::endl;
 #else
   msg << "no" << std::endl;
diff --git a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
index 699902e32..680e937db 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
@@ -235,6 +235,8 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
           , arg_label.c_str()
           , SharedAllocationHeader::maximum_label_length
           );
+  // Set last element zero, in case c_str is too long
+  RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
 }
 
 //----------------------------------------------------------------------------
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp
index da9ce6b9f..d8cb7593b 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostSpace.cpp
@@ -356,6 +356,8 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space
           , arg_label.c_str()
           , SharedAllocationHeader::maximum_label_length
           );
+  // Set last element zero, in case c_str is too long
+  RecordBase::m_alloc_ptr->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char) 0;
 }
 
 //----------------------------------------------------------------------------
diff --git a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp
index 558eef9e4..fff48e87f 100644
--- a/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp
@@ -144,6 +144,30 @@ public:
 
     return m_team_rank == 0;
   }
+  
+  inline
+  bool team_rendezvous(const int source_team_rank) const noexcept
+  {
+    int * ptr = (int *)(m_team_scratch + m_team_rendezvous);
+    HostBarrier::split_arrive( ptr
+                             , m_team_size
+                             , m_team_rendezvous_step
+                             );
+    if (m_team_rank != source_team_rank) {
+      HostBarrier::wait( ptr
+                       , m_team_size
+                       , m_team_rendezvous_step
+                       );
+    }
+    else {
+      HostBarrier::split_master_wait( ptr
+                                    , m_team_size
+                                    , m_team_rendezvous_step
+                                    );
+    }
+
+    return (m_team_rank == source_team_rank);
+  }
 
   inline
   void team_rendezvous_release() const noexcept
@@ -540,15 +564,16 @@ public:
     {
       if ( 1 < m_data.m_team_size ) {
         T volatile * const shared_value = (T*) m_data.team_reduce();
-
+		
         // Don't overwrite shared memory until all threads arrive
 
-        if ( m_data.team_rendezvous() ) {
+        if ( m_data.team_rendezvous(source_team_rank) ) {
+
           // All threads have entered 'team_rendezvous'
           // only this thread returned from 'team_rendezvous'
           // with a return value of 'true'
 
-          *shared_value = value ;
+          *shared_value = value;
 
           m_data.team_rendezvous_release();
           // This thread released all other threads from 'team_rendezvous'
@@ -574,7 +599,7 @@ public:
 
       // Don't overwrite shared memory until all threads arrive
 
-      if ( m_data.team_rendezvous() ) {
+      if ( m_data.team_rendezvous(source_team_rank) ) {
 
         // All threads have entered 'team_rendezvous'
         // only this thread returned from 'team_rendezvous'
diff --git a/packages/kokkos/core/src/impl/Kokkos_OldMacros.hpp b/packages/kokkos/core/src/impl/Kokkos_OldMacros.hpp
index bad158c29..c3198c0f1 100644
--- a/packages/kokkos/core/src/impl/Kokkos_OldMacros.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_OldMacros.hpp
@@ -142,18 +142,13 @@
 #endif
 #endif
 
-#ifdef KOKKOS_HAVE_CXX1Z
+
+#if defined(KOKKOS_HAVE_CXX1Z) || defined(KOKKOS_ENABLE_CXX17)
 #ifndef KOKKOS_ENABLE_CXX1Z
 #define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z
 #endif
 #endif
 
-#ifdef KOKKOS_HAVE_DEBUG
-#ifndef KOKKOS_DEBUG
-#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
-#endif
-#endif
-
 #ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
 #ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
 #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
@@ -482,6 +477,12 @@
 #define KOKKOS_HAVE_DEBUG 1
 #endif
 
+#ifdef KOKKOS_HAVE_DEBUG
+#ifndef KOKKOS_DEBUG
+#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
+#endif
+#endif
+
 #if (!defined(KOKKOS_HAVE_HWLOC)) && defined(KOKKOS_ENABLE_HWLOC)
 #define KOKKOS_HAVE_HWLOC 1
 #endif
diff --git a/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
index 7b85909ed..d84a85462 100644
--- a/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_Serial_Task.cpp
@@ -60,10 +60,10 @@ template class TaskQueue< Kokkos::Serial > ;
 void TaskQueueSpecialization< Kokkos::Serial >::execute
   ( TaskQueue< Kokkos::Serial > * const queue )
 {
-  using execution_space = Kokkos::Serial ;
-  using queue_type      = TaskQueue< execution_space > ;
+  using exec_space = Kokkos::Serial ;
+  using tqs_queue_type      = TaskQueue< exec_space > ;
   using task_root_type  = TaskBase< void , void , void > ;
-  using Member          = Impl::HostThreadTeamMember< execution_space > ;
+  using Member          = Impl::HostThreadTeamMember< exec_space > ;
 
   task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
 
@@ -83,9 +83,9 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute
 
     task_root_type * task = end ;
 
-    for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
+    for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) {
       for ( int j = 0 ; j < 2 && end == task ; ++j ) {
-        task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
+        task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] );
       }
     }
 
@@ -120,10 +120,10 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
   iff_single_thread_recursive_execute(
     TaskQueue< Kokkos::Serial > * const queue )
 {
-  using execution_space = Kokkos::Serial ;
-  using queue_type      = TaskQueue< execution_space > ;
+  using exec_space = Kokkos::Serial ;
+  using tqs_queue_type      = TaskQueue< exec_space > ;
   using task_root_type  = TaskBase< void , void , void > ;
-  using Member          = Impl::HostThreadTeamMember< execution_space > ;
+  using Member          = Impl::HostThreadTeamMember< exec_space > ;
 
   task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
 
@@ -139,9 +139,9 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
 
     task = end ;
 
-    for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) {
+    for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) {
       for ( int j = 0 ; j < 2 && end == task ; ++j ) {
-        task = queue_type::pop_ready_task( & queue->m_ready[i][j] );
+        task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] );
       }
     }
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
index 3c306ec94..d4e3a03d3 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewArray.hpp
@@ -123,8 +123,8 @@ private:
 
   typedef typename Traits::value_type::pointer handle_type ;
 
-  handle_type  m_handle ;
-  offset_type  m_offset ;
+  handle_type  m_impl_handle ;
+  offset_type  m_impl_offset ;
   size_t       m_stride ;
 
   typedef typename Traits::value_type::value_type scalar_type ;
@@ -140,8 +140,8 @@ private:
 
   KOKKOS_INLINE_FUNCTION
   ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
-    : m_handle( arg_handle )
-    , m_offset( arg_offset )
+    : m_impl_handle( arg_handle )
+    , m_impl_offset( arg_offset )
     , m_stride( is_contiguous_reference ? 0 : arg_offset.span() )
     {}
 
@@ -154,44 +154,44 @@ public:
 
   template< typename iType >
   KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
-    { return m_offset.m_dim.extent(r); }
+    { return m_impl_offset.m_dim.extent(r); }
 
   KOKKOS_INLINE_FUNCTION constexpr
   typename Traits::array_layout layout() const
-    { return m_offset.layout(); }
+    { return m_impl_offset.layout(); }
 
 
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_impl_offset.dimension_0(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_impl_offset.dimension_1(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_impl_offset.dimension_2(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_impl_offset.dimension_3(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_impl_offset.dimension_4(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_impl_offset.dimension_5(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_impl_offset.dimension_6(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_impl_offset.dimension_7(); }
 
   // Is a regular layout with uniform striding for each index.
   using is_regular = typename offset_type::is_regular ;
 
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_impl_offset.stride_0(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_impl_offset.stride_1(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_impl_offset.stride_2(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_impl_offset.stride_3(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_impl_offset.stride_4(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_impl_offset.stride_5(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_impl_offset.stride_6(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_impl_offset.stride_7(); }
 
   //----------------------------------------
   // Range span
 
   /** \brief  Span of the mapped range */
   KOKKOS_INLINE_FUNCTION constexpr size_t span() const
-    { return m_offset.span() * Array_N ; }
+    { return m_impl_offset.span() * Array_N ; }
 
   /** \brief  Is the mapped range span contiguous */
   KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
-    { return m_offset.span_is_contiguous(); }
+    { return m_impl_offset.span_is_contiguous(); }
 
   typedef typename std::conditional< is_contiguous_reference , contiguous_reference , strided_reference >::type  reference_type ;
 
@@ -199,63 +199,63 @@ public:
 
   /** \brief  If data references are lvalue_reference than can query pointer to memory */
   KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
-    { return m_handle ; }
+    { return m_impl_handle ; }
 
   //----------------------------------------
   // The View class performs all rank and bounds checking before
   // calling these element reference methods.
 
   KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference() const { return reference_type( m_handle + 0 , Array_N , 0 ); }
+  reference_type reference() const { return reference_type( m_impl_handle + 0 , Array_N , 0 ); }
 
   template< typename I0 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type
   reference( const I0 & i0 ) const
-    { return reference_type( m_handle + m_offset(i0) * Array_S , Array_N , m_stride ); }
+    { return reference_type( m_impl_handle + m_impl_offset(i0) * Array_S , Array_N , m_stride ); }
 
   template< typename I0 , typename I1 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 ) const
-    { return reference_type( m_handle + m_offset(i0,i1) * Array_S , Array_N , m_stride ); }
+    { return reference_type( m_impl_handle + m_impl_offset(i0,i1) * Array_S , Array_N , m_stride ); }
 
   template< typename I0 , typename I1 , typename I2 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
-    { return reference_type( m_handle + m_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); }
+    { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); }
 
   template< typename I0 , typename I1 , typename I2 , typename I3 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
-    { return reference_type( m_handle + m_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); }
+    { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
           , typename I4 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
                           , const I4 & i4 ) const
-    { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); }
+    { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
           , typename I4 , typename I5 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
                           , const I4 & i4 , const I5 & i5 ) const
-    { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); }
+    { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
           , typename I4 , typename I5 , typename I6 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
                           , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
-    { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); }
+    { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
           , typename I4 , typename I5 , typename I6 , typename I7 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
                           , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
-    { return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); }
+    { return reference_type( m_impl_handle + m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); }
 
   //----------------------------------------
 
@@ -269,31 +269,31 @@ public:
   /** \brief  Span, in bytes, of the referenced memory */
   KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
     {
-      return ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
+      return ( m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
     }
 
   //----------------------------------------
 
   KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
-  KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset(), m_stride(0) {}
+  KOKKOS_INLINE_FUNCTION ViewMapping() : m_impl_handle(), m_impl_offset(), m_stride(0) {}
   KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
-    : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
+    : m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ), m_stride( rhs.m_stride ) {}
   KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
-    { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; ; return *this ; }
+    { m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; m_stride = rhs.m_stride ; ; return *this ; }
 
   KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
-    : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
+    : m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ), m_stride( rhs.m_stride ) {}
   KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
-    { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; return *this ; }
+    { m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; m_stride = rhs.m_stride ; return *this ; }
 
   //----------------------------------------
 
   template< class ... Args >
   KOKKOS_INLINE_FUNCTION
   ViewMapping( pointer_type ptr , Args ... args )
-    : m_handle( ptr )
-    , m_offset( std::integral_constant< unsigned , 0 >() , args... )
-    , m_stride( m_offset.span() )
+    : m_impl_handle( ptr )
+    , m_impl_offset( std::integral_constant< unsigned , 0 >() , args... )
+    , m_stride( m_impl_offset.span() )
     {}
 
   //----------------------------------------
@@ -315,10 +315,10 @@ public:
     typedef std::integral_constant< unsigned ,
       alloc_prop::allow_padding ? sizeof(scalar_type) : 0 > padding ;
 
-    m_offset = offset_type( padding(), arg_layout );
+    m_impl_offset = offset_type( padding(), arg_layout );
 
     const size_t alloc_size =
-      ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
+      ( m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
 
     // Allocate memory from the memory space and create tracking record.
     record_type * const record =
@@ -327,14 +327,14 @@ public:
                            , alloc_size );
 
     if ( alloc_size ) {
-      m_handle =
+      m_impl_handle =
         handle_type( reinterpret_cast< pointer_type >( record->data() ) );
 
       if ( alloc_prop::initialize ) {
         // The functor constructs and destroys
         record->m_destroy = functor_type( ((Kokkos::Impl::ViewCtorProp<void,execution_space> const & )arg_prop).value
-                                        , (pointer_type) m_handle
-                                        , m_offset.span() * Array_N
+                                        , (pointer_type) m_impl_handle
+                                        , m_impl_offset.span() * Array_N
                                         );
 
         record->m_destroy.construct_shared_allocation();
@@ -397,8 +397,8 @@ public:
 
       typedef typename DstType::offset_type  dst_offset_type ;
 
-      dst.m_offset = dst_offset_type( src.m_offset );
-      dst.m_handle = src.m_handle ;
+      dst.m_impl_offset = dst_offset_type( src.m_impl_offset );
+      dst.m_impl_handle = src.m_impl_handle ;
       dst.m_stride = src.m_stride ;
     }
 };
@@ -448,7 +448,7 @@ public:
       // Array dimension becomes the last dimension.
       // Arguments beyond the destination rank are ignored.
       if ( src.span_is_contiguous() ) { // not padded
-        dst.m_offset = dst_offset_type( std::integral_constant<unsigned,0>() ,
+        dst.m_impl_offset = dst_offset_type( std::integral_constant<unsigned,0>() ,
           typename DstTraits::array_layout
             ( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
             , ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
@@ -463,7 +463,7 @@ public:
       else { // is padded
         typedef std::integral_constant<unsigned,sizeof(typename SrcTraits::value_type::value_type)> padded ;
 
-        dst.m_offset = dst_offset_type( padded() ,
+        dst.m_impl_offset = dst_offset_type( padded() ,
           typename DstTraits::array_layout
             ( ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
             , ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
@@ -476,7 +476,7 @@ public:
             ) );
       }
 
-      dst.m_handle = src.m_handle ;
+      dst.m_impl_handle = src.m_impl_handle ;
     }
 };
 
@@ -579,11 +579,11 @@ public:
       typedef typename DstType::handle_type  dst_handle_type ;
 
       const SubviewExtents< SrcTraits::rank , rank >
-        extents( src.m_offset.m_dim , args... );
+        extents( src.m_impl_offset.m_dim , args... );
 
-      dst.m_offset = dst_offset_type( src.m_offset , extents );
-      dst.m_handle = dst_handle_type( src.m_handle +
-                                      src.m_offset( extents.domain_offset(0)
+      dst.m_impl_offset = dst_offset_type( src.m_impl_offset , extents );
+      dst.m_impl_handle = dst_handle_type( src.m_impl_handle +
+                                      src.m_impl_offset( extents.domain_offset(0)
                                                   , extents.domain_offset(1)
                                                   , extents.domain_offset(2)
                                                   , extents.domain_offset(3)
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp
new file mode 100644
index 000000000..a64101110
--- /dev/null
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp
@@ -0,0 +1,945 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP
+#define KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP
+
+#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+
+#include <Kokkos_Layout.hpp>
+#include <Kokkos_View.hpp>
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+// View offset and mapping for tiled view's
+
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 >
+struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, 0, 0, 0, 0, 0, 0, true> > : public std::true_type {};
+
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 >
+struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, 0, 0, 0, 0, 0, true> > : public std::true_type {};
+
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 >
+struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, 0, 0, 0, 0, true> > : public std::true_type {};
+
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 >
+struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, 0, 0, 0, true> > : public std::true_type {};
+
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 ,  unsigned ArgN5 >
+struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, 0, 0, true> > : public std::true_type {};
+
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 ,  unsigned ArgN5 ,  unsigned ArgN6 >
+struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, 0, true> > : public std::true_type {};
+
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 ,  unsigned ArgN5 ,  unsigned ArgN6 ,  unsigned ArgN7 > 
+struct is_array_layout < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, true> > : public std::true_type {};
+
+
+template< class L >
+struct is_array_layout_tiled : public std::false_type {};
+
+template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 ,  unsigned ArgN3 ,  unsigned ArgN4 ,  unsigned ArgN5 ,  unsigned ArgN6 , unsigned ArgN7 , bool IsPowerTwo >
+struct is_array_layout_tiled < Kokkos::Experimental::LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3, ArgN4, ArgN5, ArgN6, ArgN7, IsPowerTwo> > : public std::true_type {}; // Last template parameter "true" meaning this currently only supports powers-of-two
+
+
+namespace Impl {
+
+template< class Dimension , class Layout >
+struct ViewOffset< Dimension , Layout ,
+  typename std::enable_if<(
+    ( Dimension::rank <= 8 )
+    &&
+    ( Dimension::rank >= 2 )
+    &&
+    is_array_layout< Layout >::value
+    &&
+    is_array_layout_tiled< Layout >::value
+  )>::type >
+{
+public:
+
+//  enum { outer_pattern = Layout::outer_pattern };
+//  enum { inner_pattern = Layout::inner_pattern };
+  static constexpr Kokkos::Iterate outer_pattern = Layout::outer_pattern;
+  static constexpr Kokkos::Iterate inner_pattern = Layout::inner_pattern;
+
+  enum { VORank = Dimension::rank };
+
+  enum { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) };
+  enum { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) };
+  enum { SHIFT_2 = Kokkos::Impl::integral_power_of_two(Layout::N2) };
+  enum { SHIFT_3 = Kokkos::Impl::integral_power_of_two(Layout::N3) };
+  enum { SHIFT_4 = Kokkos::Impl::integral_power_of_two(Layout::N4) };
+  enum { SHIFT_5 = Kokkos::Impl::integral_power_of_two(Layout::N5) };
+  enum { SHIFT_6 = Kokkos::Impl::integral_power_of_two(Layout::N6) };
+  enum { SHIFT_7 = Kokkos::Impl::integral_power_of_two(Layout::N7) };
+  enum { MASK_0  = Layout::N0 - 1 };
+  enum { MASK_1  = Layout::N1 - 1 };
+  enum { MASK_2  = Layout::N2 - 1 };
+  enum { MASK_3  = Layout::N3 - 1 };
+  enum { MASK_4  = Layout::N4 - 1 };
+  enum { MASK_5  = Layout::N5 - 1 };
+  enum { MASK_6  = Layout::N6 - 1 };
+  enum { MASK_7  = Layout::N7 - 1 };
+
+  enum { SHIFT_2T = SHIFT_0 + SHIFT_1 };
+  enum { SHIFT_3T = SHIFT_0 + SHIFT_1 + SHIFT_2 };
+  enum { SHIFT_4T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 };
+  enum { SHIFT_5T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 };
+  enum { SHIFT_6T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 };
+  enum { SHIFT_7T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6 };
+  enum { SHIFT_8T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6 + SHIFT_7 };
+
+  // Is an irregular layout that does not have uniform striding for each index.
+  using is_mapping_plugin = std::true_type ;
+  using is_regular        = std::false_type ;
+
+  typedef size_t     size_type ;
+  typedef Dimension  dimension_type ;
+  typedef Layout     array_layout ;
+
+  dimension_type m_dim ;
+  size_type      m_tile_N0 ; // Num tiles dim 0
+  size_type      m_tile_N1 ;
+  size_type      m_tile_N2 ;
+  size_type      m_tile_N3 ;
+  size_type      m_tile_N4 ;
+  size_type      m_tile_N5 ;
+  size_type      m_tile_N6 ;
+  size_type      m_tile_N7 ;
+
+  //----------------------------------------
+
+#define DEBUG_OUTPUT_CHECK 0
+
+  // Rank 2
+  template< typename I0 , typename I1 >
+  KOKKOS_INLINE_FUNCTION
+  size_type operator()( I0 const & i0 , I1 const & i1 ) const {
+    auto tile_offset = (outer_pattern == (Kokkos::Iterate::Left)) 
+                     ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1)) ) << SHIFT_2T)
+                     : ( ( (m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) ) << SHIFT_2T) ;
+    //                     ( num_tiles[1] * ti0     +  ti1 ) * FTD
+
+    auto local_offset = (inner_pattern == (Kokkos::Iterate::Left)) 
+                      ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) )
+                      : ( ((i0 & MASK_0) << SHIFT_1) + (i1 & MASK_1) ) ;
+    //                     ( tile_dim[1] * li0         +  li1 )
+
+#if DEBUG_OUTPUT_CHECK
+    std::cout << "Am I Outer Left? " << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl;
+    std::cout << "Am I Inner Left? " << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl;
+    std::cout << "i0 = " << i0
+      << " i1 = " << i1
+      << "\ntilei0 = " << (i0>>SHIFT_0)
+      << " tilei1 = " << (i1>>SHIFT_1)
+      << "locali0 = " << (i0 & MASK_0)
+      << "\nlocali1 = " << (i1 & MASK_1) 
+      << std::endl;
+#endif
+
+    return tile_offset + local_offset;
+  }
+
+  // Rank 3
+  template< typename I0 , typename I1 , typename I2 >
+  KOKKOS_INLINE_FUNCTION
+  size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 ) const {
+    auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) 
+                     ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*(i2>>SHIFT_2)) ) << SHIFT_3T)
+                     : ( ( m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2) ) << SHIFT_3T) ;
+
+    auto local_offset = (inner_pattern == Kokkos::Iterate::Left) 
+                      ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) )
+                      : ( ((i0 & MASK_0) << (SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_2)) + (i2 & MASK_2) ) ;
+
+#if DEBUG_OUTPUT_CHECK
+    std::cout << "Am I Outer Left? " << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl;
+    std::cout << "Am I Inner Left? " << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl;
+    std::cout << "i0 = " << i0
+      << " i1 = " << i1
+      << " i2 = " << i2
+      << "\ntilei0 = " << (i0>>SHIFT_0)
+      << " tilei1 = " << (i1>>SHIFT_1)
+      << " tilei2 = " << (i2>>SHIFT_2)
+      << "\nlocali0 = " << (i0 & MASK_0)
+      << "locali1 = " << (i1 & MASK_1)
+      << "locali2 = " << (i2 & MASK_2)
+      << std::endl;
+#endif
+
+    return tile_offset + local_offset;
+  }
+
+  // Rank 4
+  template< typename I0 , typename I1 , typename I2 , typename I3 >
+  KOKKOS_INLINE_FUNCTION
+  size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 ) const {
+    auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) 
+                     ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*(i3>>SHIFT_3))) ) << SHIFT_4T)
+                     : ( ( m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3) ) << SHIFT_4T) ;
+
+    auto local_offset = (inner_pattern == Kokkos::Iterate::Left) 
+                      ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) )
+                      : ( ((i0 & MASK_0) << (SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_3)) + (i3 & MASK_3) ) ;
+
+    return tile_offset + local_offset;
+  }
+
+  // Rank 5
+  template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 >
+  KOKKOS_INLINE_FUNCTION
+  size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 ) const {
+    auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) 
+                     ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*((i3>>SHIFT_3) + m_tile_N3*(i4>>SHIFT_4)))) ) << SHIFT_5T)
+                     : ( ( m_tile_N4*(m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3)) + (i4>>SHIFT_4) ) << SHIFT_5T) ;
+
+    auto local_offset = (inner_pattern == Kokkos::Iterate::Left) 
+                      ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) + ((i4 & MASK_4)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3)) )
+                      : ( ((i0 & MASK_0) << (SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_4+SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_4+SHIFT_3)) + ((i3 & MASK_3)<<(SHIFT_4)) + (i4 & MASK_4) ) ;
+
+    return tile_offset + local_offset;
+  }
+
+  // Rank 6
+  template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 >
+  KOKKOS_INLINE_FUNCTION
+  size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 ) const {
+    auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) 
+                     ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*((i3>>SHIFT_3) + m_tile_N3*((i4>>SHIFT_4) + m_tile_N4*(i5>>SHIFT_5))))) ) << SHIFT_6T)
+                     : ( ( m_tile_N5*(m_tile_N4*(m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3)) + (i4>>SHIFT_4)) + (i5>>SHIFT_5) ) << SHIFT_6T) ;
+
+    auto local_offset = (inner_pattern == Kokkos::Iterate::Left) 
+                      ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) + ((i4 & MASK_4)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3)) + ((i5 & MASK_5)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4)) )
+                      : ( ((i0 & MASK_0) << (SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_5+SHIFT_4+SHIFT_3)) + ((i3 & MASK_3)<<(SHIFT_5+SHIFT_4)) + ((i4 & MASK_4)<<(SHIFT_5)) + (i5 & MASK_5) ) ;
+
+    return tile_offset + local_offset;
+  }
+
+  // Rank 7
+  template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 >
+  KOKKOS_INLINE_FUNCTION
+  size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 , I6 const & i6 ) const {
+    auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) 
+                     ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*((i3>>SHIFT_3) + m_tile_N3*((i4>>SHIFT_4) + m_tile_N4*((i5>>SHIFT_5) + m_tile_N5*(i6>>SHIFT_6)))))) ) << SHIFT_7T)
+                     : ( ( m_tile_N6*(m_tile_N5*(m_tile_N4*(m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3)) + (i4>>SHIFT_4)) + (i5>>SHIFT_5)) + (i6>>SHIFT_6) ) << SHIFT_7T) ;
+
+    auto local_offset = (inner_pattern == Kokkos::Iterate::Left) 
+                      ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) + ((i4 & MASK_4)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3)) + ((i5 & MASK_5)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4)) + ((i6 & MASK_6)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4+SHIFT_5)) )
+                      : ( ((i0 & MASK_0) << (SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3)) + ((i3 & MASK_3)<<(SHIFT_6+SHIFT_5+SHIFT_4)) + ((i4 & MASK_4)<<(SHIFT_6+SHIFT_5)) + ((i5 & MASK_5)<<(SHIFT_6)) + (i6 & MASK_6) ) ;
+
+    return tile_offset + local_offset;
+  }
+
+  // Rank 8
+  template< typename I0 , typename I1 , typename I2 , typename I3 , typename I4 , typename I5 , typename I6 , typename I7 >
+  KOKKOS_INLINE_FUNCTION
+  size_type operator()( I0 const & i0 , I1 const & i1 , I2 const & i2 , I3 const & i3 , I4 const & i4 , I5 const & i5 , I6 const & i6 , I7 const & i7 ) const {
+    auto tile_offset = (outer_pattern == Kokkos::Iterate::Left) 
+                     ? ( ( (i0>>SHIFT_0) + m_tile_N0*((i1>>SHIFT_1) + m_tile_N1*((i2>>SHIFT_2) + m_tile_N2*((i3>>SHIFT_3) + m_tile_N3*((i4>>SHIFT_4) + m_tile_N4*((i5>>SHIFT_5) + m_tile_N5*((i6>>SHIFT_6) + m_tile_N6*(i7>>SHIFT_7))))))) ) << SHIFT_8T)
+                     : ( ( m_tile_N7*(m_tile_N6*(m_tile_N5*(m_tile_N4*(m_tile_N3*(m_tile_N2*(m_tile_N1*(i0>>SHIFT_0) + (i1>>SHIFT_1)) + (i2>>SHIFT_2)) + (i3>>SHIFT_3)) + (i4>>SHIFT_4)) + (i5>>SHIFT_5)) + (i6>>SHIFT_6)) + (i7>>SHIFT_7) ) << SHIFT_8T) ;
+
+    auto local_offset = (inner_pattern == Kokkos::Iterate::Left) 
+                      ? ( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) + ((i2 & MASK_2)<<(SHIFT_0+SHIFT_1)) + ((i3 & MASK_3)<<(SHIFT_0+SHIFT_1+SHIFT_2)) + ((i4 & MASK_4)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3)) + ((i5 & MASK_5)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4)) + ((i6 & MASK_6)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4+SHIFT_5)) + ((i7 & MASK_7)<<(SHIFT_0+SHIFT_1+SHIFT_2+SHIFT_3+SHIFT_4+SHIFT_5+SHIFT_6)) )
+                      : ( ((i0 & MASK_0) << (SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2+SHIFT_1)) + ((i1 & MASK_1) << (SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3+SHIFT_2)) + ((i2 & MASK_2)<<(SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4+SHIFT_3)) + ((i3 & MASK_3)<<(SHIFT_7+SHIFT_6+SHIFT_5+SHIFT_4)) + ((i4 & MASK_4)<<(SHIFT_7+SHIFT_6+SHIFT_5)) + ((i5 & MASK_5)<<(SHIFT_7+SHIFT_6)) + ((i6 & MASK_6)<<(SHIFT_7)) + (i7 & MASK_7) ) ;
+
+    return tile_offset + local_offset;
+  }
+
+  //----------------------------------------
+
+  KOKKOS_INLINE_FUNCTION constexpr
+  array_layout layout() const
+    { return array_layout( m_dim.N0 , m_dim.N1 , m_dim.N2 , m_dim.N2  , m_dim.N3  , m_dim.N4  , m_dim.N5  , m_dim.N6  , m_dim.N7 ); }
+
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
+
+  KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
+
+  // Strides are meaningless due to irregularity
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0 ; }
+  KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0 ; }
+
+  // Stride with [ rank ] value is the total length
+  template< typename iType >
+  KOKKOS_INLINE_FUNCTION
+  void stride( iType * const s ) const
+    {
+      s[0] = 0 ;
+      if ( 0 < dimension_type::rank ) { s[1] = 0 ; }
+      if ( 1 < dimension_type::rank ) { s[2] = 0 ; }
+      if ( 2 < dimension_type::rank ) { s[3] = 0 ; }
+      if ( 3 < dimension_type::rank ) { s[4] = 0 ; }
+      if ( 4 < dimension_type::rank ) { s[5] = 0 ; }
+      if ( 5 < dimension_type::rank ) { s[6] = 0 ; }
+      if ( 6 < dimension_type::rank ) { s[7] = 0 ; }
+      if ( 7 < dimension_type::rank ) { s[8] = 0 ; }
+    }
+
+  KOKKOS_INLINE_FUNCTION constexpr size_type span() const
+    {
+      // Rank2: ( NumTile0 * ( NumTile1 ) ) * TileSize, etc
+      return   ( VORank == 2 ) ? ( m_tile_N0 * m_tile_N1 ) << SHIFT_2T
+             : ( VORank == 3 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 ) << SHIFT_3T
+             : ( VORank == 4 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 ) << SHIFT_4T
+             : ( VORank == 5 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 ) << SHIFT_5T
+             : ( VORank == 6 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 ) << SHIFT_6T
+             : ( VORank == 7 ) ? ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 * m_tile_N6 ) << SHIFT_7T
+             : ( m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3 * m_tile_N4 * m_tile_N5 * m_tile_N6 * m_tile_N7 ) << SHIFT_8T ;
+    }
+
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
+    {
+      return true;
+    }
+
+  //----------------------------------------
+
+#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND
+  KOKKOS_INLINE_FUNCTION ~ViewOffset() {}
+  KOKKOS_INLINE_FUNCTION ViewOffset() {}
+  KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & rhs )
+  : m_dim(rhs.m_dim)
+  , m_tile_N0(rhs.m_tile_N0)
+  , m_tile_N1(rhs.m_tile_N1)
+  , m_tile_N2(rhs.m_tile_N2)
+  , m_tile_N3(rhs.m_tile_N3)
+  , m_tile_N4(rhs.m_tile_N4)
+  , m_tile_N5(rhs.m_tile_N5)
+  , m_tile_N6(rhs.m_tile_N6)
+  , m_tile_N7(rhs.m_tile_N7)
+  {}
+
+  KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & rhs ) {
+    m_dim = rhs.m_dim;
+    m_tile_N0 = rhs.m_tile_N0;
+    m_tile_N1 = rhs.m_tile_N1;
+    m_tile_N2 = rhs.m_tile_N2;
+    m_tile_N3 = rhs.m_tile_N3;
+    m_tile_N4 = rhs.m_tile_N4;
+    m_tile_N5 = rhs.m_tile_N5;
+    m_tile_N6 = rhs.m_tile_N6;
+    m_tile_N7 = rhs.m_tile_N7;
+    return *this;
+  }
+
+#else
+  KOKKOS_INLINE_FUNCTION ~ViewOffset() = default;
+  KOKKOS_INLINE_FUNCTION ViewOffset() = default;
+  KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default;
+  KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default;
+#endif
+
+  template< unsigned TrivialScalarSize >
+  KOKKOS_INLINE_FUNCTION
+  constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const & ,
+                        array_layout const arg_layout )
+    : m_dim( arg_layout.dimension[0], arg_layout.dimension[1], arg_layout.dimension[2], arg_layout.dimension[3], arg_layout.dimension[4], arg_layout.dimension[5], arg_layout.dimension[6], arg_layout.dimension[7] )
+    , m_tile_N0( ( arg_layout.dimension[0] + MASK_0 ) >> SHIFT_0 /* number of tiles in first dimension */ )
+    , m_tile_N1( ( arg_layout.dimension[1] + MASK_1 ) >> SHIFT_1 )
+    , m_tile_N2( (VORank > 2 ) ? ( arg_layout.dimension[2] + MASK_2 ) >> SHIFT_2 : 0 )
+    , m_tile_N3( (VORank > 3 ) ? ( arg_layout.dimension[3] + MASK_3 ) >> SHIFT_3 : 0 )
+    , m_tile_N4( (VORank > 4 ) ? ( arg_layout.dimension[4] + MASK_4 ) >> SHIFT_4 : 0 )
+    , m_tile_N5( (VORank > 5 ) ? ( arg_layout.dimension[5] + MASK_5 ) >> SHIFT_5 : 0 )
+    , m_tile_N6( (VORank > 6 ) ? ( arg_layout.dimension[6] + MASK_6 ) >> SHIFT_6 : 0 )
+    , m_tile_N7( (VORank > 7 ) ? ( arg_layout.dimension[7] + MASK_7 ) >> SHIFT_7 : 0 )
+    {}
+
+};
+
+
+//----------------------------------------
+
+
+// ViewMapping assign method needed in order to return a 'subview' tile as a proper View
+// The outer iteration pattern determines the mapping of the pointer offset to the beginning of requested tile
+// The inner iteration pattern is needed for the layout of the tile's View to be returned
+// Rank 2
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
+        , class ... P
+        , typename iType0 , typename iType1
+        >
+struct ViewMapping
+  < typename std::enable_if< (N2 == 0 && N3 == 0 && N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void
+  , Kokkos::ViewTraits<T**,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
+  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
+  , iType0
+  , iType1 >
+{
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>  src_layout ;
+  typedef Kokkos::ViewTraits< T** , src_layout , P... > src_traits ;
+
+  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
+  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };
+  typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::ViewTraits< T[N0][N1] , array_layout , P ... > traits ;
+  typedef Kokkos::View< T[N0][N1] , array_layout , P ... > type ;
+
+  KOKKOS_INLINE_FUNCTION static
+  void assign( ViewMapping< traits , void > & dst
+             , const ViewMapping< src_traits , void > & src
+             , const src_layout &
+             , const iType0 i_tile0
+             , const iType1 i_tile1
+             )
+    {
+      typedef ViewMapping< traits , void >        dst_map_type ;
+      typedef ViewMapping< src_traits , void >    src_map_type ;
+      typedef typename dst_map_type::handle_type  dst_handle_type ;
+      typedef typename dst_map_type::offset_type  dst_offset_type ;
+      typedef typename src_map_type::offset_type  src_offset_type ;
+
+      dst = dst_map_type(
+         dst_handle_type( src.m_impl_handle +
+                          ( is_outer_left ? ( (i_tile0 + src.m_impl_offset.m_tile_N0 * i_tile1) << src_offset_type::SHIFT_2T )
+                                          : ( (src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1) << src_offset_type::SHIFT_2T )
+                          ) // offset to start of the tile
+                        )
+       , dst_offset_type() );
+    }
+};
+
+// Rank 3
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
+        , class ... P
+        , typename iType0 , typename iType1 , typename iType2
+        >
+struct ViewMapping
+  < typename std::enable_if< (N3 == 0 && N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void
+  , Kokkos::ViewTraits<T***,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
+  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
+  , iType0
+  , iType1
+  , iType2 >
+{
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>  src_layout ;
+  typedef Kokkos::ViewTraits< T*** , src_layout , P... > src_traits ;
+
+  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
+  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };
+  typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::ViewTraits< T[N0][N1][N2] , array_layout , P ... > traits ;
+  typedef Kokkos::View< T[N0][N1][N2] , array_layout , P ... > type ;
+
+  KOKKOS_INLINE_FUNCTION static
+  void assign( ViewMapping< traits , void > & dst
+             , const ViewMapping< src_traits , void > & src
+             , const src_layout &
+             , const iType0 i_tile0
+             , const iType1 i_tile1
+             , const iType2 i_tile2
+             )
+    {
+      typedef ViewMapping< traits , void >        dst_map_type ;
+      typedef ViewMapping< src_traits , void >    src_map_type ;
+      typedef typename dst_map_type::handle_type  dst_handle_type ;
+      typedef typename dst_map_type::offset_type  dst_offset_type ;
+      typedef typename src_map_type::offset_type  src_offset_type ;
+
+      dst = dst_map_type(
+         dst_handle_type( src.m_impl_handle +
+                          ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * i_tile2 ) ) << src_offset_type::SHIFT_3T ) 
+                                          : ( ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) << src_offset_type::SHIFT_3T )
+                          )
+                        ) // offset to start of the tile
+       , dst_offset_type() );
+    }
+};
+
+// Rank 4
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
+        , class ... P
+        , typename iType0 , typename iType1 , typename iType2 , typename iType3
+        >
+struct ViewMapping
+  < typename std::enable_if< (N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0) >::type //void
+  , Kokkos::ViewTraits<T****,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
+  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
+  , iType0
+  , iType1
+  , iType2 
+  , iType3 >
+{
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>  src_layout ;
+  typedef Kokkos::ViewTraits< T**** , src_layout , P... > src_traits ;
+
+  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
+  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };
+  typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::ViewTraits< T[N0][N1][N2][N3] , array_layout , P ... > traits ;
+  typedef Kokkos::View< T[N0][N1][N2][N3] , array_layout , P ... > type ;
+
+  KOKKOS_INLINE_FUNCTION static
+  void assign( ViewMapping< traits , void > & dst
+             , const ViewMapping< src_traits , void > & src
+             , const src_layout &
+             , const iType0 i_tile0
+             , const iType1 i_tile1
+             , const iType2 i_tile2
+             , const iType3 i_tile3
+             )
+    {
+      typedef ViewMapping< traits , void >        dst_map_type ;
+      typedef ViewMapping< src_traits , void >    src_map_type ;
+      typedef typename dst_map_type::handle_type  dst_handle_type ;
+      typedef typename dst_map_type::offset_type  dst_offset_type ;
+      typedef typename src_map_type::offset_type  src_offset_type ;
+
+      dst = dst_map_type(
+         dst_handle_type( src.m_impl_handle +
+                          ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * i_tile3 ) ) ) << src_offset_type::SHIFT_4T ) 
+                                          : ( ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) << src_offset_type::SHIFT_4T )
+                          )
+                        ) // offset to start of the tile
+       , dst_offset_type() );
+    }
+};
+
+// Rank 5
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
+        , class ... P
+        , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4
+        >
+struct ViewMapping
+  < typename std::enable_if< (N5 == 0 && N6 == 0 && N7 == 0) >::type //void
+  , Kokkos::ViewTraits<T*****,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
+  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
+  , iType0
+  , iType1
+  , iType2 
+  , iType3
+  , iType4 >
+{
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>  src_layout ;
+  typedef Kokkos::ViewTraits< T***** , src_layout , P... > src_traits ;
+
+  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
+  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };
+  typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::ViewTraits< T[N0][N1][N2][N3][N4] , array_layout , P ... > traits ;
+  typedef Kokkos::View< T[N0][N1][N2][N3][N4] , array_layout , P ... > type ;
+
+  KOKKOS_INLINE_FUNCTION static
+  void assign( ViewMapping< traits , void > & dst
+             , const ViewMapping< src_traits , void > & src
+             , const src_layout &
+             , const iType0 i_tile0
+             , const iType1 i_tile1
+             , const iType2 i_tile2
+             , const iType3 i_tile3
+             , const iType4 i_tile4
+             )
+    {
+      typedef ViewMapping< traits , void >        dst_map_type ;
+      typedef ViewMapping< src_traits , void >    src_map_type ;
+      typedef typename dst_map_type::handle_type  dst_handle_type ;
+      typedef typename dst_map_type::offset_type  dst_offset_type ;
+      typedef typename src_map_type::offset_type  src_offset_type ;
+
+      dst = dst_map_type(
+         dst_handle_type( src.m_impl_handle +
+                          ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * ( i_tile3 + src.m_impl_offset.m_tile_N3 * i_tile4 ) ) ) ) << src_offset_type::SHIFT_5T ) 
+                                          : ( ( src.m_impl_offset.m_tile_N4 * ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) << src_offset_type::SHIFT_5T )
+                          )
+                        ) // offset to start of the tile
+       , dst_offset_type() );
+    }
+};
+
+// Rank 6
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
+        , class ... P
+        , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5
+        >
+struct ViewMapping
+  < typename std::enable_if< (N6 == 0 && N7 == 0) >::type //void
+  , Kokkos::ViewTraits<T******,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
+  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
+  , iType0
+  , iType1
+  , iType2 
+  , iType3
+  , iType4
+  , iType5 >
+{
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>  src_layout ;
+  typedef Kokkos::ViewTraits< T****** , src_layout , P... > src_traits ;
+
+  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
+  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };
+  typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5] , array_layout , P ... > traits ;
+  typedef Kokkos::View< T[N0][N1][N2][N3][N4][N5] , array_layout , P ... > type ;
+
+  KOKKOS_INLINE_FUNCTION static
+  void assign( ViewMapping< traits , void > & dst
+             , const ViewMapping< src_traits , void > & src
+             , const src_layout &
+             , const iType0 i_tile0
+             , const iType1 i_tile1
+             , const iType2 i_tile2
+             , const iType3 i_tile3
+             , const iType4 i_tile4
+             , const iType5 i_tile5
+             )
+    {
+      typedef ViewMapping< traits , void >        dst_map_type ;
+      typedef ViewMapping< src_traits , void >    src_map_type ;
+      typedef typename dst_map_type::handle_type  dst_handle_type ;
+      typedef typename dst_map_type::offset_type  dst_offset_type ;
+      typedef typename src_map_type::offset_type  src_offset_type ;
+
+      dst = dst_map_type(
+         dst_handle_type( src.m_impl_handle +
+                          ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * ( i_tile3 + src.m_impl_offset.m_tile_N3 * ( i_tile4 + src.m_impl_offset.m_tile_N4 * i_tile5 ) ) ) ) ) << src_offset_type::SHIFT_6T ) 
+                                          : ( ( src.m_impl_offset.m_tile_N5 * ( src.m_impl_offset.m_tile_N4 * ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) << src_offset_type::SHIFT_6T )
+                          )
+                        ) // offset to start of the tile
+       , dst_offset_type() );
+    }
+};
+
+// Rank 7
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
+        , class ... P
+        , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5 , typename iType6
+        >
+struct ViewMapping
+  < typename std::enable_if< (N7 == 0) >::type //void
+  , Kokkos::ViewTraits<T*******,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
+  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
+  , iType0
+  , iType1
+  , iType2 
+  , iType3
+  , iType4
+  , iType5 
+  , iType6 >
+{
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>  src_layout ;
+  typedef Kokkos::ViewTraits< T******* , src_layout , P... > src_traits ;
+
+  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
+  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };
+  typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5][N6] , array_layout , P ... > traits ;
+  typedef Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , array_layout , P ... > type ;
+
+  KOKKOS_INLINE_FUNCTION static
+  void assign( ViewMapping< traits , void > & dst
+             , const ViewMapping< src_traits , void > & src
+             , const src_layout &
+             , const iType0 i_tile0
+             , const iType1 i_tile1
+             , const iType2 i_tile2
+             , const iType3 i_tile3
+             , const iType4 i_tile4
+             , const iType5 i_tile5
+             , const iType6 i_tile6
+             )
+    {
+      typedef ViewMapping< traits , void >        dst_map_type ;
+      typedef ViewMapping< src_traits , void >    src_map_type ;
+      typedef typename dst_map_type::handle_type  dst_handle_type ;
+      typedef typename dst_map_type::offset_type  dst_offset_type ;
+      typedef typename src_map_type::offset_type  src_offset_type ;
+
+      dst = dst_map_type(
+         dst_handle_type( src.m_impl_handle +
+                          ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * ( i_tile3 + src.m_impl_offset.m_tile_N3 * ( i_tile4 + src.m_impl_offset.m_tile_N4 * ( i_tile5 + src.m_impl_offset.m_tile_N5 * i_tile6 ) ) ) ) ) ) << src_offset_type::SHIFT_7T ) 
+                                          : ( ( src.m_impl_offset.m_tile_N6 * ( src.m_impl_offset.m_tile_N5 * ( src.m_impl_offset.m_tile_N4 * ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) + i_tile6 ) << src_offset_type::SHIFT_7T )
+                          )
+                        ) // offset to start of the tile
+       , dst_offset_type() );
+    }
+};
+
+// Rank 8
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7
+        , class ... P
+        , typename iType0 , typename iType1 , typename iType2 , typename iType3 , typename iType4 , typename iType5 , typename iType6 , typename iType7
+        >
+struct ViewMapping
+  < typename std::enable_if< (N0 != 0 && N1 != 0 && N2 != 0 && N3 != 0 && N4 != 0 && N5 != 0 && N6 != 0 && N7 != 0) >::type //void
+  , Kokkos::ViewTraits<T********,Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>,P...>
+  , Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>
+  , iType0
+  , iType1
+  , iType2 
+  , iType3
+  , iType4
+  , iType5 
+  , iType6
+  , iType7 >
+{
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>  src_layout ;
+  typedef Kokkos::ViewTraits< T******** , src_layout , P... > src_traits ;
+
+  enum { is_outer_left = (OuterP == Kokkos::Iterate::Left) };
+  enum { is_inner_left = (InnerP == Kokkos::Iterate::Left) };
+  typedef typename std::conditional< is_inner_left, Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::ViewTraits< T[N0][N1][N2][N3][N4][N5][N6][N7] , array_layout , P ... > traits ;
+  typedef Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , array_layout , P ... > type ;
+
+  KOKKOS_INLINE_FUNCTION static
+  void assign( ViewMapping< traits , void > & dst
+             , const ViewMapping< src_traits , void > & src
+             , const src_layout &
+             , const iType0 i_tile0
+             , const iType1 i_tile1
+             , const iType2 i_tile2
+             , const iType3 i_tile3
+             , const iType4 i_tile4
+             , const iType5 i_tile5
+             , const iType6 i_tile6
+             , const iType7 i_tile7
+             )
+    {
+      typedef ViewMapping< traits , void >        dst_map_type ;
+      typedef ViewMapping< src_traits , void >    src_map_type ;
+      typedef typename dst_map_type::handle_type  dst_handle_type ;
+      typedef typename dst_map_type::offset_type  dst_offset_type ;
+      typedef typename src_map_type::offset_type  src_offset_type ;
+
+      dst = dst_map_type(
+         dst_handle_type( src.m_impl_handle +
+                          ( is_outer_left ? ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * ( i_tile1 + src.m_impl_offset.m_tile_N1 * ( i_tile2 + src.m_impl_offset.m_tile_N2 * ( i_tile3 + src.m_impl_offset.m_tile_N3 * ( i_tile4 + src.m_impl_offset.m_tile_N4 * ( i_tile5 + src.m_impl_offset.m_tile_N5 * ( i_tile6 + src.m_impl_offset.m_tile_N6 * i_tile7 ) ) ) ) ) ) ) << src_offset_type::SHIFT_8T ) 
+                                          : ( ( src.m_impl_offset.m_tile_N7 * ( src.m_impl_offset.m_tile_N6 * ( src.m_impl_offset.m_tile_N5 * ( src.m_impl_offset.m_tile_N4 * ( src.m_impl_offset.m_tile_N3 * ( src.m_impl_offset.m_tile_N2 * ( src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1 ) + i_tile2 ) + i_tile3 ) + i_tile4 ) + i_tile5 ) + i_tile6 ) + i_tile7 ) << src_offset_type::SHIFT_8T )
+                          )
+                        ) // offset to start of the tile
+       , dst_offset_type() );
+    }
+};
+
+
+} /* namespace Impl */
+} /* namespace Kokkos */
+
+//----------------------------------------
+
+namespace Kokkos {
+
+// Rank 2
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 
+        , class ... P 
+        >
+KOKKOS_INLINE_FUNCTION
+Kokkos::View< T[N0][N1] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
+tile_subview( const Kokkos::View<T**, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
+            , const size_t i_tile0
+            , const size_t i_tile1
+            )
+{
+  // Force the specialized ViewMapping for extracting a tile
+  // by using the first subview argument as the layout.
+  typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ;
+
+  return Kokkos::View< T[N0][N1] , array_layout , P... >
+    ( src , SrcLayout() , i_tile0 , i_tile1 );
+}
+
+// Rank 3
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 
+        , class ... P 
+        >
+KOKKOS_INLINE_FUNCTION
+Kokkos::View< T[N0][N1][N2] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
+tile_subview( const Kokkos::View<T***, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
+            , const size_t i_tile0
+            , const size_t i_tile1
+            , const size_t i_tile2
+            )
+{
+  // Force the specialized ViewMapping for extracting a tile
+  // by using the first subview argument as the layout.
+  typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ;
+
+  return Kokkos::View< T[N0][N1][N2] , array_layout , P... >
+    ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 );
+}
+
+// Rank 4
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 
+        , class ... P 
+        >
+KOKKOS_INLINE_FUNCTION
+Kokkos::View< T[N0][N1][N2][N3] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
+tile_subview( const Kokkos::View<T****, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
+            , const size_t i_tile0
+            , const size_t i_tile1
+            , const size_t i_tile2
+            , const size_t i_tile3
+            )
+{
+  // Force the specialized ViewMapping for extracting a tile
+  // by using the first subview argument as the layout.
+  typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ;
+
+  return Kokkos::View< T[N0][N1][N2][N3] , array_layout , P... >
+    ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 );
+}
+
+// Rank 5
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 
+        , class ... P 
+        >
+KOKKOS_INLINE_FUNCTION
+Kokkos::View< T[N0][N1][N2][N3][N4] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
+tile_subview( const Kokkos::View<T*****, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
+            , const size_t i_tile0
+            , const size_t i_tile1
+            , const size_t i_tile2
+            , const size_t i_tile3
+            , const size_t i_tile4
+            )
+{
+  // Force the specialized ViewMapping for extracting a tile
+  // by using the first subview argument as the layout.
+  typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ;
+
+  return Kokkos::View< T[N0][N1][N2][N3][N4] , array_layout , P... >
+    ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 );
+}
+
+// Rank 6
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 
+        , class ... P 
+        >
+KOKKOS_INLINE_FUNCTION
+Kokkos::View< T[N0][N1][N2][N3][N4][N5] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
+tile_subview( const Kokkos::View<T******, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
+            , const size_t i_tile0
+            , const size_t i_tile1
+            , const size_t i_tile2
+            , const size_t i_tile3
+            , const size_t i_tile4
+            , const size_t i_tile5
+            )
+{
+  // Force the specialized ViewMapping for extracting a tile
+  // by using the first subview argument as the layout.
+  typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ;
+
+  return Kokkos::View< T[N0][N1][N2][N3][N4][N5] , array_layout , P... >
+    ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 );
+}
+
+// Rank 7
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 
+        , class ... P 
+        >
+KOKKOS_INLINE_FUNCTION
+Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
+tile_subview( const Kokkos::View<T*******, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
+            , const size_t i_tile0
+            , const size_t i_tile1
+            , const size_t i_tile2
+            , const size_t i_tile3
+            , const size_t i_tile4
+            , const size_t i_tile5
+            , const size_t i_tile6
+            )
+{
+  // Force the specialized ViewMapping for extracting a tile
+  // by using the first subview argument as the layout.
+  typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ;
+
+  return Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6] , array_layout , P... >
+    ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 , i_tile6 );
+}
+
+// Rank 8
+template< typename T , Kokkos::Iterate OuterP , Kokkos::Iterate InnerP , unsigned N0 , unsigned N1 , unsigned N2 , unsigned N3 , unsigned N4 , unsigned N5 , unsigned N6 , unsigned N7 
+        , class ... P 
+        >
+KOKKOS_INLINE_FUNCTION
+Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type , P... >
+tile_subview( const Kokkos::View<T********, Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true>, P...> & src
+            , const size_t i_tile0
+            , const size_t i_tile1
+            , const size_t i_tile2
+            , const size_t i_tile3
+            , const size_t i_tile4
+            , const size_t i_tile5
+            , const size_t i_tile6
+            , const size_t i_tile7
+            )
+{
+  // Force the specialized ViewMapping for extracting a tile
+  // by using the first subview argument as the layout.
+  typedef typename std::conditional< (InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, Kokkos::LayoutRight >::type array_layout;
+  typedef Kokkos::Experimental::LayoutTiled<OuterP,InnerP,N0,N1,N2,N3,N4,N5,N6,N7,true> SrcLayout ;
+
+  return Kokkos::View< T[N0][N1][N2][N3][N4][N5][N6][N7] , array_layout , P... >
+    ( src , SrcLayout() , i_tile0 , i_tile1 , i_tile2 , i_tile3 , i_tile4 , i_tile5 , i_tile6 , i_tile7 );
+}
+
+} /* namespace Kokkos */
+#endif //!defined(KOKKOS_ENABLE_DEPRECATED_CODE
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+#endif /* #ifndef KOKKOS_EXPERIENTAL_VIEWLAYOUTTILE_HPP */
+
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
index d4890c534..bb3bcfd33 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewMapping.hpp
@@ -260,6 +260,9 @@ namespace Impl {
 struct ALL_t {
   KOKKOS_INLINE_FUNCTION
   constexpr const ALL_t & operator()() const { return *this ; }
+
+  KOKKOS_INLINE_FUNCTION
+  constexpr bool operator == ( const ALL_t & right) const { return true;}
 };
 
 }} // namespace Kokkos::Impl
@@ -1030,13 +1033,6 @@ struct ViewOffset< Dimension , Kokkos::LayoutLeft
   ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
     : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
     {
-      static_assert( 
-        ( DimRHS::rank == 0 &&
-          dimension_type::rank == 0 ) ||
-        ( DimRHS::rank == 1 &&
-          dimension_type::rank == 1 &&
-          dimension_type::rank_dynamic == 1 )
-        , "ViewOffset LayoutLeft and LayoutStride are only compatible when rank <= 1" );
       if ( rhs.m_stride.S0 != 1 ) {
         Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride  requires stride == 1" );
       }
@@ -1275,6 +1271,18 @@ public:
       // Also requires equal static dimensions ...
     }
 
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
+    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
+           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
+    , m_stride( rhs.stride_1() )
+    {
+      if ( rhs.m_stride.S0 != 1 ) {
+        Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" );
+      }
+    }
+
   //----------------------------------------
   // Subview construction
   // This subview must be 2 == rank and 2 == rank_dynamic
@@ -1518,16 +1526,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight
   ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
     : m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
     {
-      static_assert(
-       ( DimRHS::rank == 0 &&
-         dimension_type::rank == 0 ) ||
-       ( DimRHS::rank == 1 &&
-         dimension_type::rank == 1 &&
-         dimension_type::rank_dynamic == 1 )
-      , "ViewOffset LayoutRight and LayoutString are only compatible when rank <= 1" );
-      if ( rhs.m_stride.S0 != 1 ) {
-        Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutLeft/Right from LayoutStride  requires stride == 1" );
-      }
+
     }
 
   //----------------------------------------
@@ -1771,6 +1770,23 @@ public:
       // Also requires equal static dimensions ...
     }
 
+  template< class DimRHS >
+  KOKKOS_INLINE_FUNCTION
+  ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
+    : m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
+           , rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
+    , m_stride( rhs.stride_0() )
+    {
+      if ( ((dimension_type::rank == 2)?rhs.m_stride.S1:
+           ((dimension_type::rank == 3)?rhs.m_stride.S2:
+           ((dimension_type::rank == 4)?rhs.m_stride.S3:
+           ((dimension_type::rank == 5)?rhs.m_stride.S4:
+           ((dimension_type::rank == 6)?rhs.m_stride.S5:
+           ((dimension_type::rank == 7)?rhs.m_stride.S6:rhs.m_stride.S7)))))) != 1 ){
+        Kokkos::abort("Kokkos::Impl::ViewOffset assignment of LayoutRight from LayoutStride requires right-most stride == 1" );
+      }
+    }
+
   //----------------------------------------
   // Subview construction
   // Last dimension must be non-zero
@@ -2498,7 +2514,7 @@ struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ >
 #if defined(KOKKOS_ENABLE_PROFILING)
         uint64_t kpID = 0;
         if(Kokkos::Profiling::profileLibraryLoaded()) {
-          Kokkos::Profiling::beginParallelFor("Kokkos::View::initialization", 0, &kpID);
+          Kokkos::Profiling::beginParallelFor((destroy ? "Kokkos::View::destruction" : "Kokkos::View::initialization"), 0, &kpID);
         }
 #endif
         const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
@@ -2588,11 +2604,8 @@ class ViewMapping< Traits ,
               , void >::is_mapping_plugin::value
   )>::type >
 {
-private:
-
-  template< class , class ... > friend class ViewMapping ;
-  template< class , class ... > friend class Kokkos::View ;
 
+public:
   typedef ViewOffset< typename Traits::dimension
                     , typename Traits::array_layout
                     , void
@@ -2600,13 +2613,17 @@ private:
 
   typedef typename ViewDataHandle< Traits >::handle_type  handle_type ;
 
-  handle_type  m_handle ;
-  offset_type  m_offset ;
+  handle_type  m_impl_handle ;
+  offset_type  m_impl_offset ;
+
+private:
+
+  template < class , class ...> friend class ViewMapping;
 
   KOKKOS_INLINE_FUNCTION
   ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
-    : m_handle( arg_handle )
-    , m_offset( arg_offset )
+    : m_impl_handle( arg_handle )
+    , m_impl_offset( arg_offset )
     {}
 
 public:
@@ -2621,44 +2638,44 @@ public:
 
   template< typename iType >
   KOKKOS_INLINE_FUNCTION constexpr size_t extent( const iType & r ) const
-    { return m_offset.m_dim.extent(r); }
+    { return m_impl_offset.m_dim.extent(r); }
 
   KOKKOS_INLINE_FUNCTION constexpr
   typename Traits::array_layout layout() const
-    { return m_offset.layout(); }
+    { return m_impl_offset.layout(); }
 
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_impl_offset.dimension_0(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_impl_offset.dimension_1(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_impl_offset.dimension_2(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_impl_offset.dimension_3(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_impl_offset.dimension_4(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_impl_offset.dimension_5(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_impl_offset.dimension_6(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_impl_offset.dimension_7(); }
 
   // Is a regular layout with uniform striding for each index.
   using is_regular = typename offset_type::is_regular ;
 
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
-  KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_impl_offset.stride_0(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_impl_offset.stride_1(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_impl_offset.stride_2(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_impl_offset.stride_3(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_impl_offset.stride_4(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_impl_offset.stride_5(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_impl_offset.stride_6(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_impl_offset.stride_7(); }
 
   template< typename iType >
-  KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); }
+  KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_impl_offset.stride(s); }
 
   //----------------------------------------
   // Range span
 
   /** \brief  Span of the mapped range */
-  KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); }
+  KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_impl_offset.span(); }
 
   /** \brief  Is the mapped range span contiguous */
-  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); }
+  KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_impl_offset.span_is_contiguous(); }
 
   typedef typename ViewDataHandle< Traits >::return_type  reference_type ;
   typedef typename Traits::value_type *                   pointer_type ;
@@ -2666,7 +2683,7 @@ public:
   /** \brief  Query raw pointer to memory */
   KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
     {
-      return m_handle;
+      return m_impl_handle;
     }
 
   //----------------------------------------
@@ -2674,7 +2691,7 @@ public:
   // calling these element reference methods.
 
   KOKKOS_FORCEINLINE_FUNCTION
-  reference_type reference() const { return m_handle[0]; }
+  reference_type reference() const { return m_impl_handle[0]; }
 
   template< typename I0 >
   KOKKOS_FORCEINLINE_FUNCTION
@@ -2682,7 +2699,7 @@ public:
     std::enable_if< std::is_integral<I0>::value &&
                     ! std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
                   , reference_type >::type
-  reference( const I0 & i0 ) const { return m_handle[i0]; }
+  reference( const I0 & i0 ) const { return m_impl_handle[i0]; }
 
   template< typename I0 >
   KOKKOS_FORCEINLINE_FUNCTION
@@ -2690,50 +2707,50 @@ public:
     std::enable_if< std::is_integral<I0>::value &&
                     std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
                   , reference_type >::type
-  reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; }
+  reference( const I0 & i0 ) const { return m_impl_handle[ m_impl_offset(i0) ]; }
 
   template< typename I0 , typename I1 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 ) const
-    { return m_handle[ m_offset(i0,i1) ]; }
+    { return m_impl_handle[ m_impl_offset(i0,i1) ]; }
 
   template< typename I0 , typename I1 , typename I2 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
-    { return m_handle[ m_offset(i0,i1,i2) ]; }
+    { return m_impl_handle[ m_impl_offset(i0,i1,i2) ]; }
 
   template< typename I0 , typename I1 , typename I2 , typename I3 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3) ]; }
+    { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3) ]; }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
           , typename I4 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
                           , const I4 & i4 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; }
+    { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4) ]; }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
           , typename I4 , typename I5 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
                           , const I4 & i4 , const I5 & i5 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5) ]; }
+    { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5) ]; }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
           , typename I4 , typename I5 , typename I6 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
                           , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; }
+    { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5,i6) ]; }
 
   template< typename I0 , typename I1 , typename I2 , typename I3
           , typename I4 , typename I5 , typename I6 , typename I7 >
   KOKKOS_FORCEINLINE_FUNCTION
   reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
                           , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
-    { return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; }
+    { return m_impl_handle[ m_impl_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; }
 
   //----------------------------------------
 
@@ -2747,22 +2764,22 @@ public:
   /** \brief  Span, in bytes, of the referenced memory */
   KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
     {
-      return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask);
+      return ( m_impl_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask);
     }
 
   //----------------------------------------
 
   KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
-  KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {}
+  KOKKOS_INLINE_FUNCTION ViewMapping() : m_impl_handle(), m_impl_offset() {}
   KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
-    : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
+    : m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ) {}
   KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
-    { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
+    { m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; return *this ; }
 
   KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
-    : m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
+    : m_impl_handle( rhs.m_impl_handle ), m_impl_offset( rhs.m_impl_offset ) {}
   KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
-    { m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
+    { m_impl_handle = rhs.m_impl_handle ; m_impl_offset = rhs.m_impl_offset ; return *this ; }
 
   //----------------------------------------
 
@@ -2780,14 +2797,14 @@ public:
   ViewMapping( Kokkos::Impl::ViewCtorProp< P ... > const & arg_prop
              , typename Traits::array_layout const & arg_layout
              )
-    : m_handle( ( (Kokkos::Impl::ViewCtorProp<void,pointer_type> const &) arg_prop ).value )
-    , m_offset( std::integral_constant< unsigned , 0 >() , arg_layout )
+    : m_impl_handle( ( (Kokkos::Impl::ViewCtorProp<void,pointer_type> const &) arg_prop ).value )
+    , m_impl_offset( std::integral_constant< unsigned , 0 >() , arg_layout )
     {}
 
   /**\brief  Assign data */
   KOKKOS_INLINE_FUNCTION
   void assign_data( pointer_type arg_ptr )
-    { m_handle = handle_type( arg_ptr ); }
+    { m_impl_handle = handle_type( arg_ptr ); }
 
   //----------------------------------------
   /*  Allocate and construct mapped array.
@@ -2815,10 +2832,10 @@ public:
       , alloc_prop::allow_padding ? sizeof(value_type) : 0
       > padding ;
 
-    m_offset = offset_type( padding(), arg_layout );
+    m_impl_offset = offset_type( padding(), arg_layout );
 
     const size_t alloc_size =
-      ( m_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
+      ( m_impl_offset.span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
 
     // Create shared memory tracking record with allocate memory from the memory space
     record_type * const record =
@@ -2829,7 +2846,7 @@ public:
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
     if ( alloc_size ) {
 #endif
-    m_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) );
+    m_impl_handle = handle_type( reinterpret_cast< pointer_type >( record->data() ) );
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
     }
 #endif
@@ -2840,8 +2857,8 @@ public:
       // Assume destruction is only required when construction is requested.
       // The ViewValueFunctor has both value construction and destruction operators.
       record->m_destroy = functor_type( ( (Kokkos::Impl::ViewCtorProp<void,execution_space> const &) arg_prop).value
-                                      , (value_type *) m_handle
-                                      , m_offset.span()
+                                      , (value_type *) m_impl_handle
+                                      , m_impl_offset.span()
                                       );
 
       // Construct values
@@ -2859,16 +2876,17 @@ public:
 template< class DstTraits , class SrcTraits >
 class ViewMapping< DstTraits , SrcTraits ,
   typename std::enable_if<(
-    /* default mappings */
+    !(std::is_same<typename SrcTraits::array_layout, LayoutStride>::value) && //Added to have a new specialization for SrcType of LayoutStride
+    // default mappings
     std::is_same< typename DstTraits::specialize , void >::value
     &&
     std::is_same< typename SrcTraits::specialize , void >::value
     &&
     (
-      /* same layout */
+      // same layout
       std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value
       ||
-      /* known layout */
+      // known layout
       (
         (
           std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
@@ -2968,8 +2986,127 @@ public:
         if(!assignable)
           Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension.");
       }
-      dst.m_offset = dst_offset_type( src.m_offset );
-      dst.m_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track );
+      dst.m_impl_offset = dst_offset_type( src.m_impl_offset );
+      dst.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_impl_handle , src_track );
+    }
+};
+
+//----------------------------------------------------------------------------
+//Create new specialization for SrcType of LayoutStride. Runtime check for compatible layout
+template< class DstTraits , class SrcTraits >
+class ViewMapping< DstTraits , SrcTraits ,
+  typename std::enable_if<(
+    std::is_same< typename SrcTraits::array_layout, Kokkos::LayoutStride >::value
+    &&
+    std::is_same< typename DstTraits::specialize , void >::value
+    &&
+    std::is_same< typename SrcTraits::specialize , void >::value
+    &&
+    (
+      // same layout
+      std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value
+      ||
+      // known layout
+      (
+        std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
+        std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
+        std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
+      )
+    )
+  )>::type >
+{
+private:
+
+  enum { is_assignable_space =
+   Kokkos::Impl::MemorySpaceAccess
+     < typename DstTraits::memory_space
+     , typename SrcTraits::memory_space >::assignable };
+
+  enum { is_assignable_value_type =
+    std::is_same< typename DstTraits::value_type
+                , typename SrcTraits::value_type >::value ||
+    std::is_same< typename DstTraits::value_type
+                , typename SrcTraits::const_value_type >::value };
+
+  enum { is_assignable_dimension =
+    ViewDimensionAssignable< typename DstTraits::dimension
+                           , typename SrcTraits::dimension >::value };
+
+public:
+
+  enum { is_assignable = is_assignable_space &&
+                         is_assignable_value_type &&
+                         is_assignable_dimension };
+
+  typedef Kokkos::Impl::SharedAllocationTracker  TrackType ;
+  typedef ViewMapping< DstTraits , void >  DstType ;
+  typedef ViewMapping< SrcTraits , void >  SrcType ;
+
+  KOKKOS_INLINE_FUNCTION
+  static bool assignable_layout_check(DstType & dst, const SrcType & src) //Runtime check
+    {
+      size_t strides[9];
+      bool assignable = true;
+      src.stride(strides);
+      size_t exp_stride = 1;
+      if (std::is_same< typename DstTraits::array_layout, Kokkos::LayoutLeft >::value) {
+	    for(int i=0; i<src.Rank; i++) {
+          if (i>0) exp_stride *= src.extent(i-1);
+          if (strides[i] != exp_stride){assignable=false;break;}
+	    }
+      }
+      else if (std::is_same< typename DstTraits::array_layout, Kokkos::LayoutRight >::value) {
+	    for(int i=src.Rank-1; i>=0; i--) {
+          if (i<src.Rank-1) exp_stride *= src.extent(i+1);
+          if (strides[i] != exp_stride){assignable=false;break;}
+	    }
+      }
+      return assignable; 
+    }
+
+  KOKKOS_INLINE_FUNCTION
+  static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
+    {
+      static_assert( is_assignable_space
+                   , "View assignment must have compatible spaces" );
+
+      static_assert( is_assignable_value_type
+                   , "View assignment must have same value type or const = non-const" );
+
+      static_assert( is_assignable_dimension
+                   , "View assignment must have compatible dimensions" );
+
+      bool assignable_layout = assignable_layout_check(dst, src); //Runtime check
+      if(!assignable_layout)
+          Kokkos::abort("View assignment must have compatible layouts\n");
+	  
+      typedef typename DstType::offset_type  dst_offset_type ;
+
+      if ( size_t(DstTraits::dimension::rank_dynamic) < size_t(SrcTraits::dimension::rank_dynamic) ) {
+        typedef typename DstTraits::dimension dst_dim;
+        bool assignable =
+          ( ( 1 > DstTraits::dimension::rank_dynamic && 1 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN0 == src.dimension_0() : true ) &&
+          ( ( 2 > DstTraits::dimension::rank_dynamic && 2 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN1 == src.dimension_1() : true ) &&
+          ( ( 3 > DstTraits::dimension::rank_dynamic && 3 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN2 == src.dimension_2() : true ) &&
+          ( ( 4 > DstTraits::dimension::rank_dynamic && 4 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN3 == src.dimension_3() : true ) &&
+          ( ( 5 > DstTraits::dimension::rank_dynamic && 5 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN4 == src.dimension_4() : true ) &&
+          ( ( 6 > DstTraits::dimension::rank_dynamic && 6 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN5 == src.dimension_5() : true ) &&
+          ( ( 7 > DstTraits::dimension::rank_dynamic && 7 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN6 == src.dimension_6() : true ) &&
+          ( ( 8 > DstTraits::dimension::rank_dynamic && 8 <= SrcTraits::dimension::rank_dynamic ) ?
+            dst_dim::ArgN7 == src.dimension_7() : true )
+          ;
+        if(!assignable)
+          Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension.");
+      }
+      dst.m_impl_offset = dst_offset_type( src.m_impl_offset );
+      dst.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_impl_handle , src_track );
     }
 };
 
@@ -3106,12 +3243,12 @@ public:
       typedef typename DstType::offset_type  dst_offset_type ;
 
       const SubviewExtents< SrcTraits::rank , rank >
-        extents( src.m_offset.m_dim , args... );
+        extents( src.m_impl_offset.m_dim , args... );
 
-      dst.m_offset = dst_offset_type( src.m_offset , extents );
+      dst.m_impl_offset = dst_offset_type( src.m_impl_offset , extents );
 
-      dst.m_handle = ViewDataHandle< DstTraits >::assign(src.m_handle,
-          src.m_offset( extents.domain_offset(0)
+      dst.m_impl_handle = ViewDataHandle< DstTraits >::assign(src.m_impl_handle,
+          src.m_impl_offset( extents.domain_offset(0)
                       , extents.domain_offset(1)
                       , extents.domain_offset(2)
                       , extents.domain_offset(3)
@@ -3152,6 +3289,7 @@ bool view_verify_operator_bounds
          && view_verify_operator_bounds<R+1>( map , args ... );
 }
 
+
 template< unsigned , class MapType >
 inline
 void view_error_operator_bounds( char * , int , const MapType & )
@@ -3176,6 +3314,7 @@ void view_error_operator_bounds
   view_error_operator_bounds<R+1>(buf+n,len-n,map,args...);
 }
 
+
 #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
 
 /* Check #3: is the View managed as determined by the MemoryTraits? */
@@ -3275,6 +3414,8 @@ void view_verify_operator_bounds
   }
 }
 
+
+
 } /* namespace Impl */
 } /* namespace Kokkos */
 
diff --git a/packages/kokkos/core/src/impl/Kokkos_ViewTile.hpp b/packages/kokkos/core/src/impl/Kokkos_ViewTile.hpp
index 42bc8c230..716b9ceca 100644
--- a/packages/kokkos/core/src/impl/Kokkos_ViewTile.hpp
+++ b/packages/kokkos/core/src/impl/Kokkos_ViewTile.hpp
@@ -202,8 +202,8 @@ struct ViewMapping
       typedef typename src_map_type::offset_type  src_offset_type ;
 
       dst = dst_map_type(
-         dst_handle_type( src.m_handle +
-                        ( ( i_tile0 + src.m_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) ,
+         dst_handle_type( src.m_impl_handle +
+                        ( ( i_tile0 + src.m_impl_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) ,
          dst_offset_type() );
     }
 };
diff --git a/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp b/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp
index 7dc8a5356..423944962 100644
--- a/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp
+++ b/packages/kokkos/core/src/impl/Kokkos_hwloc.cpp
@@ -336,11 +336,11 @@ Sentinel::Sentinel()
 
     const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 );
 
-    if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
+    if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) {
 
       hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc();
 
-      hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset );
+      hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->cpuset );
 
       bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology ,
                                         s_process_no_core_zero ,
@@ -402,14 +402,14 @@ Sentinel::Sentinel()
 
     const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );
 
-    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
+    if ( hwloc_bitmap_intersects( s_process_binding , root->cpuset ) ) {
 
       ++root_count ;
 
       // Remember which root (NUMA) object the master thread is running on.
       // This will be logical NUMA rank #0 for this process.
 
-      if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
+      if ( hwloc_bitmap_intersects( proc_cpuset_location, root->cpuset ) ) {
         root_base = i ;
       }
 
@@ -417,7 +417,7 @@ Sentinel::Sentinel()
 
       const unsigned max_core =
         hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
-                                                root->allowed_cpuset ,
+                                                root->cpuset ,
                                                 HWLOC_OBJ_CORE );
 
       unsigned core_count = 0 ;
@@ -426,7 +426,7 @@ Sentinel::Sentinel()
 
         const hwloc_obj_t core =
           hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
-                                               root->allowed_cpuset ,
+                                               root->cpuset ,
                                                HWLOC_OBJ_CORE , j );
 
         // If process' cpuset intersects core's cpuset then process can access this core.
@@ -438,13 +438,13 @@ Sentinel::Sentinel()
         // This assumes that it would be performance-detrimental
         // to spawn more than one MPI process per core and use nested threading.
 
-        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
+        if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) {
 
           ++core_count ;
 
           const unsigned pu_count =
             hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
-                                                    core->allowed_cpuset ,
+                                                    core->cpuset ,
                                                     HWLOC_OBJ_PU );
 
           if ( pu_per_core == 0 ) pu_per_core = pu_count ;
@@ -480,11 +480,11 @@ Sentinel::Sentinel()
 
     const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );
 
-    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
+    if ( hwloc_bitmap_intersects( s_process_binding , root->cpuset ) ) {
 
       const unsigned max_core =
         hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
-                                                root->allowed_cpuset ,
+                                                root->cpuset ,
                                                 HWLOC_OBJ_CORE );
 
       unsigned core_count = 0 ;
@@ -493,12 +493,12 @@ Sentinel::Sentinel()
 
         const hwloc_obj_t core =
           hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
-                                               root->allowed_cpuset ,
+                                               root->cpuset ,
                                                HWLOC_OBJ_CORE , j );
 
-        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
+        if ( hwloc_bitmap_intersects( s_process_binding , core->cpuset ) ) {
 
-          s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ;
+          s_core[ core_count + core_per_root * i ] = core->cpuset ;
 
           ++core_count ;
         }
diff --git a/packages/kokkos/core/src/kokkos.pc.in b/packages/kokkos/core/src/kokkos.pc.in
new file mode 100644
index 000000000..f27b57c96
--- /dev/null
+++ b/packages/kokkos/core/src/kokkos.pc.in
@@ -0,0 +1,71 @@
+# 
+#                        Kokkos v. 2.0
+#              Copyright (2014) Sandia Corporation
+# 
+# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+# the U.S. Government retains certain rights in this software.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the Corporation nor the names of the
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+# 
+
+# Add the directory where kokkos.pc got installed to your PKG_CONFIG_PATH
+
+# Use this on commandline with:
+# c++ `pkg-config --cflags --libs kokkos` -o myapp myapp.cpp
+
+# Use this in a Makefile:
+# myapp: myapp.cpp
+#       $(CC) `pkg-config --cflags --libs kokkos` -o $@ $<
+
+# Use this in autotools:
+# configure.ac:
+# PKG_CHECK_MODULES([KOKKOS], [kokkos])
+# Makefile.am:
+# myapp_CFLAGS = $(KOKKOS_CFLAGS)
+# myapp_LDADD = $(KOKKOS_LIBS)
+
+# Use this in CMake:
+# CMakeLists.txt:
+# find_package(PkgConfig)
+# pkg_check_modules(KOKKOS IMPORTED_TARGET kokkos)
+# target_link_libraries(<lib> PkgConfig::KOKKOS)
+
+prefix=@CMAKE_INSTALL_PREFIX@
+libdir=${prefix}/lib
+includedir=${prefix}/include
+
+Name: kokkos
+Description: Kokkos C++ Performance Portability Programming EcoSystem
+URL: https://github.com/kokkos
+Version:
+Requires:
+Libs: -L${libdir} -lkokkos @KOKKOS_EXTRA_LIBS_LIST@ @KOKKOS_LINK_FLAGS@
+Libs.private: -lm
+Cflags: -I${includedir} @KOKKOS_CXXFLAGS@
diff --git a/packages/kokkos/core/unit_test/CMakeLists.txt b/packages/kokkos/core/unit_test/CMakeLists.txt
index 651abf04d..fad4e1d45 100644
--- a/packages/kokkos/core/unit_test/CMakeLists.txt
+++ b/packages/kokkos/core/unit_test/CMakeLists.txt
@@ -284,6 +284,7 @@ IF(Kokkos_ENABLE_Cuda)
     SOURCES
       UnitTestMainInit.cpp
       cuda/TestCudaHostPinned_SharedAlloc.cpp
+      cuda/TestCudaHostPinned_ViewCopy.cpp
       cuda/TestCudaHostPinned_ViewAPI_a.cpp
       cuda/TestCudaHostPinned_ViewAPI_b.cpp
       cuda/TestCudaHostPinned_ViewAPI_c.cpp
@@ -293,6 +294,7 @@ IF(Kokkos_ENABLE_Cuda)
       cuda/TestCudaHostPinned_ViewMapping_b.cpp
       cuda/TestCudaHostPinned_ViewMapping_subview.cpp
       cuda/TestCudaUVM_SharedAlloc.cpp
+      cuda/TestCudaUVM_ViewCopy.cpp
       cuda/TestCudaUVM_ViewAPI_a.cpp
       cuda/TestCudaUVM_ViewAPI_b.cpp
       cuda/TestCudaUVM_ViewAPI_c.cpp
diff --git a/packages/kokkos/core/unit_test/Makefile b/packages/kokkos/core/unit_test/Makefile
index b50222e37..72832271c 100644
--- a/packages/kokkos/core/unit_test/Makefile
+++ b/packages/kokkos/core/unit_test/Makefile
@@ -43,12 +43,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 	OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o
 	OBJ_CUDA += TestCuda_RangePolicy.o
 	OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o
-	OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o
-	OBJ_CUDA += TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o
+	OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewLayoutStrideAssignment.o
+	OBJ_CUDA += TestCudaUVM_ViewCopy.o TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o
 	OBJ_CUDA += TestCudaUVM_ViewMapping_a.o TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o
-	OBJ_CUDA += TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o
+	OBJ_CUDA += TestCudaHostPinned_ViewCopy.o TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o
 	OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o
-        OBJ_CUDA += TestCuda_View_64bit.o
+	OBJ_CUDA += TestCuda_View_64bit.o
 	OBJ_CUDA += TestCuda_ViewOfClass.o
 	OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o
 	OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o
@@ -57,13 +57,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 	OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o
 	OBJ_CUDA += TestCuda_SubView_c13.o
 	OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o
+	OBJ_CUDA += TestCuda_Reductions_DeviceView.o
 	OBJ_CUDA += TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o
 	OBJ_CUDA += TestCuda_Complex.o
 	OBJ_CUDA += TestCuda_AtomicOperations_int.o TestCuda_AtomicOperations_unsignedint.o TestCuda_AtomicOperations_longint.o 
 	OBJ_CUDA += TestCuda_AtomicOperations_unsignedlongint.o TestCuda_AtomicOperations_longlongint.o TestCuda_AtomicOperations_double.o TestCuda_AtomicOperations_float.o
 	OBJ_CUDA += TestCuda_AtomicViews.o TestCuda_Atomics.o
 	OBJ_CUDA += TestCuda_Team.o TestCuda_TeamScratch.o
-	OBJ_CUDA += TestCuda_TeamReductionScan.o
+	OBJ_CUDA += TestCuda_TeamReductionScan.o TestCuda_TeamTeamSize.o
 	OBJ_CUDA += TestCuda_Other.o
 	OBJ_CUDA += TestCuda_MDRange_a.o TestCuda_MDRange_b.o TestCuda_MDRange_c.o TestCuda_MDRange_d.o TestCuda_MDRange_e.o
 	OBJ_CUDA += TestCuda_Crs.o
@@ -84,13 +85,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
 	OBJ_ROCM += TestROCm_AtomicOperations_int.o TestROCm_AtomicOperations_unsignedint.o TestROCm_AtomicOperations_longint.o 
 	OBJ_ROCM += TestROCm_AtomicOperations_unsignedlongint.o TestROCm_AtomicOperations_longlongint.o TestROCm_AtomicOperations_double.o TestROCm_AtomicOperations_float.o
         OBJ_ROCM += TestROCm_Atomics.o
-# complex failing
         OBJ_ROCM += TestROCm_AtomicViews.o
         OBJ_ROCM += TestROCm_Other.o
-# Compiles but runtime Segfaults:
-#        OBJ_ROCM += TestROCm_MDRange_a.o TestROCm_MDRange_b.o TestROCm_MDRange_c.o TestROCm_MDRange_d.o TestROCm_MDRange_e.o
-# rocm.memory_pool
-	OBJ_ROCM += TestROCm_Reductions.o 
+        OBJ_ROCM += TestROCm_MDRange_a.o TestROCm_MDRange_b.o TestROCm_MDRange_c.o TestROCm_MDRange_d.o TestROCm_MDRange_e.o
+        OBJ_ROCM += TestROCm_MDRangeReduce_a.o TestROCm_MDRangeReduce_b.o TestROCm_MDRangeReduce_c.o TestROCm_MDRangeReduce_d.o TestROCm_MDRangeReduce_e.o
+	OBJ_ROCM += TestROCm_Reductions.o
 	OBJ_ROCM += TestROCm_Reducers_a.o TestROCm_Reducers_b.o TestROCm_Reducers_c.o TestROCm_Reducers_d.o
         OBJ_ROCM += TestROCm_Scan.o
         OBJ_ROCM += TestROCm_SharedAlloc.o
@@ -108,22 +107,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
         OBJ_ROCM += TestROCm_SubView_c10.o
         OBJ_ROCM += TestROCm_SubView_c11.o
         OBJ_ROCM += TestROCm_SubView_c12.o
+        OBJ_ROCM += TestROCm_SubView_c13.o
         OBJ_ROCM += TestROCm_Team.o
-# compile fails / compiler segfaults
-        #OBJ_ROCM += TestROCm_TeamReductionScan.o
-# compile fails
-        OBJ_ROCM += TestROCm_TeamScratch.o
+        OBJ_ROCM += TestROCm_TeamReductionScan.o
+        OBJ_ROCM += TestROCm_TeamScratch.o TestROCm_TeamTeamSize.o
         OBJ_ROCM += TestROCm_ViewAPI_a.o TestROCm_ViewAPI_b.o TestROCm_ViewAPI_c.o TestROCm_ViewAPI_d.o TestROCm_ViewAPI_e.o
         OBJ_ROCM += TestROCm_ViewMapping_a.o
         OBJ_ROCM += TestROCm_ViewMapping_b.o
         OBJ_ROCM += TestROCm_ViewMapping_subview.o
-        OBJ_ROCM += TestROCmHostPinned_ViewAPI_a.o TestROCmHostPinned_ViewAPI_b.o TestROCmHostPinned_ViewAPI_c.o TestROCmHostPinned_ViewAPI_d.o TestROCmHostPinned_ViewAPI_e.o
+        OBJ_ROCM += TestROCmHostPinned_ViewCopy.o TestROCmHostPinned_ViewAPI_a.o TestROCmHostPinned_ViewAPI_b.o TestROCmHostPinned_ViewAPI_c.o TestROCmHostPinned_ViewAPI_d.o TestROCmHostPinned_ViewAPI_e.o
         OBJ_ROCM += TestROCmHostPinned_View_64bit.o
-	    OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o 
-	    OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o 
-	    OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o
+        OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o 
+        OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o 
+        OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o
         OBJ_ROCM += TestROCm_ViewOfClass.o
-	    OBJ_ROCM += TestROCm_Spaces.o
+        OBJ_ROCM += TestROCm_Spaces.o
+        OBJ_ROCM += TestROCm_Crs.o
      
         TARGETS += KokkosCore_UnitTest_ROCm
         TEST_TARGETS += test-rocm
@@ -137,7 +136,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
 	OBJ_THREADS += TestThreads_RangePolicy.o
     OBJ_THREADS += TestThreads_View_64bit.o
 	OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o
-	OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o
+	OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o TestThreads_ViewLayoutStrideAssignment.o
 	OBJ_THREADS += TestThreads_ViewOfClass.o
 	OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o
 	OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o
@@ -145,12 +144,13 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
 	OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o
 	OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o
 	OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o
+	OBJ_THREADS += TestThreads_Reductions_DeviceView.o
 	OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o
 	OBJ_THREADS += TestThreads_Complex.o
 	OBJ_THREADS += TestThreads_AtomicOperations_int.o TestThreads_AtomicOperations_unsignedint.o TestThreads_AtomicOperations_longint.o 
 	OBJ_THREADS += TestThreads_AtomicOperations_unsignedlongint.o TestThreads_AtomicOperations_longlongint.o TestThreads_AtomicOperations_double.o TestThreads_AtomicOperations_float.o
 	OBJ_THREADS += TestThreads_AtomicViews.o TestThreads_Atomics.o
-	OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o
+	OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o TestThreads_TeamTeamSize.o
 	OBJ_THREADS += TestThreads_TeamReductionScan.o
 	OBJ_THREADS += TestThreads_Other.o
 	OBJ_THREADS += TestThreads_MDRange_a.o TestThreads_MDRange_b.o TestThreads_MDRange_c.o TestThreads_MDRange_d.o TestThreads_MDRange_e.o
@@ -167,7 +167,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
 	OBJ_OPENMP += TestOpenMP_RangePolicy.o
     OBJ_OPENMP += TestOpenMP_View_64bit.o
 	OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o
-	OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o
+	OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o TestOpenMP_ViewLayoutStrideAssignment.o
 	OBJ_OPENMP += TestOpenMP_ViewOfClass.o
 	OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o
 	OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o
@@ -176,13 +176,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
 	OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o
 	OBJ_OPENMP += TestOpenMP_SubView_c13.o
 	OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o
+	OBJ_OPENMP += TestOpenMP_Reductions_DeviceView.o
 	OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o
 	OBJ_OPENMP += TestOpenMP_Complex.o
 	OBJ_OPENMP += TestOpenMP_AtomicOperations_int.o TestOpenMP_AtomicOperations_unsignedint.o TestOpenMP_AtomicOperations_longint.o 
 	OBJ_OPENMP += TestOpenMP_AtomicOperations_unsignedlongint.o TestOpenMP_AtomicOperations_longlongint.o TestOpenMP_AtomicOperations_double.o TestOpenMP_AtomicOperations_float.o
 	OBJ_OPENMP += TestOpenMP_AtomicViews.o TestOpenMP_Atomics.o
 	OBJ_OPENMP += TestOpenMP_Team.o TestOpenMP_TeamScratch.o
-	OBJ_OPENMP += TestOpenMP_TeamReductionScan.o
+	OBJ_OPENMP += TestOpenMP_TeamReductionScan.o TestOpenMP_TeamTeamSize.o
 	OBJ_OPENMP += TestOpenMP_Other.o
 	OBJ_OPENMP += TestOpenMP_MDRange_a.o TestOpenMP_MDRange_b.o TestOpenMP_MDRange_c.o TestOpenMP_MDRange_d.o TestOpenMP_MDRange_e.o
 	OBJ_OPENMP += TestOpenMP_Crs.o
@@ -256,7 +257,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
         OBJ_SERIAL += TestSerial_RangePolicy.o
         OBJ_SERIAL += TestSerial_View_64bit.o
         OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o
-        OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o
+        OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o TestSerial_ViewLayoutStrideAssignment.o
         OBJ_SERIAL += TestSerial_ViewOfClass.o
         OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o
         OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o
@@ -265,13 +266,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
         OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o
         OBJ_SERIAL += TestSerial_SubView_c13.o
         OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o
+	OBJ_SERIAL += TestSerial_Reductions_DeviceView.o
     	OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o
         OBJ_SERIAL += TestSerial_Complex.o
 	    OBJ_SERIAL += TestSerial_AtomicOperations_int.o TestSerial_AtomicOperations_unsignedint.o TestSerial_AtomicOperations_longint.o 
 	    OBJ_SERIAL += TestSerial_AtomicOperations_unsignedlongint.o TestSerial_AtomicOperations_longlongint.o TestSerial_AtomicOperations_double.o TestSerial_AtomicOperations_float.o
 	    OBJ_SERIAL += TestSerial_AtomicViews.o TestSerial_Atomics.o
         OBJ_SERIAL += TestSerial_Team.o TestSerial_TeamScratch.o
-        OBJ_SERIAL += TestSerial_TeamReductionScan.o
+        OBJ_SERIAL += TestSerial_TeamReductionScan.o TestSerial_TeamTeamSize.o
         OBJ_SERIAL += TestSerial_Other.o
         #HCC_WORKAROUND
         ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
diff --git a/packages/kokkos/core/unit_test/TestAtomic.hpp b/packages/kokkos/core/unit_test/TestAtomic.hpp
index cf4bae584..58b632511 100644
--- a/packages/kokkos/core/unit_test/TestAtomic.hpp
+++ b/packages/kokkos/core/unit_test/TestAtomic.hpp
@@ -224,7 +224,8 @@ T AddLoop( int loop ) {
 
   struct AddFunctorReduce< T, execution_space > f_add_red;
   f_add_red.data = data;
-  Kokkos::parallel_reduce( loop, f_add_red );
+  int dummy_result;
+  Kokkos::parallel_reduce( loop, f_add_red , dummy_result );
   execution_space::fence();
 
   return val;
@@ -309,7 +310,8 @@ T CASLoop( int loop ) {
 
   struct CASFunctorReduce< T, execution_space > f_cas_red;
   f_cas_red.data = data;
-  Kokkos::parallel_reduce( loop, f_cas_red );
+  int dummy_result;
+  Kokkos::parallel_reduce( loop, f_cas_red , dummy_result );
   execution_space::fence();
 
   return val;
@@ -401,7 +403,8 @@ T ExchLoop( int loop ) {
   struct ExchFunctorReduce< T, execution_space > f_exch_red;
   f_exch_red.data = data;
   f_exch_red.data2 = data2;
-  Kokkos::parallel_reduce( loop, f_exch_red );
+  int dummy_result;
+  Kokkos::parallel_reduce( loop, f_exch_red , dummy_result );
   execution_space::fence();
 
   return val;
@@ -529,7 +532,7 @@ TEST_F( TEST_CATEGORY, atomics )
   ASSERT_TRUE( ( TestAtomic::Loop< float, TEST_EXECSPACE >( 100, 3 ) ) );
 
 #ifndef KOKKOS_ENABLE_OPENMPTARGET
-#ifndef KOKKOS_ENABLE_ROCM
+#ifndef KOKKOS_ENABLE_ROCM // ROCM doesn't yet support atomics for >64bit types
   ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 1, 1 ) ) );
   ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 1, 2 ) ) );
   ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 1, 3 ) ) );
diff --git a/packages/kokkos/core/unit_test/TestCXX11.hpp b/packages/kokkos/core/unit_test/TestCXX11.hpp
index b6c34d2d4..8a158e266 100644
--- a/packages/kokkos/core/unit_test/TestCXX11.hpp
+++ b/packages/kokkos/core/unit_test/TestCXX11.hpp
@@ -216,7 +216,7 @@ template< class DeviceType, bool PWRTest >
 double ReduceTestFunctor() {
   typedef Kokkos::TeamPolicy< DeviceType > policy_type;
   typedef Kokkos::View< double**, DeviceType > view_type;
-  typedef Kokkos::View< double, typename view_type::host_mirror_space, Kokkos::MemoryUnmanaged > unmanaged_result;
+  typedef Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > unmanaged_result;
 
   view_type a( "A", 100, 5 );
   typename view_type::HostMirror h_a = Kokkos::create_mirror_view( a );
@@ -244,7 +244,7 @@ template< class DeviceType, bool PWRTest >
 double ReduceTestLambda() {
   typedef Kokkos::TeamPolicy< DeviceType > policy_type;
   typedef Kokkos::View< double**, DeviceType > view_type;
-  typedef Kokkos::View< double, typename view_type::host_mirror_space, Kokkos::MemoryUnmanaged > unmanaged_result;
+  typedef Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > unmanaged_result;
 
   view_type a( "A", 100, 5 );
   typename view_type::HostMirror h_a = Kokkos::create_mirror_view( a );
@@ -327,12 +327,18 @@ bool Test( int test ) {
                            };
   bool passed = true;
 
-  if ( res_functor != res_lambda ) {
+  auto a = res_functor;
+  auto b = res_lambda;
+  // use a tolerant comparison because functors and lambdas vectorize differently
+  // https://github.com/trilinos/Trilinos/issues/3233
+  auto rel_err = (std::abs(b - a) / std::max(std::abs(a), std::abs(b)));
+  auto tol = 1e-14;
+  if (rel_err > tol) {
     passed = false;
 
     std::cout << "CXX11 ( test = '"
-              << testnames[test] << "' FAILED : "
-              << res_functor << " != " << res_lambda
+              << testnames[test] << "' FAILED : relative error "
+              << rel_err << " > tolerance " << tol
               << std::endl;
   }
 
diff --git a/packages/kokkos/core/unit_test/TestComplex.hpp b/packages/kokkos/core/unit_test/TestComplex.hpp
index fcaebe3c6..f4343df0c 100644
--- a/packages/kokkos/core/unit_test/TestComplex.hpp
+++ b/packages/kokkos/core/unit_test/TestComplex.hpp
@@ -71,7 +71,7 @@ struct TestComplexConstruction {
     ASSERT_FLOAT_EQ(h_results(7).real(),7.5);  ASSERT_FLOAT_EQ(h_results(7).imag(),0.0);
     ASSERT_FLOAT_EQ(h_results(8).real(),double(8));  ASSERT_FLOAT_EQ(h_results(8).imag(),0.0);
 
-#ifndef KOKKOS_ENABLE_ROCM
+#ifndef KOKKOS_ENABLE_ROCM // Copy construction conversion between Kokkos::complex and std::complex doesn't compile
     Kokkos::complex<double> a(1.5,2.5),b(3.25,5.25),r_kk;
     std::complex<double> sa(a),sb(3.25,5.25),r;
     r = a; r_kk = a;         ASSERT_FLOAT_EQ(r.real(),r_kk.real()); ASSERT_FLOAT_EQ(r.imag(),r_kk.imag());
diff --git a/packages/kokkos/core/unit_test/TestMDRange.hpp b/packages/kokkos/core/unit_test/TestMDRange.hpp
index 9298983aa..88b3a9b0c 100644
--- a/packages/kokkos/core/unit_test/TestMDRange.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange.hpp
@@ -318,6 +318,24 @@ struct TestMDRange_2D {
 
       ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) );
     }
+    // Test with reducers - scalar + label
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type;
+      int s0 = 1;
+      int s1 = 1;
+      range_type range( {{ s0, s1 }}, {{ N0, N1 }}, {{ 3, 3 }} );
+
+      TestMDRange_2D functor( N0, N1 );
+
+      parallel_for( "rank2-parfor-label", range, functor );
+
+      value_type sum = 0.0;
+      Kokkos::Sum< value_type > reducer_scalar( sum );
+
+      parallel_reduce( "rank2-reducer-label", range, functor, reducer_scalar );
+
+      ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) );
+    }
     // Test with reducers - scalar view
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type;
@@ -337,7 +355,31 @@ struct TestMDRange_2D {
 
       ASSERT_EQ( sum, 2 * N0 * N1 );
     }
+    // Test Min reducer with lambda
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type;
+      range_type range( {{ 1, 1 }}, {{ N0, N1 }}, {{ 3, 3 }} );
+
+      Kokkos::View< double**, ExecSpace > v_in("v_in", N0, N1 );
+
+      parallel_for( "rank2-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j ) {
+          v_in( i , j ) = (i+1) * (j+1) ;
+        });
 
+      double min;
+      Kokkos::Min< double > reducer_scalar( min );
+
+      parallel_reduce( "rank2-min-reducer", range, KOKKOS_LAMBDA ( const int i, const int j, double& min_val ) {
+            min_val = fmin( v_in(i,j), min_val );
+          }
+        , reducer_scalar);
+
+      ASSERT_EQ( min, 4.0 );
+    }
+#endif
+#endif
     // Tagged operator test
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int>, InitTag > range_type;
@@ -858,6 +900,22 @@ struct TestMDRange_3D {
 
       ASSERT_EQ( sum, 2 * N0 * N1 * N2 );
     }
+    // Test with reducers - scalar + label
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<3>, Kokkos::IndexType<int> > range_type;
+      range_type range( {{ 0, 0, 0 }}, {{ N0, N1, N2 }}, {{ 3, 3, 3 }} );
+
+      TestMDRange_3D functor( N0, N1, N2 );
+
+      parallel_for( "rank3-parfor-label", range, functor );
+
+      value_type sum = 0.0;
+      Kokkos::Sum< value_type > reducer_scalar( sum );
+
+      parallel_reduce( "rank3-reducer-label", range, functor, reducer_scalar );
+
+      ASSERT_EQ( sum, 2 * N0 * N1 * N2 );
+    }
     // Test with reducers - scalar view
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<3>, Kokkos::IndexType<int> > range_type;
@@ -877,6 +935,31 @@ struct TestMDRange_3D {
 
       ASSERT_EQ( sum, 2 * N0 * N1 * N2 );
     }
+    // Test Min reducer with lambda
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<3>, Kokkos::IndexType<int> > range_type;
+
+      range_type range( {{ 1, 1, 1 }}, {{ N0, N1, N2 }}, {{ 3, 3, 3 }} );
+
+      Kokkos::View< double***, ExecSpace > v_in("v_in", N0, N1, N2 );
+
+      parallel_for( "rank3-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j, const int k ) {
+          v_in( i, j, k ) = (i+1) * (j+1) * (k+1) ;
+        });
+
+      double min;
+
+      parallel_reduce("rank3-min-reducer", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, double& min_val ) {
+            min_val = (v_in(i,j,k) < min_val) ? v_in(i,j,k) : min_val;
+          }
+        , Kokkos::Min<double>(min) );
+
+      ASSERT_EQ( min, 8.0 );
+    }
+#endif
+#endif
 
     // Tagged operator test
     {
@@ -1382,6 +1465,23 @@ struct TestMDRange_4D {
       ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 );
     }
 
+    // Test with reducers - scalar + label
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4>, Kokkos::IndexType<int> > range_type;
+      range_type range( {{ 0, 0, 0, 0 }}, {{ N0, N1, N2, N3 }}, {{ 3, 3, 3, 3 }} );
+
+      TestMDRange_4D functor( N0, N1, N2, N3 );
+
+      parallel_for( "rank4-parfor-label", range, functor );
+
+      value_type sum = 0.0;
+      Kokkos::Sum< value_type > reducer_scalar( sum );
+
+      parallel_reduce( "rank4-reducer-label", range, functor, reducer_scalar );
+
+      ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 );
+    }
+
     // Test with reducers - scalar view
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4>, Kokkos::IndexType<int> > range_type;
@@ -1402,6 +1502,32 @@ struct TestMDRange_4D {
       ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 );
     }
 
+    // Test Min reducer with lambda
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4>, Kokkos::IndexType<int> > range_type;
+
+      range_type range( {{ 1, 1, 1, 1 }}, {{ N0, N1, N2, N3 }}, {{ 3, 3, 3, 3 }} );
+
+      Kokkos::View< double****, ExecSpace > v_in("v_in", N0, N1, N2, N3 );
+
+      parallel_for( "rank4-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l ) {
+          v_in( i, j, k, l ) = (i+1) * (j+1) * (k+1) * (l+1) ;
+        });
+
+      double min;
+
+      parallel_reduce("rank4-min-reducer", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, double& min_val ) {
+            min_val = (v_in(i,j,k,l) < min_val) ? v_in(i,j,k,l) : min_val;
+          }
+        , Kokkos::Min<double>(min) );
+
+      ASSERT_EQ( min, 16.0 );
+    }
+#endif
+#endif
+
     // Tagged operator test
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int>, InitTag > range_type;
@@ -1926,6 +2052,23 @@ struct TestMDRange_5D {
       ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 );
     }
 
+    // Test with reducers - scalar + label
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5>, Kokkos::IndexType<int> > range_type;
+      range_type range( {{ 0, 0, 0, 0, 0 }}, {{ N0, N1, N2, N3, N4 }}, {{ 3, 3, 3, 3, 3 }} );
+
+      TestMDRange_5D functor( N0, N1, N2, N3, N4 );
+
+      parallel_for( "rank5-parfor-label", range, functor );
+
+      value_type sum = 0.0;
+      Kokkos::Sum< value_type > reducer_scalar( sum );
+
+      parallel_reduce( "rank5-reducer-label", range, functor, reducer_scalar );
+
+      ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 );
+    }
+
     // Test with reducers - scalar view
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5>, Kokkos::IndexType<int> > range_type;
@@ -1946,6 +2089,32 @@ struct TestMDRange_5D {
       ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 );
     }
 
+    // Test Min reducer with lambda
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5>, Kokkos::IndexType<int> > range_type;
+
+      range_type range( {{ 1, 1, 1, 1, 1 }}, {{ N0, N1, N2, N3, N4 }}, {{ 3, 3, 3, 2, 2 }} );
+
+      Kokkos::View< double*****, ExecSpace > v_in("v_in", N0, N1, N2, N3, N4 );
+
+      parallel_for( "rank5-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, const int m ) {
+          v_in( i, j, k, l, m ) = (i+1) * (j+1) * (k+1) * (l+1) * (m+1) ;
+        });
+
+      double min;
+
+      parallel_reduce("rank5-min-reducer", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, const int m, double& min_val ) {
+            min_val = (v_in(i,j,k,l,m) < min_val) ? v_in(i,j,k,l,m) : min_val;
+          }
+        , Kokkos::Min<double>(min) );
+
+      ASSERT_EQ( min, 32.0 );
+    }
+#endif
+#endif
+
     // Tagged operator test
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int>, InitTag > range_type;
@@ -2401,6 +2570,23 @@ struct TestMDRange_6D {
       ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 );
     }
 
+    // Test with reducers - scalar + label
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int> > range_type;
+      range_type range( {{ 0, 0, 0, 0, 0, 0 }}, {{ N0, N1, N2, N3, N4, N5 }}, {{ 3, 3, 3, 3, 3, 2 }} );
+
+      TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 );
+
+      parallel_for( "rank6-parfor-label", range, functor );
+
+      value_type sum = 0.0;
+      Kokkos::Sum< value_type > reducer_scalar( sum );
+
+      parallel_reduce( "rank6-reducer-label", range, functor, reducer_scalar );
+
+      ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 );
+    }
+
     // Test with reducers - scalar view
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int> > range_type;
@@ -2421,6 +2607,32 @@ struct TestMDRange_6D {
       ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 );
     }
 
+    // Test Min reducer with lambda
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
+    {
+      typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int> > range_type;
+
+      range_type range( {{ 1, 1, 1, 1, 1, 1 }}, {{ N0, N1, N2, N3, N4, N5 }}, {{ 3, 3, 3, 2, 2, 1 }} );
+
+      Kokkos::View< double******, ExecSpace > v_in("v_in", N0, N1, N2, N3, N4, N5 );
+
+      parallel_for( "rank6-init-lambda", range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, const int m, const int n ) {
+          v_in( i, j, k, l, m, n ) = (i+1) * (j+1) * (k+1) * (l+1) * (m+1) * (n+1) ;
+        });
+
+      double min;
+
+      parallel_reduce("rank6-min-reducer",  range, KOKKOS_LAMBDA ( const int i, const int j, const int k, const int l, const int m, const int n, double& min_val ) {
+            min_val = (v_in(i,j,k,l,m,n) < min_val) ? v_in(i,j,k,l,m,n) : min_val;
+          }
+        , Kokkos::Min<double>(min) );
+
+      ASSERT_EQ( min, 64.0 );
+    }
+#endif
+#endif
+
     // Tagged operator test
     {
       typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int>, InitTag > range_type;
diff --git a/packages/kokkos/core/unit_test/TestMDRange_a.hpp b/packages/kokkos/core/unit_test/TestMDRange_a.hpp
index 5de5225eb..308b3a302 100644
--- a/packages/kokkos/core/unit_test/TestMDRange_a.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange_a.hpp
@@ -44,8 +44,12 @@
 #include<TestMDRange.hpp>
 
 namespace Test {
+
 TEST_F( TEST_CATEGORY , mdrange_5d ) {
+#if !defined ( KOKKOS_ENABLE_ROCM ) // MDRange Reduce explicitly handled in its own cpp file
   TestMDRange_5D< TEST_EXECSPACE >::test_reduce5( 100, 10, 10, 10, 5 );
+#endif
   TestMDRange_5D< TEST_EXECSPACE >::test_for5( 100, 10, 10, 10, 5 );
 }
+
 }
diff --git a/packages/kokkos/core/unit_test/TestMDRange_b.hpp b/packages/kokkos/core/unit_test/TestMDRange_b.hpp
index 60ece56aa..e714f1839 100644
--- a/packages/kokkos/core/unit_test/TestMDRange_b.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange_b.hpp
@@ -45,10 +45,11 @@
 
 namespace Test {
 
-
 TEST_F( TEST_CATEGORY , mdrange_6d ) {
   TestMDRange_6D< TEST_EXECSPACE >::test_for6( 10, 10, 10, 10, 5, 5 );
+#if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduce explicitly handled in its own cpp file
   TestMDRange_6D< TEST_EXECSPACE >::test_reduce6( 100, 10, 10, 10, 5, 5 );
+#endif
 }
 
 }
diff --git a/packages/kokkos/core/unit_test/TestMDRange_c.hpp b/packages/kokkos/core/unit_test/TestMDRange_c.hpp
index 029b1e2b1..810e1d82b 100644
--- a/packages/kokkos/core/unit_test/TestMDRange_c.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange_c.hpp
@@ -46,8 +46,10 @@
 namespace Test {
 
 TEST_F( TEST_CATEGORY , mdrange_2d) {
-  TestMDRange_2D< TEST_EXECSPACE >::test_for2( 100, 100 );
+#if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduce explicitly handled in its own cpp file
   TestMDRange_2D< TEST_EXECSPACE >::test_reduce2( 100, 100 );
+#endif
+  TestMDRange_2D< TEST_EXECSPACE >::test_for2( 100, 100 );
 }
 
 TEST_F( TEST_CATEGORY , mdrange_array_reduce ) {
diff --git a/packages/kokkos/core/unit_test/TestMDRange_d.hpp b/packages/kokkos/core/unit_test/TestMDRange_d.hpp
index 240df9aec..1a477a228 100644
--- a/packages/kokkos/core/unit_test/TestMDRange_d.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange_d.hpp
@@ -44,9 +44,12 @@
 #include<TestMDRange.hpp>
 
 namespace Test {
+
 TEST_F( TEST_CATEGORY , mdrange_3d) {
   TestMDRange_3D< TEST_EXECSPACE >::test_for3( 100, 10, 100 );
+#if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduced explicitly handled in its own cpp file
   TestMDRange_3D< TEST_EXECSPACE >::test_reduce3( 100, 10, 100 );
+#endif
 }
 
 TEST_F( TEST_CATEGORY , mdrange_neg_idx ) {
@@ -56,4 +59,5 @@ TEST_F( TEST_CATEGORY , mdrange_neg_idx ) {
   TestMDRange_5D_NegIdx< TEST_EXECSPACE >::test_5D_negidx( 128, 32, 8, 8, 4 );
   TestMDRange_6D_NegIdx< TEST_EXECSPACE >::test_6D_negidx( 128, 32, 8, 8, 4, 2 );
 }
+
 }
diff --git a/packages/kokkos/core/unit_test/TestMDRange_e.hpp b/packages/kokkos/core/unit_test/TestMDRange_e.hpp
index 8162184c9..a62672535 100644
--- a/packages/kokkos/core/unit_test/TestMDRange_e.hpp
+++ b/packages/kokkos/core/unit_test/TestMDRange_e.hpp
@@ -44,8 +44,12 @@
 #include<TestMDRange.hpp>
 
 namespace Test {
+
 TEST_F( TEST_CATEGORY , mdrange_4d ) {
+#if !defined( KOKKOS_ENABLE_ROCM ) // MDRange Reduce explicitly handled in its own cpp file
   TestMDRange_4D< TEST_EXECSPACE >::test_reduce4( 100, 10, 10, 10 );
+#endif
   TestMDRange_4D< TEST_EXECSPACE >::test_for4( 100, 10, 10, 10 );
 }
+
 }
diff --git a/packages/kokkos/core/unit_test/TestMemoryPool.hpp b/packages/kokkos/core/unit_test/TestMemoryPool.hpp
index 9fb1d900f..00079e02e 100644
--- a/packages/kokkos/core/unit_test/TestMemoryPool.hpp
+++ b/packages/kokkos/core/unit_test/TestMemoryPool.hpp
@@ -626,7 +626,9 @@ TEST_F( TEST_CATEGORY, memory_pool )
   TestMemoryPool::test_host_memory_pool_stats<>();
   TestMemoryPool::test_memory_pool_v2< TEST_EXECSPACE >(false,false);
   TestMemoryPool::test_memory_pool_corners< TEST_EXECSPACE >(false,false);
+#ifdef KOKKOS_ENABLE_LARGE_MEM_TESTS
   TestMemoryPool::test_memory_pool_huge< TEST_EXECSPACE >();
+#endif
 }
 
 }
diff --git a/packages/kokkos/core/unit_test/TestReduce.hpp b/packages/kokkos/core/unit_test/TestReduce.hpp
index 5748df1f1..924d8eb45 100644
--- a/packages/kokkos/core/unit_test/TestReduce.hpp
+++ b/packages/kokkos/core/unit_test/TestReduce.hpp
@@ -63,9 +63,11 @@ public:
 
   const size_type nwork;
 
+  KOKKOS_INLINE_FUNCTION
   ReduceFunctor( const size_type & arg_nwork )
     : nwork( arg_nwork ) {}
 
+  KOKKOS_INLINE_FUNCTION
   ReduceFunctor( const ReduceFunctor & rhs )
     : nwork( rhs.nwork ) {}
 
@@ -102,6 +104,7 @@ class ReduceFunctorFinal : public ReduceFunctor< long, DeviceType > {
 public:
   typedef typename ReduceFunctor< long, DeviceType >::value_type value_type;
 
+  KOKKOS_INLINE_FUNCTION
   ReduceFunctorFinal( const size_t n )
     : ReduceFunctor< long, DeviceType >( n ) {}
 
diff --git a/packages/kokkos/core/unit_test/TestReduceDeviceView.hpp b/packages/kokkos/core/unit_test/TestReduceDeviceView.hpp
new file mode 100644
index 000000000..4f65166e3
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestReduceDeviceView.hpp
@@ -0,0 +1,131 @@
+#include<Kokkos_Core.hpp>
+
+namespace Test {
+namespace {
+
+struct TestIsAsynchFunctor {
+  Kokkos::View<double,TEST_EXECSPACE> atomic_test;
+  TestIsAsynchFunctor(Kokkos::View<double,TEST_EXECSPACE> atomic_test_):atomic_test(atomic_test_){}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int) const {
+    Kokkos::atomic_add(&atomic_test(),1.0);
+  }
+};
+
+template<class PolicyType, class ReduceFunctor>
+void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor) {
+
+     using ExecSpace = TEST_EXECSPACE;
+     
+     Kokkos::View<int64_t,TEST_EXECSPACE> result("Result");
+     Kokkos::View<double,TEST_EXECSPACE> atomic_test("Atomic");
+     int64_t reducer_result, view_result, scalar_result;
+
+     
+     Kokkos::Timer timer;     
+
+     // Establish whether execspace is asynchronous
+     Kokkos::parallel_for("Test::ReduceDeviceView::TestIsAsynch",Kokkos::RangePolicy<TEST_EXECSPACE>(0,1000000),
+       TestIsAsynchFunctor(atomic_test));
+     double time0 = timer.seconds();
+     timer.reset();
+     ExecSpace::execution_space::fence();
+     double time_fence0 = timer.seconds(); 
+     Kokkos::deep_copy(result,0);
+     timer.reset();
+     bool is_async = time0<time_fence0;
+
+     // Test Reducer 
+
+     Kokkos::parallel_reduce("Test::ReduceDeviceView::TestReducer",policy, functor, Kokkos::Sum<int64_t,TEST_EXECSPACE>(result));
+     double time1 = timer.seconds();
+     // Check whether it was asyncronous
+     timer.reset();
+     ExecSpace::execution_space::fence();
+     double time_fence1 = timer.seconds();    
+     Kokkos::deep_copy(reducer_result,result);    
+     Kokkos::deep_copy(result,0);
+     ASSERT_EQ(N,reducer_result); 
+     timer.reset();
+     
+     
+     // Test View 
+     Kokkos::parallel_reduce("Test::ReduceDeviceView::TestView",policy, functor, result);
+     double time2 = timer.seconds();
+     // Check whether it was asyncronous
+     timer.reset();
+     ExecSpace::execution_space::fence();
+     double time_fence2 = timer.seconds();    
+     Kokkos::deep_copy(view_result,result);    
+     Kokkos::deep_copy(result,0);
+     ASSERT_EQ(N,view_result); 
+     timer.reset();
+     
+     
+     // Test Scalar
+     Kokkos::parallel_reduce("Test::ReduceDeviceView::TestScalar",policy, functor, scalar_result);
+     double time3 = timer.seconds();
+
+     // Check whether it was asyncronous
+     timer.reset();
+     ExecSpace::execution_space::fence();
+     double time_fence3 = timer.seconds();
+
+     ASSERT_EQ(N,scalar_result); 
+     if(is_async) {
+       ASSERT_TRUE(time1<time_fence1);
+     }
+     if(is_async) {
+       ASSERT_TRUE(time2<time_fence2);
+       ASSERT_TRUE(time3>time_fence3);
+     }
+  }
+
+struct RangePolicyFunctor {
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int, int64_t& lsum) const {
+    lsum += 1;
+  }
+};
+
+struct MDRangePolicyFunctor {
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int, const int, const int, int64_t& lsum) const {
+    lsum += 1;
+  }
+};
+
+struct TeamPolicyFunctor {
+  int M;
+  TeamPolicyFunctor(int M_):M(M_){}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const Kokkos::TeamPolicy<TEST_EXECSPACE>::member_type& team, int64_t& lsum) const {
+    for(int i=team.team_rank(); i<M; i+=team.team_size())
+      lsum += 1;
+  }
+};
+
+} // namespace
+
+TEST_F( TEST_CATEGORY, reduce_device_view_range_policy )
+{
+  int N=1000*1024*1024;
+  test_reduce_device_view(N,Kokkos::RangePolicy<TEST_EXECSPACE>(0,N),RangePolicyFunctor());
+}
+
+TEST_F( TEST_CATEGORY, reduce_device_view_mdrange_policy )
+{
+  int N=1000*1024*1024;
+  test_reduce_device_view(N,Kokkos::MDRangePolicy<TEST_EXECSPACE,Kokkos::Rank<3>>({0,0,0},{1000,1024,1024}),MDRangePolicyFunctor());
+}
+
+TEST_F( TEST_CATEGORY, reduce_device_view_team_policy )
+{
+  int N=1000*1024*1024;
+  test_reduce_device_view(N,Kokkos::TeamPolicy<TEST_EXECSPACE>(1000*1024,Kokkos::AUTO),TeamPolicyFunctor(1024));
+}
+
+} // namespace Test
+
diff --git a/packages/kokkos/core/unit_test/TestReducers.hpp b/packages/kokkos/core/unit_test/TestReducers.hpp
index 519e3a80f..7270ea337 100644
--- a/packages/kokkos/core/unit_test/TestReducers.hpp
+++ b/packages/kokkos/core/unit_test/TestReducers.hpp
@@ -477,7 +477,7 @@ struct TestReducers {
     int reference_loc = -1;
 
     for ( int i = 0; i < N; i++ ) {
-      h_values( i ) = (Scalar) ( rand() % 100000 );
+      h_values( i ) = (Scalar) ( rand() % 100000 + 2 );
 
       if ( h_values( i ) < reference_min ) {
         reference_min = h_values( i );
@@ -485,7 +485,7 @@ struct TestReducers {
       }
       else if ( h_values( i ) == reference_min ) {
         // Make min unique.
-        h_values( i ) += std::numeric_limits< Scalar >::epsilon();
+        h_values( i ) += Scalar(1);
       }
     }
     Kokkos::deep_copy( values, h_values );
@@ -537,7 +537,7 @@ struct TestReducers {
     int reference_loc = -1;
 
     for ( int i = 0; i < N; i++ ) {
-      h_values( i ) = (Scalar) ( rand() % 100000 );
+      h_values( i ) = (Scalar) ( rand() % 100000 + 2 );
 
       if ( h_values( i ) > reference_max ) {
         reference_max = h_values( i );
@@ -545,7 +545,7 @@ struct TestReducers {
       }
       else if ( h_values( i ) == reference_max ) {
         // Make max unique.
-        h_values( i ) -= std::numeric_limits< Scalar >::epsilon();
+        h_values( i ) -= Scalar(1);
       }
     }
     Kokkos::deep_copy( values, h_values );
@@ -599,7 +599,7 @@ struct TestReducers {
      int reference_maxloc = -1;
 
      for ( int i = 0; i < N; i++ ) {
-       h_values( i ) = (Scalar) ( rand() % 100000 );
+       h_values( i ) = (Scalar) ( rand() % 100000 + 2);
      }
 
      for ( int i = 0; i < N; i++ ) {
@@ -609,7 +609,7 @@ struct TestReducers {
        }
        else if ( h_values( i ) == reference_max ) {
          // Make max unique.
-         h_values( i ) -= std::numeric_limits< Scalar >::epsilon();
+         h_values( i ) -= Scalar(1);
        }
      }
 
@@ -620,7 +620,7 @@ struct TestReducers {
        }
        else if ( h_values( i ) == reference_min ) {
          // Make min unique.
-         h_values( i ) += std::numeric_limits< Scalar >::epsilon();
+         h_values( i ) += Scalar(1);
        }
      }
 
diff --git a/packages/kokkos/core/unit_test/TestScan.hpp b/packages/kokkos/core/unit_test/TestScan.hpp
index 5700c21dc..e021ed09f 100644
--- a/packages/kokkos/core/unit_test/TestScan.hpp
+++ b/packages/kokkos/core/unit_test/TestScan.hpp
@@ -69,9 +69,9 @@ struct TestScan {
       const value_type answer = n & 1 ? ( n * ( ( n + 1 ) / 2 ) ) : ( ( n / 2 ) * ( n + 1 ) );
 
       if ( answer != update ) {
-        errors()++;
+        int fail = errors()++;
 
-        if ( errors() < 20 ) {
+        if ( fail < 20 ) {
           printf( "TestScan(%d,%ld) != %ld\n", iwork, update, answer );
         }
       }
@@ -97,6 +97,7 @@ struct TestScan {
     long long int total = 0;
     Kokkos::parallel_scan( N, *this, total );
     run_check( size_t( ( N+1 )*N/2 ), size_t( total ) );
+    check_error();
   }
 
   TestScan( const WorkSpec & Start , const WorkSpec & N )
@@ -108,6 +109,13 @@ struct TestScan {
     errors = errors_a;
     
     Kokkos::parallel_scan( exec_policy( Start , N ) , *this );
+    check_error();
+  }
+
+  void check_error() {
+    int total_errors;
+    Kokkos::deep_copy(total_errors, errors);
+    ASSERT_EQ(total_errors,0);
   }
 
   static void test_range( const WorkSpec & begin, const WorkSpec & end )
diff --git a/packages/kokkos/core/unit_test/TestTeam.hpp b/packages/kokkos/core/unit_test/TestTeam.hpp
index 2fe615a75..487a4d581 100644
--- a/packages/kokkos/core/unit_test/TestTeam.hpp
+++ b/packages/kokkos/core/unit_test/TestTeam.hpp
@@ -61,7 +61,7 @@ struct TestTeamPolicy {
 
   TestTeamPolicy( const size_t league_size )
     : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ),
-               Kokkos::TeamPolicy< ScheduleType,  ExecSpace >::team_size_max( *this ),
+               Kokkos::TeamPolicy< ScheduleType,  ExecSpace >(1,1).team_size_max( *this, Kokkos::ParallelReduceTag() ),
                league_size ) {}
 
   struct VerifyInitTag {};
@@ -113,11 +113,14 @@ struct TestTeamPolicy {
   static void test_for( const size_t league_size )
   {
     TestTeamPolicy functor( league_size );
+    typedef Kokkos::TeamPolicy< ScheduleType,  ExecSpace > policy_type;
+    typedef Kokkos::TeamPolicy< ScheduleType,  ExecSpace, VerifyInitTag > policy_type_init;
 
-    const int team_size = Kokkos::TeamPolicy< ScheduleType,  ExecSpace >::team_size_max( functor );
+    const int team_size = policy_type(league_size,1).team_size_max( functor, Kokkos::ParallelForTag() );
+    const int team_size_init = policy_type_init(league_size,1).team_size_max( functor, Kokkos::ParallelForTag() );
 
-    Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType,  ExecSpace >( league_size, team_size ), functor );
-    Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType,  ExecSpace, VerifyInitTag >( league_size, team_size ), functor );
+    Kokkos::parallel_for( policy_type( league_size, team_size ), functor );
+    Kokkos::parallel_for( policy_type_init( league_size, team_size_init ), functor );
 
     test_small_league_size();
   }
@@ -142,15 +145,19 @@ struct TestTeamPolicy {
   {
     TestTeamPolicy functor( league_size );
 
-    const int team_size = Kokkos::TeamPolicy< ScheduleType,  ExecSpace >::team_size_max( functor );
+    typedef Kokkos::TeamPolicy< ScheduleType,  ExecSpace > policy_type;
+    typedef Kokkos::TeamPolicy< ScheduleType,  ExecSpace, ReduceTag > policy_type_reduce;
+
+    const int team_size = policy_type_reduce(league_size,1).team_size_max( functor, Kokkos::ParallelReduceTag() );
+
     const long N = team_size * league_size;
 
     long total = 0;
 
-    Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size, team_size ), functor, total );
+    Kokkos::parallel_reduce( policy_type( league_size, team_size ), functor, total );
     ASSERT_EQ( size_t( ( N - 1 ) * ( N ) ) / 2, size_t( total ) );
 
-    Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace, ReduceTag >( league_size, team_size ), functor, total );
+    Kokkos::parallel_reduce( policy_type_reduce( league_size, team_size ), functor, total );
     ASSERT_EQ( ( size_t( N ) * size_t( N + 1 ) ) / 2, size_t( total ) );
   }
 };
@@ -177,8 +184,10 @@ public:
 
   const size_type nwork;
 
+  KOKKOS_INLINE_FUNCTION
   ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {}
 
+  KOKKOS_INLINE_FUNCTION
   ReduceTeamFunctor( const ReduceTeamFunctor & rhs ) : nwork( rhs.nwork ) {}
 
   KOKKOS_INLINE_FUNCTION
@@ -244,10 +253,12 @@ public:
     const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 )
                                       : ( nw / 2 ) * ( nw + 1 );
 
-    const unsigned team_size   = policy_type::team_size_recommended( functor_type( nwork ) );
+    policy_type team_exec( nw, 1 );
+
+    const unsigned team_size   = team_exec.team_size_recommended( functor_type( nwork ), Kokkos::ParallelReduceTag() );
     const unsigned league_size = ( nwork + team_size - 1 ) / team_size;
 
-    policy_type team_exec( league_size, team_size );
+    team_exec = policy_type( league_size, team_size );
 
     for ( unsigned i = 0; i < Repeat; ++i ) {
       result_type tmp( & result[i] );
@@ -370,7 +381,8 @@ public:
 
     functor_type functor;
 
-    policy_type team_exec( nteam, policy_type::team_size_max( functor ) );
+    policy_type team_exec( nteam, 1);
+    team_exec = policy_type(nteam, team_exec.team_size_max(functor, Kokkos::ParallelReduceTag()));
 
     for ( unsigned i = 0; i < Repeat; ++i ) {
       long int accum = 0;
@@ -475,7 +487,8 @@ struct TestSharedTeam {
     typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor;
     typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type;
 
-    const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() );
+    const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >(8192, 1).team_size_max( Functor(),
+        Kokkos::ParallelReduceTag() );
 
     Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size, team_size );
 
@@ -648,16 +661,20 @@ struct TestScratchTeam {
     typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged >  result_type;
     typedef Kokkos::TeamPolicy< ScheduleType,  ExecSpace > p_type;
 
-    const size_t team_size = p_type::team_size_max( Functor() );
-
-    p_type team_exec( 8192 / team_size, team_size );
 
     typename Functor::value_type error_count = 0;
 
+    int thread_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_THREAD_COUNT );
+
+    p_type team_exec = p_type(8192,1).set_scratch_size( 1, Kokkos::PerTeam( Functor::shared_int_array_type::shmem_size( Functor::SHARED_TEAM_COUNT ) ),
+                                                           Kokkos::PerThread( thread_scratch_size + 3*sizeof(int)));
+
+    const size_t team_size = team_exec.team_size_max( Functor(), Kokkos::ParallelReduceTag() );
+
     int team_scratch_size   = Functor::shared_int_array_type::shmem_size( Functor::SHARED_TEAM_COUNT ) +
                               Functor::shared_int_array_type::shmem_size( 3 * team_size );
 
-    int thread_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_THREAD_COUNT );
+    team_exec = p_type(8192 / team_size, team_size );
 
     Kokkos::parallel_reduce( team_exec.set_scratch_size( 1, Kokkos::PerTeam( team_scratch_size ),
                                                          Kokkos::PerThread( thread_scratch_size ) ),
@@ -956,7 +973,7 @@ struct TestShmemSize {
 
     size_t size = view_type::shmem_size( d1, d2, d3 );
 
-    ASSERT_EQ( size, d1 * d2 * d3 * sizeof( long ) );
+    ASSERT_EQ( size, (d1 * d2 * d3 + 1)* sizeof( long ) );
 
     test_layout_stride();
   }
@@ -973,3 +990,123 @@ struct TestShmemSize {
 };
 
 } // namespace Test
+
+/*--------------------------------------------------------------------------*/
+
+namespace Test {
+
+namespace {
+
+template< class ExecSpace, class ScheduleType >
+struct TestTeamBroadcast {
+  typedef typename Kokkos::TeamPolicy< ScheduleType,  ExecSpace >::member_type team_member;
+
+  TestTeamBroadcast( const size_t league_size ) {}
+
+  struct BroadcastTag {};
+
+  typedef long value_type;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const team_member &teamMember, value_type &update ) const
+  {
+    int lid = teamMember.league_rank();
+    int tid = teamMember.team_rank();
+    int ts  = teamMember.team_size();
+
+    value_type parUpdate = 0;
+    value_type value     = tid * 3 + 1;
+	
+    teamMember.team_broadcast(value, lid%ts); 
+
+    Kokkos::parallel_reduce( Kokkos::TeamThreadRange( teamMember, ts ), [&] ( const int j, value_type &teamUpdate ) {
+      teamUpdate += value;
+    }, parUpdate );
+
+    if ( teamMember.team_rank() == 0 ) update += parUpdate;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()( const BroadcastTag &, const team_member &teamMember, value_type &update ) const
+  {
+    int lid = teamMember.league_rank();
+    int tid = teamMember.team_rank();
+    int ts  = teamMember.team_size();
+
+    value_type parUpdate = 0;
+    value_type value     = tid * 3 + 1;
+
+    teamMember.team_broadcast([&] (value_type & var) { var*=2; }, value, lid%ts);
+    
+    Kokkos::parallel_reduce( Kokkos::TeamThreadRange( teamMember, ts ), [&] ( const int j, value_type &teamUpdate ) {
+      teamUpdate += value;
+    }, parUpdate );
+
+    if ( teamMember.team_rank() == 0 ) update += parUpdate;
+  }
+
+  static void test_teambroadcast( const size_t league_size )
+  {
+    TestTeamBroadcast functor( league_size );
+
+    typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace > policy_type;
+    typedef Kokkos::TeamPolicy< ScheduleType, ExecSpace, BroadcastTag > policy_type_f;
+
+    const int team_size = policy_type_f(league_size,1).team_size_max( functor, Kokkos::ParallelReduceTag() ); //printf("team_size=%d\n",team_size);
+
+    //team_broadcast with value
+    long total = 0;
+
+    Kokkos::parallel_reduce( policy_type( league_size, team_size ), functor, total );
+    
+    value_type expected_result = 0;
+    for (unsigned int i=0; i<league_size; i++){
+      value_type val  = ((i%team_size)*3+1)*team_size;
+      expected_result+= val;
+    }
+    ASSERT_EQ( size_t( expected_result ), size_t( total ) ); //printf("team_broadcast with value -- expected_result=%d, total=%d\n",expected_result, total);
+
+    //team_broadcast with funtion object
+    total = 0;
+
+    Kokkos::parallel_reduce( policy_type_f( league_size, team_size ), functor, total );
+
+    expected_result = 0;
+    for (unsigned int i=0; i<league_size; i++){
+      value_type val  = ((i%team_size)*3+1)*2*team_size;
+      expected_result+= val;
+    }
+    ASSERT_EQ( size_t( expected_result ), size_t( total ) ); //printf("team_broadcast with funtion object -- expected_result=%d, total=%d\n",expected_result, total);
+  }
+};
+
+template<class ExecSpace>
+struct TestScratchAlignment {
+  struct TestScalar {
+    double x,y,z;
+  };
+  TestScratchAlignment() {
+    test(true);
+    test(false);
+  }
+  typedef Kokkos::View<TestScalar*,typename ExecSpace::scratch_memory_space> ScratchView;
+  typedef Kokkos::View<int*,typename ExecSpace::scratch_memory_space> ScratchViewInt;
+  void test(bool allocate_small) {
+    int shmem_size = ScratchView::shmem_size(11);
+    if(allocate_small) shmem_size += ScratchViewInt::shmem_size(1);
+    Kokkos::parallel_for(Kokkos::TeamPolicy<ExecSpace>(1,1).set_scratch_size(0,Kokkos::PerTeam(shmem_size)),
+     KOKKOS_LAMBDA (const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) {
+     if(allocate_small) ScratchViewInt p(team.team_scratch(0),1);
+     ScratchView a(team.team_scratch(0),11);
+     if(ptrdiff_t(a.data())%sizeof(TestScalar)!=0)
+       Kokkos::abort("Error: invalid scratch view alignment\n");
+    });
+    Kokkos::fence();
+  }
+};
+
+} // namespace
+
+} // namespace Test
+
+/*--------------------------------------------------------------------------*/
diff --git a/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp
new file mode 100644
index 000000000..f9d5add5d
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestTeamTeamSize.hpp
@@ -0,0 +1,146 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cstdio>
+#include <stdexcept>
+#include <sstream>
+#include <iostream>
+
+#include <Kokkos_Core.hpp>
+
+namespace Test {
+
+namespace {
+  template<class T,int N>
+  class MyArray {
+  public:
+    T values[N];
+    KOKKOS_INLINE_FUNCTION
+    void operator+= (const MyArray& src) { for(int i=0; i<N; i++) values[i] += src.values[i]; }
+    KOKKOS_INLINE_FUNCTION
+    void operator=  (const MyArray& src) { for(int i=0; i<N; i++) values[i] =  src.values[i]; }
+    KOKKOS_INLINE_FUNCTION
+    void operator+= (const volatile MyArray& src) volatile { for(int i=0; i<N; i++) values[i] += src.values[i]; }
+    KOKKOS_INLINE_FUNCTION
+    void operator=  (const volatile MyArray& src) volatile { for(int i=0; i<N; i++) values[i] =  src.values[i]; }
+  };
+
+  template<class T,int N, class PolicyType, int S>
+  struct FunctorFor {
+    double static_array[S];
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const typename PolicyType::member_type& team) const {
+    }
+  };
+  template<class T,int N, class PolicyType, int S>
+  struct FunctorReduce {
+    double static_array[S];
+    KOKKOS_INLINE_FUNCTION
+    void operator() (const typename PolicyType::member_type& team, MyArray<T,N>& lval) const {
+      for(int j=0; j<N; j++)
+        lval.values[j] += 1 + lval.values[0];
+    }
+  };
+}
+
+
+typedef Kokkos::TeamPolicy<TEST_EXECSPACE> policy_type;
+typedef Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<128,8> > policy_type_128_8;
+typedef Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<1024,2> > policy_type_1024_2;
+
+template<class T, int N, class PolicyType, int S>
+void test_team_policy_max_recommended_static_size(int scratch_size) {
+  PolicyType p = PolicyType(10000, Kokkos::AUTO, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size));
+  int team_size_max_for = p.team_size_max(FunctorFor<T,N,PolicyType,S>(),Kokkos::ParallelForTag());
+  int team_size_rec_for = p.team_size_recommended(FunctorFor<T,N,PolicyType,S>(),Kokkos::ParallelForTag());
+  int team_size_max_reduce = p.team_size_max(FunctorReduce<T,N,PolicyType,S>(),Kokkos::ParallelReduceTag());
+  int team_size_rec_reduce = p.team_size_recommended(FunctorReduce<T,N,PolicyType,S>(),Kokkos::ParallelReduceTag());
+
+  ASSERT_TRUE( team_size_max_for >= team_size_rec_for );
+  ASSERT_TRUE( team_size_max_reduce >= team_size_rec_reduce );
+  ASSERT_TRUE( team_size_max_for >= team_size_max_reduce );
+
+  Kokkos::parallel_for(PolicyType(10000, team_size_max_for, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)),
+                       FunctorFor<T,N,PolicyType,S>());
+  Kokkos::parallel_for(PolicyType(10000, team_size_rec_for, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)),
+                       FunctorFor<T,N,PolicyType,S>());
+  MyArray<T,N> val;
+  Kokkos::parallel_reduce(PolicyType(10000, team_size_max_reduce, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)),
+                       FunctorReduce<T,N,PolicyType,S>(),val);
+  Kokkos::parallel_reduce(PolicyType(10000, team_size_rec_reduce, 4).set_scratch_size(0,Kokkos::PerTeam(scratch_size)),
+                       FunctorReduce<T,N,PolicyType,S>(),val);
+  Kokkos::fence();
+}
+
+template<class T, int N, class PolicyType>
+void test_team_policy_max_recommended(int scratch_size) {
+  test_team_policy_max_recommended_static_size<T,N,PolicyType,1>(scratch_size);
+  test_team_policy_max_recommended_static_size<T,N,PolicyType,1000>(scratch_size);
+}
+
+TEST_F( TEST_CATEGORY, team_policy_max_recommended )
+{
+  int max_scratch_size = policy_type::scratch_size_max(0);
+  test_team_policy_max_recommended<double,2,policy_type>(0);
+  test_team_policy_max_recommended<double,2,policy_type>(max_scratch_size/3);
+  test_team_policy_max_recommended<double,2,policy_type>(max_scratch_size);
+  test_team_policy_max_recommended<double,2,policy_type_128_8>(0);
+  test_team_policy_max_recommended<double,2,policy_type_128_8>(max_scratch_size/3/8);
+  test_team_policy_max_recommended<double,2,policy_type_128_8>(max_scratch_size/8);
+  test_team_policy_max_recommended<double,2,policy_type_1024_2>(0);
+  test_team_policy_max_recommended<double,2,policy_type_1024_2>(max_scratch_size/3/2);
+  test_team_policy_max_recommended<double,2,policy_type_1024_2>(max_scratch_size/2);
+
+  test_team_policy_max_recommended<double,16,policy_type>(0);
+  test_team_policy_max_recommended<double,16,policy_type>(max_scratch_size/3);
+  test_team_policy_max_recommended<double,16,policy_type>(max_scratch_size);
+  test_team_policy_max_recommended<double,16,policy_type_128_8>(0);
+  test_team_policy_max_recommended<double,16,policy_type_128_8>(max_scratch_size/3/8);
+  test_team_policy_max_recommended<double,16,policy_type_128_8>(max_scratch_size/8);
+  test_team_policy_max_recommended<double,16,policy_type_1024_2>(0);
+  test_team_policy_max_recommended<double,16,policy_type_1024_2>(max_scratch_size/3/2);
+  test_team_policy_max_recommended<double,16,policy_type_1024_2>(max_scratch_size/2);
+}
+
+
+} // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestTeamVector.hpp b/packages/kokkos/core/unit_test/TestTeamVector.hpp
index 783fde600..294247a78 100644
--- a/packages/kokkos/core/unit_test/TestTeamVector.hpp
+++ b/packages/kokkos/core/unit_test/TestTeamVector.hpp
@@ -227,14 +227,13 @@ struct functor_team_for {
 
   functor_team_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
 
-  unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; }
+  typedef typename ExecutionSpace::scratch_memory_space shmem_space;
+  typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
+  unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( typename policy_type::member_type team ) const {
-    typedef typename ExecutionSpace::scratch_memory_space shmem_space;
-    typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
-    typedef typename shared_int::size_type size_type;
-
+    typedef typename shmem_space::size_type size_type;
     const size_type shmemSize = team.team_size() * 13;
     shared_int values = shared_int( team.team_shmem(), shmemSize );
 
@@ -290,7 +289,9 @@ struct functor_team_reduce {
 
   functor_team_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
 
-  unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; }
+  typedef typename ExecutionSpace::scratch_memory_space shmem_space;
+  typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
+  unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( typename policy_type::member_type team ) const {
@@ -333,7 +334,9 @@ struct functor_team_reduce_reducer {
 
   functor_team_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
 
-  unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; }
+  typedef typename ExecutionSpace::scratch_memory_space shmem_space;
+  typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
+  unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( typename policy_type::member_type team ) const {
@@ -376,12 +379,12 @@ struct functor_team_vector_for {
 
   functor_team_vector_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
 
-  unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; }
+  typedef typename ExecutionSpace::scratch_memory_space shmem_space;
+  typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
+  unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( typename policy_type::member_type team ) const {
-    typedef typename ExecutionSpace::scratch_memory_space shmem_space;
-    typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
     typedef typename shared_int::size_type size_type;
 
     const size_type shmemSize = team.team_size() * 13;
@@ -442,7 +445,9 @@ struct functor_team_vector_reduce {
   Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag;
   functor_team_vector_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
 
-  unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; }
+  typedef typename ExecutionSpace::scratch_memory_space shmem_space;
+  typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
+  unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( typename policy_type::member_type team ) const {
@@ -485,7 +490,9 @@ struct functor_team_vector_reduce_reducer {
 
   functor_team_vector_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
 
-  unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; }
+  typedef typename ExecutionSpace::scratch_memory_space shmem_space;
+  typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
+  unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( typename policy_type::member_type team ) const {
@@ -568,12 +575,12 @@ struct functor_vec_for {
 
   functor_vec_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {}
 
-  unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; }
+  typedef typename ExecutionSpace::scratch_memory_space shmem_space;
+  typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
+  unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); }
 
   KOKKOS_INLINE_FUNCTION
   void operator()( typename policy_type::member_type team ) const {
-    typedef typename ExecutionSpace::scratch_memory_space shmem_space;
-    typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int;
 
     shared_int values = shared_int( team.team_shmem(), team.team_size() * 13 );
 
@@ -739,23 +746,16 @@ bool test_scalar( int nteams, int team_size, int test ) {
                           functor_vec_red< Scalar, ExecutionSpace >( d_flag ) );
   }
   else if ( test == 1 ) {
-    // WORKAROUND ROCM/CUDA
+    // WORKAROUND CUDA
     #if defined(KOKKOS_ENABLE_CUDA)
     #if defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) || defined(KOKKOS_ARCH_PASCAL)
     if(!std::is_same<ExecutionSpace,Kokkos::Cuda>::value)
     #endif
     #endif
-    #if defined(KOKKOS_ENABLE_ROCM)
-    if(!std::is_same<ExecutionSpace,Kokkos::Experimental::ROCm>::value)
-    #endif
     Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
                           functor_vec_red_reducer< Scalar, ExecutionSpace >( d_flag ) );
   }
   else if ( test == 2 ) {
-    // WORKAROUND ROCM
-    #if defined(KOKKOS_ENABLE_ROCM)
-    if(!std::is_same<ExecutionSpace,Kokkos::Experimental::ROCm>::value)
-    #endif
     Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
                           functor_vec_scan< Scalar, ExecutionSpace >( d_flag ) );
   }
@@ -776,10 +776,6 @@ bool test_scalar( int nteams, int team_size, int test ) {
                           functor_team_reduce< Scalar, ExecutionSpace >( d_flag ) );
   }
   else if ( test == 7 ) {
-    // WORKAROUND ROCM
-    #if defined(KOKKOS_ENABLE_ROCM)
-    if(!std::is_same<ExecutionSpace,Kokkos::Experimental::ROCm>::value)
-    #endif
     Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ),
                           functor_team_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) );
   }
@@ -792,10 +788,6 @@ bool test_scalar( int nteams, int team_size, int test ) {
                           functor_team_vector_reduce< Scalar, ExecutionSpace >( d_flag ) );
   }
   else if ( test == 10 ) {
-    // WORKAROUND ROCM
-    #if defined(KOKKOS_ENABLE_ROCM)
-    if(!std::is_same<ExecutionSpace,Kokkos::Experimental::ROCm>::value)
-    #endif
     Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ),
                           functor_team_vector_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) );
   }
@@ -955,28 +947,22 @@ TEST_F( TEST_CATEGORY, triple_nested_parallelism )
 // With KOKKOS_DEBUG enabled, the functor uses too many registers to run
 // with a team size of 32 on GPUs, 16 is the max possible (at least on a K80 GPU)
 // See https://github.com/kokkos/kokkos/issues/1513
-#if defined(KOKKOS_DEBUG) && defined(KOKKOS_ENABLE_CUDA)
+#if defined(KOKKOS_ENABLE_DEBUG) && defined(KOKKOS_ENABLE_CUDA)
   if (!std::is_same<TEST_EXECSPACE, Kokkos::Cuda>::value) {
 #endif
 #ifdef KOKKOS_ENABLE_ROCM // ROCm doesn't support TeamSize 32x32
-  if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::ROCm>::value) {
+  if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::ROCm>::value)
 #endif
   TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 32, 32 );
-#ifdef KOKKOS_ENABLE_ROCM
-  }
-#endif
   TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 32, 16 );
-#if defined(KOKKOS_DEBUG) && defined(KOKKOS_ENABLE_CUDA)
+#if defined(KOKKOS_ENABLE_DEBUG) && defined(KOKKOS_ENABLE_CUDA)
   }
 #endif
   TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 16 );
 #ifdef KOKKOS_ENABLE_ROCM // ROCm doesn't support team sizes not powers of two
-  if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::ROCm>::value) {
+  if (!std::is_same<TEST_EXECSPACE, Kokkos::Experimental::ROCm>::value)
 #endif
   TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 7, 16 );
-#ifdef KOKKOS_ENABLE_ROCM
-  }
-#endif
 }
 #endif
 
diff --git a/packages/kokkos/core/unit_test/TestViewAPI_a.hpp b/packages/kokkos/core/unit_test/TestViewAPI_a.hpp
index efc9ab27b..ba74331c5 100644
--- a/packages/kokkos/core/unit_test/TestViewAPI_a.hpp
+++ b/packages/kokkos/core/unit_test/TestViewAPI_a.hpp
@@ -45,11 +45,9 @@
 
 namespace Test {
 
-#if !defined(KOKKOS_ENABLE_ROCM)
 TEST_F( TEST_CATEGORY, view_api_a )
 {
   TestViewAPI< double, TEST_EXECSPACE >::run_test();
 }
-#endif
 
 }
diff --git a/packages/kokkos/core/unit_test/TestViewAPI_b.hpp b/packages/kokkos/core/unit_test/TestViewAPI_b.hpp
index e006dd9b6..03b41db51 100644
--- a/packages/kokkos/core/unit_test/TestViewAPI_b.hpp
+++ b/packages/kokkos/core/unit_test/TestViewAPI_b.hpp
@@ -45,13 +45,11 @@
 
 namespace Test {
 
-#if !defined(KOKKOS_ENABLE_ROCM)
 TEST_F( TEST_CATEGORY, view_api_b )
 {
   TestViewAPI< double, TEST_EXECSPACE >::run_test_view_operator_a();
   TestViewAPI< double, TEST_EXECSPACE >::run_test_mirror();
   TestViewAPI< double, TEST_EXECSPACE >::run_test_scalar();
 }
-#endif
 
 }
diff --git a/packages/kokkos/core/unit_test/TestViewAPI_c.hpp b/packages/kokkos/core/unit_test/TestViewAPI_c.hpp
index a0f03ff18..d34ae6340 100644
--- a/packages/kokkos/core/unit_test/TestViewAPI_c.hpp
+++ b/packages/kokkos/core/unit_test/TestViewAPI_c.hpp
@@ -45,12 +45,10 @@
 
 namespace Test {
 
-#if !defined(KOKKOS_ENABLE_ROCM)
 TEST_F( TEST_CATEGORY, view_api_c )
 {
   TestViewAPI< double, TEST_EXECSPACE >::run_test_deep_copy_empty();
   TestViewAPI< double, TEST_EXECSPACE >::run_test_view_operator_b();
 }
-#endif
 
 }
diff --git a/packages/kokkos/core/unit_test/TestViewAPI_d.hpp b/packages/kokkos/core/unit_test/TestViewAPI_d.hpp
index b984df98b..38e10381f 100644
--- a/packages/kokkos/core/unit_test/TestViewAPI_d.hpp
+++ b/packages/kokkos/core/unit_test/TestViewAPI_d.hpp
@@ -45,7 +45,6 @@
 
 namespace Test {
 
-#if !defined(KOKKOS_ENABLE_ROCM)
 TEST_F( TEST_CATEGORY, view_api_d )
 {
   TestViewAPI< double, TEST_EXECSPACE >::run_test_const();
@@ -54,6 +53,5 @@ TEST_F( TEST_CATEGORY, view_api_d )
   TestViewAPI< double, TEST_EXECSPACE >::run_test_vector();
   TestViewAPI< double, TEST_EXECSPACE >::run_test_view_operator_c();
 }
-#endif
 
 }
diff --git a/packages/kokkos/core/unit_test/TestViewCopy.hpp b/packages/kokkos/core/unit_test/TestViewCopy.hpp
new file mode 100644
index 000000000..7eab9daa1
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestViewCopy.hpp
@@ -0,0 +1,155 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cstdio>
+
+#include <gtest/gtest.h>
+
+#include <Kokkos_Core.hpp>
+
+namespace Test {
+
+namespace {
+
+template < typename ExecSpace >
+struct TestViewCopy {
+
+  using InExecSpace = ExecSpace;
+
+  static void test_view_copy()
+  {
+#if defined( KOKKOS_ENABLE_CUDA ) || defined( KOKKOS_ENABLE_ROCM )
+   // ExecSpace = CudaUVM, CudaHostPinned
+   // This test will fail at runtime with an illegal memory access if something goes wrong
+   // Test 1: deep_copy from host_mirror_space to ExecSpace and ExecSpace back to host_mirror_space
+   {
+    const int dim0 = 4;
+    const int dim1 = 2;
+    const int dim2 = 3;
+
+    typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
+    Rank4ViewType view_4;
+    view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
+
+    typedef typename Kokkos::Impl::is_space<InExecSpace>::host_mirror_space::execution_space host_space_type;
+    Kokkos::View<double**,Kokkos::LayoutLeft,host_space_type> srcView("srcView", dim2, dim2);
+
+    // Strided dst view
+    auto dstView = Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL());
+
+    // host_mirror_space to ExecSpace
+    Kokkos::deep_copy( dstView, srcView );
+    Kokkos::fence();
+
+    // ExecSpace to host_mirror_space 
+    Kokkos::deep_copy( srcView, dstView );
+    Kokkos::fence();
+   }
+
+   // Test 2: deep_copy from Cuda to ExecSpace and ExecSpace back to Cuda
+   {
+    const int dim0 = 4;
+    const int dim1 = 2;
+    const int dim2 = 3;
+
+    typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
+    Rank4ViewType view_4;
+    view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
+
+#if defined( KOKKOS_ENABLE_CUDA )
+    typedef Kokkos::Cuda space_type;
+#endif
+#if defined( KOKKOS_ENABLE_ROCM )
+    typedef Kokkos::Experimental::ROCm space_type;
+#endif
+    Kokkos::View<double**,Kokkos::LayoutLeft,space_type> srcView("srcView", dim2, dim2);
+
+    // Strided dst view
+    auto dstView = Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL());
+
+    // Cuda to ExecSpace
+    Kokkos::deep_copy( dstView, srcView );
+    Kokkos::fence();
+
+    // ExecSpace to Cuda
+    Kokkos::deep_copy( srcView, dstView );
+    Kokkos::fence();
+   }
+
+   // Test 3: deep_copy from host_space to ExecSpace and ExecSpace back to host_space
+   {
+    const int dim0 = 4;
+    const int dim1 = 2;
+    const int dim2 = 3;
+
+    typedef Kokkos::View<double****,InExecSpace> Rank4ViewType;
+    Rank4ViewType view_4;
+    view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2);
+
+    typedef Kokkos::HostSpace host_space_type;
+    Kokkos::View<double**,Kokkos::LayoutLeft,host_space_type> srcView("srcView", dim2, dim2);
+
+    // Strided dst view
+    auto dstView = Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL());
+
+    // host_space to ExecSpace
+    Kokkos::deep_copy( dstView, srcView );
+    Kokkos::fence();
+
+    // ExecSpace to host_space 
+    Kokkos::deep_copy( srcView, dstView );
+    Kokkos::fence();
+   }
+#endif
+  } // end test_view_copy
+
+}; // end struct
+
+} // namespace
+
+TEST_F( TEST_CATEGORY , view_copy_tests ) {
+  //Only include this file to be compiled with CudaUVM and CudaHostPinned
+  TestViewCopy< TEST_EXECSPACE >::test_view_copy();
+}
+
+} // namespace Test
diff --git a/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp
new file mode 100644
index 000000000..3185fa547
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestViewLayoutStrideAssignment.hpp
@@ -0,0 +1,740 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+
+#include <stdexcept>
+#include <sstream>
+#include <iostream>
+#include <time.h>
+
+#include <Kokkos_Core.hpp>
+
+namespace Test {
+
+TEST_F( TEST_CATEGORY , view_layoutstride_left_to_layoutleft_assignment )
+{
+  typedef TEST_EXECSPACE exec_space ;
+
+  auto t = time(0);
+  srand(t);// Use current time as seed for random generator
+  printf("view_layoutstride_left_to_layoutleft_assignment: srand(%lu)\n",size_t(t));
+
+  { // Assignment of rank-1 LayoutLeft = LayoutStride
+    int ndims = 1;
+    int dims [] = {10};
+    int order [] = {0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double*, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double*, Kokkos::LayoutLeft, exec_space > dst = src;
+
+    Kokkos::View< double*, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-2 LayoutLeft = LayoutStride
+    int ndims = 2;
+    int dims [] = {10,9};
+    int order [] = {0,1};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double**, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);  
+  
+    Kokkos::View< double**, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double**, Kokkos::LayoutLeft, exec_space > dst = src;
+
+    Kokkos::View< double**, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-3 LayoutLeft = LayoutStride
+    int ndims = 3;
+    int dims [] = {10,9,8};
+    int order [] = {0,1,2};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double***, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double***, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double***, Kokkos::LayoutLeft, exec_space > dst = src;
+
+    Kokkos::View< double***, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-4 LayoutLeft = LayoutStride
+    int ndims = 4;
+    int dims [] = {10,9,8,7};
+    int order [] = {0,1,2,3};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double****, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double****, Kokkos::LayoutLeft, exec_space > dst = src;
+
+    Kokkos::View< double****, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-5 LayoutLeft = LayoutStride
+    int ndims = 5;
+    int dims [] = {10,9,8,7,6};
+    int order [] = {0,1,2,3,4};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double*****, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double*****, Kokkos::LayoutLeft, exec_space > dst = src;
+
+    Kokkos::View< double*****, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-6 LayoutLeft = LayoutStride
+    int ndims = 6;
+    int dims [] = {10,9,8,7,6,5};
+    int order [] = {0,1,2,3,4,5};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double******, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double******, Kokkos::LayoutLeft, exec_space > dst = src;
+
+    Kokkos::View< double******, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-7 LayoutLeft = LayoutStride
+    int ndims = 7;
+    int dims [] = {10,9,8,7,6,5,4};
+    int order [] = {0,1,2,3,4,5,6};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double*******, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double*******, Kokkos::LayoutLeft, exec_space > dst = src;
+
+    Kokkos::View< double*******, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-8 LayoutLeft = LayoutStride
+    int ndims = 8;
+    int dims [] = {10,9,8,7,6,5,4,3};
+    int order [] = {0,1,2,3,4,5,6,7};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double********, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double********, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double********, Kokkos::LayoutLeft, exec_space > dst = src;
+
+    Kokkos::View< double********, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+
+}
+
+TEST_F( TEST_CATEGORY , view_layoutstride_right_to_layoutright_assignment )
+{
+  typedef TEST_EXECSPACE exec_space ;
+
+  auto t = time(0);
+  srand(t);// Use current time as seed for random generator
+  printf("view_layoutstride_right_to_layoutright_assignment: srand(%lu)\n",size_t(t));
+
+  { // Assignment of rank-1 LayoutRight = LayoutStride
+    int ndims = 1;
+    int dims [] = {10};
+    int order [] = {0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double*, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double*, Kokkos::LayoutRight, exec_space > dst = src;
+
+    Kokkos::View< double*, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-2 LayoutRight = LayoutStride
+    int ndims = 2;
+    int dims [] = {10,9};
+    int order [] = {1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double**, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double**, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double**, Kokkos::LayoutRight, exec_space > dst = src;
+
+    Kokkos::View< double**, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-3 LayoutRight = LayoutStride
+    int ndims = 3;
+    int dims [] = {10,9,8};
+    int order [] = {2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double***, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double***, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double***, Kokkos::LayoutRight, exec_space > dst = src;
+
+    Kokkos::View< double***, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-4 LayoutRight = LayoutStride
+    int ndims = 4;
+    int dims [] = {10,9,8,7};
+    int order [] = {3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double****, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double****, Kokkos::LayoutRight, exec_space > dst = src;
+
+    Kokkos::View< double****, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-5 LayoutRight = LayoutStride
+    int ndims = 5;
+    int dims [] = {10,9,8,7,6};
+    int order [] = {4,3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double*****, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double*****, Kokkos::LayoutRight, exec_space > dst = src;
+
+    Kokkos::View< double*****, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-6 LayoutRight = LayoutStride
+    int ndims = 6;
+    int dims [] = {10,9,8,7,6,5};
+    int order [] = {5,4,3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double******, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double******, Kokkos::LayoutRight, exec_space > dst = src;
+
+    Kokkos::View< double******, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-7 LayoutRight = LayoutStride
+    int ndims = 7;
+    int dims [] = {10,9,8,7,6,5,4};
+    int order [] = {6,5,4,3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double*******, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double*******, Kokkos::LayoutRight, exec_space > dst = src;
+
+    Kokkos::View< double*******, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-8 LayoutRight = LayoutStride
+    int ndims = 8;
+    int dims [] = {10,9,8,7,6,5,4,3};
+    int order [] = {7,6,5,4,3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double********, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double********, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double********, Kokkos::LayoutRight, exec_space > dst = src;
+
+    Kokkos::View< double********, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+
+}
+
+TEST_F( TEST_CATEGORY , view_layoutstride_right_to_layoutleft_assignment )
+{
+  typedef TEST_EXECSPACE exec_space ;
+
+  auto t = time(0);
+  srand(t);// Use current time as seed for random generator
+  printf("view_layoutstride_right_to_layoutleft_assignment: srand(%lu)\n",size_t(t));
+
+  { // Assignment of rank-1 LayoutLeft = LayoutStride (LayoutRight compatible)
+    int ndims = 1;
+    int dims [] = {10};
+    int order [] = {0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double*, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double*, Kokkos::LayoutLeft, exec_space > dst;
+    
+    dst = src;
+
+    Kokkos::View< double*, Kokkos::LayoutLeft, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-2 LayoutLeft = LayoutStride (LayoutRight compatible)
+    int ndims = 2;
+    int dims [] = {10,9};
+    int order [] = {1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double**, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double**, Kokkos::LayoutLeft, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-3 LayoutLeft = LayoutStride (LayoutRight compatible)
+    int ndims = 3;
+    int dims [] = {10,9,8};
+    int order [] = {2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double***, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double***, Kokkos::LayoutLeft, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-4 LayoutLeft = LayoutStride (LayoutRight compatible)
+    int ndims = 4;
+    int dims [] = {10,9,8,7};
+    int order [] = {3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double****, Kokkos::LayoutLeft, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-5 LayoutLeft = LayoutStride (LayoutRight compatible)
+    int ndims = 5;
+    int dims [] = {10,9,8,7,6};
+    int order [] = {4,3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double*****, Kokkos::LayoutLeft, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-6 LayoutLeft = LayoutStride (LayoutRight compatible)
+    int ndims = 6;
+    int dims [] = {10,9,8,7,6,5};
+    int order [] = {5,4,3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double******, Kokkos::LayoutLeft, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-7 LayoutLeft = LayoutStride (LayoutRight compatible)
+    int ndims = 7;
+    int dims [] = {10,9,8,7,6,5,4};
+    int order [] = {6,5,4,3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double*******, Kokkos::LayoutLeft, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-8 LayoutLeft = LayoutStride (LayoutRight compatible)
+    int ndims = 8;
+    int dims [] = {10,9,8,7,6,5,4,3};
+    int order [] = {7,6,5,4,3,2,1,0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double********, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double********, Kokkos::LayoutLeft, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+
+}
+
+TEST_F( TEST_CATEGORY , view_layoutstride_left_to_layoutright_assignment )
+{
+  typedef TEST_EXECSPACE exec_space ;
+
+  auto t = time(0);
+  srand(t);// Use current time as seed for random generator
+  printf("view_layoutstride_left_to_layoutright_assignment: srand(%lu)\n",size_t(t));
+
+  { // Assignment of rank-1 LayoutRight = LayoutStride (LayoutLeft compatible)
+    int ndims = 1;
+    int dims [] = {10};
+    int order [] = {0};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double*, Kokkos::LayoutStride, exec_space >::HostMirror h_src = Kokkos::create_mirror_view( src );
+
+    for(size_t i=0; i<src.span(); i++) h_src.data()[i] = (double)rand() / RAND_MAX * (100);
+
+    Kokkos::deep_copy( src, h_src );
+
+    Kokkos::View< double*, Kokkos::LayoutRight, exec_space > dst;
+    
+    dst = src;
+
+    Kokkos::View< double*, Kokkos::LayoutRight, exec_space >::HostMirror h_dst = Kokkos::create_mirror_view( dst );
+
+    Kokkos::deep_copy( h_dst, dst );
+
+    bool test = true;
+    for(size_t i=0; i<src.span();i++){
+      if (h_src.data()[i]!=h_dst.data()[i]) {test = false;break;}
+    }
+    ASSERT_EQ( dst.span(), src.span() );
+    ASSERT_EQ( test, true );
+  }
+  { // Assignment of rank-2 LayoutRight = LayoutStride (LayoutLeft compatible)
+    int ndims = 2;
+    int dims [] = {10,9};
+    int order [] = {0,1};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double**, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double**, Kokkos::LayoutRight, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-3 LayoutRight = LayoutStride (LayoutLeft compatible)
+    int ndims = 3;
+    int dims [] = {10,9,8};
+    int order [] = {0,1,2};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double***, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double***, Kokkos::LayoutRight, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-4 LayoutRight = LayoutStride (LayoutLeft compatible)
+    int ndims = 4;
+    int dims [] = {10,9,8,7};
+    int order [] = {0,1,2,3};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double****, Kokkos::LayoutRight, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-5 LayoutRight = LayoutStride (LayoutLeft compatible)
+    int ndims = 5;
+    int dims [] = {10,9,8,7,6};
+    int order [] = {0,1,2,3,4};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*****, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double*****, Kokkos::LayoutRight, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-6 LayoutRight = LayoutStride (LayoutLeft compatible)
+    int ndims = 6;
+    int dims [] = {10,9,8,7,6,5};
+    int order [] = {0,1,2,3,4,5};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+	
+    Kokkos::View< double******, Kokkos::LayoutRight, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-7 LayoutRight = LayoutStride (LayoutLeft compatible)
+    int ndims = 7;
+    int dims [] = {10,9,8,7,6,5,4};
+    int order [] = {0,1,2,3,4,5,6};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double*******, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double*******, Kokkos::LayoutRight, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+  { // Assignment of rank-8 LayoutRight = LayoutStride (LayoutLeft compatible)
+    int ndims = 8;
+    int dims [] = {10,9,8,7,6,5,4,3};
+    int order [] = {0,1,2,3,4,5,6,7};
+    Kokkos::LayoutStride layout = Kokkos::LayoutStride::order_dimensions(ndims, order, dims);
+    Kokkos::View< double********, Kokkos::LayoutStride, exec_space > src("LayoutStride", layout);
+
+    Kokkos::View< double********, Kokkos::LayoutRight, exec_space > dst;
+
+    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+    ASSERT_DEATH({dst=src;}, "View assignment must have compatible layouts");
+  }
+
+}
+
+}
+
diff --git a/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp b/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp
new file mode 100644
index 000000000..e765e8b06
--- /dev/null
+++ b/packages/kokkos/core/unit_test/TestViewLayoutTiled.hpp
@@ -0,0 +1,1215 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cstdio>
+
+#include <gtest/gtest.h>
+
+#include <Kokkos_Core.hpp>
+#include <impl/Kokkos_ViewLayoutTiled.hpp>
+
+#include <type_traits>
+#include <typeinfo>
+
+namespace Test {
+
+#ifndef KOKKOS_ENABLE_DEPRECATED_CODE
+namespace {
+
+template <typename ExecSpace >
+struct TestViewLayoutTiled {
+
+  typedef double Scalar;
+
+  static constexpr int T0 = 2;
+  static constexpr int T1 = 4;
+  static constexpr int T2 = 4;
+  static constexpr int T3 = 2;
+  static constexpr int T4 = 2;
+  static constexpr int T5 = 2;
+  static constexpr int T6 = 2;
+  static constexpr int T7 = 2;
+
+  // Rank 2
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1>   LayoutLL_2D_2x4;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1>  LayoutRL_2D_2x4;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1>  LayoutLR_2D_2x4;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1> LayoutRR_2D_2x4;
+
+  // Rank 3
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1, T2>   LayoutLL_3D_2x4x4;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1, T2>  LayoutRL_3D_2x4x4;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1, T2>  LayoutLR_3D_2x4x4;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2> LayoutRR_3D_2x4x4;
+
+  // Rank 4
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Left, T0, T1, T2, T3>   LayoutLL_4D_2x4x4x2;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Left, T0, T1, T2, T3>  LayoutRL_4D_2x4x4x2;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Left, Kokkos::Iterate::Right, T0, T1, T2, T3>  LayoutLR_4D_2x4x4x2;
+  typedef Kokkos::Experimental::LayoutTiled<Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2, T3> LayoutRR_4D_2x4x4x2;
+
+
+  static void test_view_layout_tiled_2d( const int N0, const int N1 )
+  {
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
+    const int FT = T0*T1;
+
+    const int NT0 = int( std::ceil( N0 / T0 ) );
+    const int NT1 = int( std::ceil( N1 / T1 ) );
+    // Test create_mirror_view, deep_copy
+    // Create LL View
+    {
+      typedef typename Kokkos::View< Scalar**, LayoutLL_2D_2x4, ExecSpace > ViewType;
+      ViewType v("v", N0, N1);
+
+      typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);
+
+      // Initialize host-view
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          hv(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i + j*T0 );
+        } }
+      } }
+
+      // copy to device
+      Kokkos::deep_copy(v, hv);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 2 LL", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int ti, const int tj) {
+          for ( int j = 0; j < T1; ++j ) {
+          for ( int i = 0; i < T0; ++i ) {
+            if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
+          } }
+        });
+
+      Kokkos::deep_copy(hv, v);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
+          if ( tile_subview(i,j) != (( ti + tj*NT0 )*FT + ( i + j*T0 ) + 1 )) { ++counter_inc; }
+        } }
+      } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    }
+
+    // Create RL View
+    {
+      typedef typename Kokkos::View< Scalar**, LayoutRL_2D_2x4, ExecSpace > ViewType;
+      Kokkos::View< Scalar**, LayoutRL_2D_2x4, ExecSpace > v("v", N0, N1);
+
+      typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);
+
+      // Initialize host-view
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          hv(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i + j*T0 );
+        } }
+      } }
+
+      // copy to device
+      Kokkos::deep_copy(v, hv);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 2 RL", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int ti, const int tj) {
+          for ( int j = 0; j < T1; ++j ) {
+          for ( int i = 0; i < T0; ++i ) {
+            if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
+          } }
+        });
+
+      Kokkos::deep_copy(hv, v);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+        auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
+          if ( tile_subview(i,j) != (( ti*NT1 + tj )*FT + ( i + j*T0 ) + 1 )) { ++counter_inc; }
+        } }
+      } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+
+    // Create LR View
+    {
+      typedef typename Kokkos::View< Scalar**, LayoutLR_2D_2x4, ExecSpace > ViewType;
+      Kokkos::View< Scalar**, LayoutLR_2D_2x4, ExecSpace > v("v", N0, N1);
+
+      typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);
+
+      // Initialize host-view
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+          hv(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i*T1 + j );
+        } }
+      } }
+
+      // copy to device
+      Kokkos::deep_copy(v, hv);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 2 LR", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int ti, const int tj) {
+          for ( int j = 0; j < T1; ++j ) {
+          for ( int i = 0; i < T0; ++i ) {
+            if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
+          } }
+        });
+
+      Kokkos::deep_copy(hv, v);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+          if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
+          if ( tile_subview(i,j) != ( ( ti + tj*NT0 )*FT + ( i*T1 + j ) + 1 ) ) { ++counter_inc; }
+        } }
+      } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+
+    // Create RR View
+    {
+      typedef typename Kokkos::View< Scalar**, LayoutRR_2D_2x4, ExecSpace > ViewType;
+      Kokkos::View< Scalar**, LayoutRR_2D_2x4, ExecSpace > v("v", N0, N1);
+
+      typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v);
+
+      // Initialize host-view
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+          hv(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i*T1 + j );
+        } }
+      } }
+
+      // copy to device
+      Kokkos::deep_copy(v, hv);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0}, {NT0, NT1}, {T0,T1} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 2 LR", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int ti, const int tj) {
+          for ( int j = 0; j < T1; ++j ) {
+          for ( int i = 0; i < T0; ++i ) {
+            if ( (ti*T0 + i < N0) && (tj*T1 + j < N1) ) { v(ti*T0 + i, tj*T1+j) += 1; }
+          } }
+        });
+
+      Kokkos::deep_copy(hv, v);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+        auto tile_subview = Kokkos::tile_subview( hv, ti, tj );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+          if ( tile_subview(i,j) != hv(ti*T0+i, tj*T1+j) ) { ++counter_subview; }
+          if ( tile_subview(i,j) != ( ( ti*NT1 + tj )*FT + ( i*T1 + j ) + 1 ) ) { ++counter_inc; }
+        } }
+      } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+#endif
+#endif
+  } // end test_view_layout_tiled_2d
+
+
+  static void test_view_layout_tiled_3d( const int N0, const int N1, const int N2 )
+  {
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
+
+    const int FT = T0*T1*T2;
+
+    const int NT0 = int( std::ceil( N0 / T0 ) );
+    const int NT1 = int( std::ceil( N1 / T1 ) );
+    const int NT2 = int( std::ceil( N2 / T2 ) );
+
+    // Create LL View
+    {
+      typedef Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, ExecSpace > ViewType;
+      Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, ExecSpace > dv("dv", N0, N1, N2);
+
+      typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
+
+      // Initialize on host
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 );
+        } } }
+      } } }
+
+      // copy to device
+      Kokkos::deep_copy(dv, v);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 3 LL", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int i, const int j, const int k) {
+          dv(i,j,k) += 1;
+        });
+
+      Kokkos::deep_copy(v, dv);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
+          if ( tile_subview(i,j,k) != ( ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 ) +  1 ) ) { ++counter_inc; }
+        } } }
+      } } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+
+    // Create RL View
+    {
+      typedef Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, ExecSpace > ViewType;
+      Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, ExecSpace > dv("dv", N0, N1, N2);
+
+      typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
+
+      // Initialize on host
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 );
+        } } }
+      } } }
+
+      // copy to device
+      Kokkos::deep_copy(dv, v);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 3 RL", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int i, const int j, const int k) {
+          dv(i,j,k) += 1;
+        });
+
+      Kokkos::deep_copy(v, dv);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
+          if ( tile_subview(i,j,k) != ( ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ) + 1 ) ) { ++counter_inc; }
+        } } }
+      } } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+
+    // Create LR View
+    {
+      typedef Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, ExecSpace > ViewType;
+      Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, ExecSpace > dv("dv", N0, N1, N2);
+
+      typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
+
+      // Initialize on host
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k );
+        } } }
+      } } }
+
+      // copy to device
+      Kokkos::deep_copy(dv, v);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 3 LR", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int i, const int j, const int k) {
+          dv(i,j,k) += 1;
+        });
+
+      Kokkos::deep_copy(v, dv);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+          if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
+          if ( tile_subview(i,j,k) != ( ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ) + 1 ) ) { ++counter_inc; }
+        } } }
+      } } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+
+    // Create RR View
+    {
+      typedef Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, ExecSpace > ViewType;
+      Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, ExecSpace > dv("dv", N0, N1, N2);
+
+      typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
+
+      // Initialize on host
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k );
+        } } }
+      } } }
+
+      // copy to device
+      Kokkos::deep_copy(dv, v);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0}, {N0,N1,N2}, {T0,T1,T2} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 3 RR", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int i, const int j, const int k) {
+          dv(i,j,k) += 1;
+        });
+
+      Kokkos::deep_copy(v, dv);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+          if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter_subview; }
+          if ( tile_subview(i,j,k) != ( ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ) + 1 ) ) { ++counter_inc; }
+        } } }
+      } } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+#endif
+#endif
+  } // end test_view_layout_tiled_3d
+
+
+  static void test_view_layout_tiled_4d( const int N0, const int N1, const int N2, const int N3 )
+  {
+#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
+#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
+    const int FT = T0*T1*T2*T3;
+
+    const int NT0 = int( std::ceil( N0 / T0 ) );
+    const int NT1 = int( std::ceil( N1 / T1 ) );
+    const int NT2 = int( std::ceil( N2 / T2 ) );
+    const int NT3 = int( std::ceil( N3 / T3 ) );
+
+    // Create LL View
+    {
+      typedef Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, ExecSpace > ViewType;
+      Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, ExecSpace > dv("dv", N0, N1, N2, N3);
+
+      typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
+
+      // Initialize on host
+      for ( int tl = 0; tl < NT3; ++tl ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int l = 0; l < T3; ++l ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
+        } } } }
+      } } } }
+
+      // copy to device
+      Kokkos::deep_copy(dv, v);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 4 LL", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
+          dv(i,j,k,l) += 1;
+        });
+
+      Kokkos::deep_copy(v, dv);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int tl = 0; tl < NT3; ++tl ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
+        for ( int l = 0; l < T3; ++l ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
+          if ( tile_subview(i,j,k,l) != ( ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) + 1 ) ) { ++counter_inc; }
+        } } } }
+      } } } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+
+    // Create RL View
+    {
+      typedef Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, ExecSpace > ViewType;
+      Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, ExecSpace > dv("dv", N0, N1, N2, N3);
+
+      typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
+
+      // Initialize on host
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tl = 0; tl < NT3; ++tl ) {
+        for ( int l = 0; l < T3; ++l ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
+        } } } }
+      } } } }
+
+      // copy to device
+      Kokkos::deep_copy(dv, v);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 4 RL", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
+          dv(i,j,k,l) += 1;
+        });
+
+      Kokkos::deep_copy(v, dv);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tl = 0; tl < NT3; ++tl ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
+        for ( int l = 0; l < T3; ++l ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
+          if ( tile_subview(i,j,k,l) != ( ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) + 1 ) ) { ++counter_inc; }
+        } } } }
+      } } } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+
+    // Create LR View
+    {
+      typedef Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, ExecSpace > ViewType;
+      Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, ExecSpace > dv("dv", N0, N1, N2, N3);
+
+      typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
+
+      // Initialize on host
+      for ( int tl = 0; tl < NT3; ++tl ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int l = 0; l < T3; ++l ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
+        } } } }
+      } } } }
+
+      // copy to device
+      Kokkos::deep_copy(dv, v);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 4 LR", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
+          dv(i,j,k,l) += 1;
+        });
+
+      Kokkos::deep_copy(v, dv);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+      for ( int tl = 0; tl < NT3; ++tl ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int l = 0; l < T3; ++l ) {
+          if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
+          if ( tile_subview(i,j,k,l) != ( ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) + 1 ) ) { ++counter_inc; }
+        } } } }
+      } } } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+
+    // Create RR View
+    {
+      typedef Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, ExecSpace > ViewType;
+      Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, ExecSpace > dv("dv", N0, N1, N2, N3);
+
+      typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv);
+
+      // Initialize on host
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tl = 0; tl < NT3; ++tl ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int l = 0; l < T3; ++l ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
+        } } } }
+      } } } }
+
+      // copy to device
+      Kokkos::deep_copy(dv, v);
+
+      Kokkos::MDRangePolicy< Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, ExecSpace > mdrangepolicy( {0,0,0,0}, {N0,N1,N2,N3}, {T0,T1,T2,T3} );
+
+      // iterate by tile
+      Kokkos::parallel_for( "ViewTile rank 4 RR", mdrangepolicy, 
+        KOKKOS_LAMBDA (const int i, const int j, const int k, const int l) {
+          dv(i,j,k,l) += 1;
+        });
+
+      Kokkos::deep_copy(v, dv);
+
+      long counter_subview = 0;
+      long counter_inc = 0;
+
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tl = 0; tl < NT3; ++tl ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int l = 0; l < T3; ++l ) {
+          if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter_subview; }
+          if ( tile_subview(i,j,k,l) != ( ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) + 1 ) ) { ++counter_inc; }
+        } } } }
+      } } } }
+      ASSERT_EQ(counter_subview, long(0));
+      ASSERT_EQ(counter_inc, long(0));
+    } // end scope
+#endif
+#endif
+  } // end test_view_layout_tiled_4d
+
+
+  static void test_view_layout_tiled_subtile_2d( const int N0, const int N1 )
+  {
+    const int FT = T0*T1;
+
+    const int NT0 = int( std::ceil( N0 / T0 ) );
+    const int NT1 = int( std::ceil( N1 / T1 ) );
+
+    // Counter to check for errors at the end
+    long counter[4] = {0};
+
+    // Create LL View
+    {
+      Kokkos::View< Scalar**, LayoutLL_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i + j*T0 );
+        } }
+      } }
+
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj );
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[0]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
+          std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << "  v = " << v(ti*T0 + i, tj*T1+j) << "  flat idx = " << ( ti + tj*NT0 )*FT + ( i + j*T0 ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
+#endif
+        } }
+      } }
+    } // end scope
+
+    // Create RL View
+    {
+      Kokkos::View< Scalar**, LayoutRL_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i + j*T0 );
+        } }
+      } }
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj );
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[1]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
+          std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << "  v = " << v(ti*T0 + i, tj*T1+j) << "  flat idx = " << ( ti*NT1 + tj )*FT + ( i + j*T0 ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
+#endif
+        } }
+      } }
+    } // end scope
+
+    // Create LR View
+    {
+      Kokkos::View< Scalar**, LayoutLR_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+          v(ti*T0 + i, tj*T1+j) = ( ti + tj*NT0 )*FT + ( i*T1 + j );
+        } }
+      } }
+
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+          if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[2]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
+          std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << "  v = " << v(ti*T0 + i, tj*T1+j) << "  flat idx = " << ( ti + tj*NT0 )*FT + ( i*T1 + j ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
+#endif
+        } }
+      } }
+    } // end scope
+
+    // Create RR View
+    {
+      Kokkos::View< Scalar**, LayoutRR_2D_2x4, Kokkos::HostSpace > v("v", N0, N1);
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+          v(ti*T0 + i, tj*T1+j) = ( ti*NT1 + tj )*FT + ( i*T1 + j );
+        } }
+      } }
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+          if ( tile_subview(i,j) != v(ti*T0+i, tj*T1+j) ) { ++counter[3]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1 = " << ti*T0 + i << "," << tj*T1 + j << std::endl;
+          std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," << j << "  v = " << v(ti*T0 + i, tj*T1+j) << "  flat idx = " << ( ti*NT1 + tj )*FT + ( i*T1 + j ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j) << std::endl;
+          std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
+#endif
+        } }
+      } }
+    } // end scope
+
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+    std::cout << "subview_tile vs view errors:\n"
+      << " LL: " << counter[0]
+      << " RL: " << counter[1]
+      << " LR: " << counter[2]
+      << " RR: " << counter[3] 
+      << std::endl;
+#endif
+
+    ASSERT_EQ(counter[0], long(0));
+    ASSERT_EQ(counter[1], long(0));
+    ASSERT_EQ(counter[2], long(0));
+    ASSERT_EQ(counter[3], long(0));
+  } // end test_view_layout_tiled_subtile_2d
+
+
+  static void test_view_layout_tiled_subtile_3d( const int N0, const int N1, const int N2 )
+  {
+
+    const int FT = T0*T1*T2;
+
+    const int NT0 = int( std::ceil( N0 / T0 ) );
+    const int NT1 = int( std::ceil( N1 / T1 ) );
+    const int NT2 = int( std::ceil( N2 / T2 ) );
+
+    // Counter to check for errors at the end
+    long counter[4] = {0};
+    // Create LL View
+    {
+      Kokkos::View< Scalar***, LayoutLL_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 );
+        } } }
+      } } }
+
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[0]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
+          std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << "  v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << "  flat idx = " << ( ti + tj*NT0 + tk*N0*N1 )*FT + ( i + j*T0 + k*T0*T1 ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
+          std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
+#endif
+        } } }
+      } } }
+    } // end scope
+
+    // Create RL View
+    {
+      Kokkos::View< Scalar***, LayoutRL_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 );
+        } } }
+      } } }
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[1]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
+          std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << "  v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << "  flat idx = " << ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i + j*T0 + k*T0*T1 ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
+#endif
+        } } }
+      } } }
+    } // end scope
+
+    // Create LR View
+    {
+      Kokkos::View< Scalar***, LayoutLR_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k );
+        } } }
+      } } }
+
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+          if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[2]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
+          std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << "  v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << "  flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 )*FT + ( i*T1*T2 + j*T2 + k ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
+          std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
+#endif
+        } } }
+      } } }
+    } // end scope
+
+    // Create RR View
+    {
+      Kokkos::View< Scalar***, LayoutRR_3D_2x4x4, Kokkos::HostSpace > v("v", N0, N1, N2);
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k) = ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k );
+        } } }
+      } } }
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+          if ( tile_subview(i,j,k) != v(ti*T0+i, tj*T1+j, tk*T2+k) ) { ++counter[3]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1,idx2 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << std::endl;
+          std::cout << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk << "," << i << "," << j << "," << k << "  v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k) << "  flat idx = " << ( ti*NT1*NT2 + tj*NT2 + tk )*FT + ( i*T1*T2 + j*T2 + k ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j,k) << std::endl;
+          std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
+#endif
+        } } }
+      } } }
+    } // end scope
+
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+    std::cout << "subview_tile vs view errors:\n"
+      << " LL: " << counter[0]
+      << " RL: " << counter[1]
+      << " LR: " << counter[2]
+      << " RR: " << counter[3] 
+      << std::endl;
+#endif
+
+    ASSERT_EQ(counter[0], long(0));
+    ASSERT_EQ(counter[1], long(0));
+    ASSERT_EQ(counter[2], long(0));
+    ASSERT_EQ(counter[3], long(0));
+
+  } // end test_view_layout_tiled_subtile_3d
+
+
+  static void test_view_layout_tiled_subtile_4d( const int N0, const int N1, const int N2, const int N3 )
+  {
+    const int FT = T0*T1*T2*T3;
+
+    const int NT0 = int( std::ceil( N0 / T0 ) );
+    const int NT1 = int( std::ceil( N1 / T1 ) );
+    const int NT2 = int( std::ceil( N2 / T2 ) );
+    const int NT3 = int( std::ceil( N3 / T3 ) );
+
+    // Counter to check for errors at the end
+    long counter[4] = {0};
+    // Create LL View
+    {
+      Kokkos::View< Scalar****, LayoutLL_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);
+      for ( int tl = 0; tl < NT3; ++tl ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int l = 0; l < T3; ++l ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
+        } } } }
+      } } } }
+
+      for ( int tl = 0; tl < NT3; ++tl ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
+        for ( int l = 0; l < T3; ++l ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[0]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl;
+          std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
+          << "  i,j,k,l: " <<  i << "," << j << "," << k << "," << l
+          << "  v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) 
+          << "  flat idx = " << ( ti + tj*NT0 + tk*N0*N1 + tl*N0*N1*N2 )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
+          std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
+#endif
+        } } } }
+      } } } }
+    } // end scope
+
+    // Create RL View
+    {
+      Kokkos::View< Scalar****, LayoutRL_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tl = 0; tl < NT3; ++tl ) {
+        for ( int l = 0; l < T3; ++l ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 );
+        } } } }
+      } } } }
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tl = 0; tl < NT3; ++tl ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
+        for ( int l = 0; l < T3; ++l ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int i = 0; i < T0; ++i ) {
+          if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[1]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl;
+          std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
+          << "  i,j,k,l: " <<  i << "," << j << "," << k << "," << l
+          << "  v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) 
+          << "  flat idx = " << ( ti*NT1*NT2*N3 + tj*NT2*N3 + tk*N3 + tl )*FT + ( i + j*T0 + k*T0*T1 + l*T0*T1*T2 ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
+          std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
+#endif
+        } } } }
+      } } } }
+    } // end scope
+
+    // Create LR View
+    {
+      Kokkos::View< Scalar****, LayoutLR_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);
+      for ( int tl = 0; tl < NT3; ++tl ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int l = 0; l < T3; ++l ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
+        } } } }
+      } } } }
+
+      for ( int tl = 0; tl < NT3; ++tl ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int ti = 0; ti < NT0; ++ti ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int l = 0; l < T3; ++l ) {
+          if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[2]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl;
+          std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
+          << "  i,j,k,l: " <<  i << "," << j << "," << k << "," << l
+          << "  v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) 
+          << "  flat idx = " << ( ti + tj*NT0 + tk*NT0*NT1 + tl*NT0*NT1*NT2 )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
+          std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
+#endif
+        } } } }
+      } } } }
+    } // end scope
+
+    // Create RR View
+    {
+      Kokkos::View< Scalar****, LayoutRR_4D_2x4x4x2, Kokkos::HostSpace > v("v", N0, N1, N2, N3);
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tl = 0; tl < NT3; ++tl ) {
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int l = 0; l < T3; ++l ) {
+          v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) = ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l );
+        } } } }
+      } } } }
+
+      for ( int ti = 0; ti < NT0; ++ti ) {
+      for ( int tj = 0; tj < NT1; ++tj ) {
+      for ( int tk = 0; tk < NT2; ++tk ) {
+      for ( int tl = 0; tl < NT3; ++tl ) {
+        auto tile_subview = Kokkos::tile_subview( v, ti, tj, tk, tl );
+        for ( int i = 0; i < T0; ++i ) {
+        for ( int j = 0; j < T1; ++j ) {
+        for ( int k = 0; k < T2; ++k ) {
+        for ( int l = 0; l < T3; ++l ) {
+          if ( tile_subview(i,j,k,l) != v(ti*T0+i, tj*T1+j, tk*T2+k, tl*T3 + l) ) { ++counter[3]; }
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+          std::cout << "idx0,idx1,idx2,idx3 = " << ti*T0 + i << "," << tj*T1 + j << "," << tk*T2 + k << "," << tl*T3 + l<< std::endl;
+          std::cout << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk << "," << tl << ","
+          << "  i,j,k,l: " <<  i << "," << j << "," << k << "," << l
+          << "  v = " << v(ti*T0 + i, tj*T1+j, tk*T2 + k, tl*T3 + l) 
+          << "  flat idx = " << ( ti*NT1*NT2*NT3 + tj*NT2*NT3 + tk*NT3 + tl )*FT + ( i*T1*T2*T3 + j*T2*T3 + k*T3 + l ) << std::endl;
+          std::cout << "subview_tile output = " << tile_subview(i,j,k,l) << std::endl;
+          std::cout << "subview tile rank = " << Kokkos::rank(tile_subview) << std::endl;
+#endif
+        } } } }
+      } } } }
+    } // end scope
+
+#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT
+    std::cout << "subview_tile vs view errors:\n"
+      << " LL: " << counter[0]
+      << " RL: " << counter[1]
+      << " LR: " << counter[2]
+      << " RR: " << counter[3] 
+      << std::endl;
+#endif
+
+    ASSERT_EQ(counter[0], long(0));
+    ASSERT_EQ(counter[1], long(0));
+    ASSERT_EQ(counter[2], long(0));
+    ASSERT_EQ(counter[3], long(0));
+
+  } // end test_view_layout_tiled_subtile_4d
+
+}; // end TestViewLayoutTiled struct
+
+} // namespace
+
+TEST_F( TEST_CATEGORY , view_layouttiled) {
+  // These two examples are iterating by tile, then within a tile - not by extents
+  // If N# is not a power of two, but want to iterate by tile then within a tile, need to check that mapped index is within extent
+  TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_2d( 4, 12 );
+  TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_3d( 4, 12, 16 );
+  TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_4d( 4, 12, 16, 12 );
+}
+TEST_F( TEST_CATEGORY , view_layouttiled_subtile) {
+  // These two examples are iterating by tile, then within a tile - not by extents
+  // If N# is not a power of two, but want to iterate by tile then within a tile, need to check that mapped index is within extent
+  TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_subtile_2d( 4, 12 );
+  TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_subtile_3d( 4, 12, 16 );
+  TestViewLayoutTiled< TEST_EXECSPACE >::test_view_layout_tiled_subtile_4d( 4, 12, 16, 12 );
+}
+#endif
+} // namespace Test
diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy.cpp
new file mode 100644
index 000000000..374859235
--- /dev/null
+++ b/packages/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewCopy.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cuda/TestCudaHostPinned_Category.hpp>
+#include <TestViewCopy.hpp>
diff --git a/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy.cpp b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy.cpp
new file mode 100644
index 000000000..b0ea67a1b
--- /dev/null
+++ b/packages/kokkos/core/unit_test/cuda/TestCudaUVM_ViewCopy.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cuda/TestCudaUVM_Category.hpp>
+#include <TestViewCopy.hpp>
diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Other.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Other.cpp
index f63409da2..788e458ee 100644
--- a/packages/kokkos/core/unit_test/cuda/TestCuda_Other.cpp
+++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Other.cpp
@@ -50,3 +50,4 @@
 #include<TestTile.hpp>
 
 #include<TestViewCtorPropEmbeddedDim.hpp>
+#include<TestViewLayoutTiled.hpp>
diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Reductions_DeviceView.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Reductions_DeviceView.cpp
new file mode 100644
index 000000000..5b3409014
--- /dev/null
+++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Reductions_DeviceView.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cuda/TestCuda_Category.hpp>
+#include <TestReduceDeviceView.hpp>
diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_Team.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_Team.cpp
index c63358509..374068345 100644
--- a/packages/kokkos/core/unit_test/cuda/TestCuda_Team.cpp
+++ b/packages/kokkos/core/unit_test/cuda/TestCuda_Team.cpp
@@ -68,6 +68,22 @@ TEST_F( TEST_CATEGORY, team_reduce )
   TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 );
   TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 );
 }
+
+TEST_F( TEST_CATEGORY, team_broadcast )
+{
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 0 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 0 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 2 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 2 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 16 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 16 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 1000 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 1000 );
+}
+
 }
 
 #include <TestTeamVector.hpp>
diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp
index 879633b0c..dcb6896b8 100644
--- a/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp
+++ b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp
@@ -65,6 +65,10 @@ TEST_F( TEST_CATEGORY, team_lambda_shared_request )
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
 }
+
+TEST_F( TEST_CATEGORY, scratch_align) {
+  TestScratchAlignment< TEST_EXECSPACE >();
+}
 #endif
 #endif
 
diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamTeamSize.cpp
new file mode 100644
index 000000000..7e61e0810
--- /dev/null
+++ b/packages/kokkos/core/unit_test/cuda/TestCuda_TeamTeamSize.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cuda/TestCuda_Category.hpp>
+#include <TestTeamTeamSize.hpp>
diff --git a/packages/kokkos/core/unit_test/cuda/TestCuda_ViewLayoutStrideAssignment.cpp b/packages/kokkos/core/unit_test/cuda/TestCuda_ViewLayoutStrideAssignment.cpp
new file mode 100644
index 000000000..2732cd4ba
--- /dev/null
+++ b/packages/kokkos/core/unit_test/cuda/TestCuda_ViewLayoutStrideAssignment.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <cuda/TestCuda_Category.hpp>
+#include <TestViewLayoutStrideAssignment.hpp>
+
diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp
index 566891bb3..0ddd67acf 100644
--- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp
+++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp
@@ -50,6 +50,7 @@
 #include<TestTile.hpp>
 
 #include<TestViewCtorPropEmbeddedDim.hpp>
+#include<TestViewLayoutTiled.hpp>
 
 #include <mutex>
 
diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Reductions_DeviceView.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Reductions_DeviceView.cpp
new file mode 100644
index 000000000..99fe5842c
--- /dev/null
+++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Reductions_DeviceView.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <openmp/TestOpenMP_Category.hpp>
+#include <TestReduceDeviceView.hpp>
diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp
index 790ea9e6d..e5b900ac5 100644
--- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp
+++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp
@@ -68,6 +68,21 @@ TEST_F( TEST_CATEGORY, team_reduce )
   TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 );
   TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 );
 }
+
+TEST_F( TEST_CATEGORY, team_broadcast )
+{
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 0 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 0 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 2 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 2 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 16 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 16 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 1000 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 1000 );
+}
 }
 
 #include <TestTeamVector.hpp>
diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp
index f57da139a..64d757533 100644
--- a/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp
+++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp
@@ -65,6 +65,9 @@ TEST_F( TEST_CATEGORY, team_lambda_shared_request )
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
 }
+TEST_F( TEST_CATEGORY, scratch_align) {
+  TestScratchAlignment< TEST_EXECSPACE >();
+}
 #endif
 #endif
 
diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamTeamSize.cpp
new file mode 100644
index 000000000..a98728f02
--- /dev/null
+++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_TeamTeamSize.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <openmp/TestOpenMP_Category.hpp>
+#include <TestTeamTeamSize.hpp>
+
diff --git a/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewLayoutStrideAssignment.cpp b/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewLayoutStrideAssignment.cpp
new file mode 100644
index 000000000..81b296d15
--- /dev/null
+++ b/packages/kokkos/core/unit_test/openmp/TestOpenMP_ViewLayoutStrideAssignment.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <openmp/TestOpenMP_Category.hpp>
+#include <TestViewLayoutStrideAssignment.hpp>
+
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewCopy.cpp b/packages/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewCopy.cpp
new file mode 100644
index 000000000..4636691d9
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCmHostPinned_ViewCopy.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmHostPinned_Category.hpp>
+#include <TestViewCopy.hpp>
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_Crs.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_Crs.cpp
new file mode 100644
index 000000000..05a90da83
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_Crs.cpp
@@ -0,0 +1,47 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestCrs.hpp>
+
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_a.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_a.cpp
new file mode 100644
index 000000000..23edcbcc3
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_a.cpp
@@ -0,0 +1,54 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestMDRange.hpp>
+namespace Test {
+
+TEST_F( TEST_CATEGORY , mdrange_5d_reduce ) {
+  TestMDRange_5D< TEST_EXECSPACE >::test_reduce5( 100, 10, 10, 10, 5 );
+}
+
+}
+
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_b.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_b.cpp
new file mode 100644
index 000000000..5f02e893a
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_b.cpp
@@ -0,0 +1,54 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestMDRange.hpp>
+namespace Test {
+
+TEST_F( TEST_CATEGORY , mdrange_6d_reduce ) {
+  TestMDRange_6D< TEST_EXECSPACE >::test_reduce6( 10, 10, 10, 10, 10, 5 );
+}
+
+}
+
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_c.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_c.cpp
new file mode 100644
index 000000000..6a84962d1
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_c.cpp
@@ -0,0 +1,54 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestMDRange.hpp>
+namespace Test {
+
+TEST_F( TEST_CATEGORY , mdrange_2d_reduce ) {
+  TestMDRange_2D< TEST_EXECSPACE >::test_reduce2( 100, 100 );
+}
+
+}
+
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_d.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_d.cpp
new file mode 100644
index 000000000..c6c6ba291
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_d.cpp
@@ -0,0 +1,54 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestMDRange.hpp>
+namespace Test {
+
+TEST_F( TEST_CATEGORY , mdrange_3d_reduce ) {
+  TestMDRange_3D< TEST_EXECSPACE >::test_reduce3( 100, 100, 5 );
+}
+
+}
+
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_e.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_e.cpp
new file mode 100644
index 000000000..6afd8b8ec
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_MDRangeReduce_e.cpp
@@ -0,0 +1,54 @@
+
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<rocm/TestROCm_Category.hpp>
+#include<TestMDRange.hpp>
+namespace Test {
+
+TEST_F( TEST_CATEGORY , mdrange_4d_reduce ) {
+  TestMDRange_4D< TEST_EXECSPACE >::test_reduce4( 100, 100, 10, 5 );
+}
+
+}
+
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_SubView_c13.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_SubView_c13.cpp
new file mode 100644
index 000000000..03b39972a
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_SubView_c13.cpp
@@ -0,0 +1,54 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <rocm/TestROCmHostPinned_Category.hpp>
+#include <TestViewSubview.hpp>
+
+namespace Test {
+
+TEST_F( TEST_CATEGORY, view_test_unmanaged_subview_reset )
+{
+  TestViewSubview::test_unmanaged_subview_reset< TEST_EXECSPACE >();
+}
+
+} // namespace Test
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp
index e1025f1ba..57887450e 100644
--- a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamReductionScan.cpp
@@ -46,7 +46,6 @@
 
 namespace Test {
 
-#if !defined(KOKKOS_ROCM_CLANG_WORKAROUND)
 TEST_F( TEST_CATEGORY, team_scan )
 {
   TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 );
@@ -56,7 +55,6 @@ TEST_F( TEST_CATEGORY, team_scan )
   TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10000 );
   TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 );
 }
-#endif
 
 TEST_F( TEST_CATEGORY, team_long_reduce )
 {
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp
index 1968ab31e..c7255919d 100644
--- a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamScratch.cpp
@@ -65,6 +65,10 @@ TEST_F( TEST_CATEGORY, team_lambda_shared_request )
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
 }
+
+TEST_F( TEST_CATEGORY, scratch_align) {
+  TestScratchAlignment< TEST_EXECSPACE >();
+}
 #endif
 #endif
 
diff --git a/packages/kokkos/core/unit_test/rocm/TestROCm_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamTeamSize.cpp
new file mode 100644
index 000000000..583e01fcb
--- /dev/null
+++ b/packages/kokkos/core/unit_test/rocm/TestROCm_TeamTeamSize.cpp
@@ -0,0 +1,49 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+// Doesn't work right now due to bug with static sized array member
+
+//#include <rocm/TestROCm_Category.hpp>
+//#ifndef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND
+//#include <TestTeamTeamSize.hpp>
+//#endif
diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Other.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Other.cpp
index 0c3bae377..26a218c5c 100644
--- a/packages/kokkos/core/unit_test/serial/TestSerial_Other.cpp
+++ b/packages/kokkos/core/unit_test/serial/TestSerial_Other.cpp
@@ -50,3 +50,4 @@
 #include<TestTile.hpp>
 
 #include<TestViewCtorPropEmbeddedDim.hpp>
+#include<TestViewLayoutTiled.hpp>
diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Reductions_DeviceView.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Reductions_DeviceView.cpp
new file mode 100644
index 000000000..1c20670c2
--- /dev/null
+++ b/packages/kokkos/core/unit_test/serial/TestSerial_Reductions_DeviceView.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <serial/TestSerial_Category.hpp>
+#include <TestReduceDeviceView.hpp>
diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_Team.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_Team.cpp
index 619cb727a..47d02f700 100644
--- a/packages/kokkos/core/unit_test/serial/TestSerial_Team.cpp
+++ b/packages/kokkos/core/unit_test/serial/TestSerial_Team.cpp
@@ -68,6 +68,21 @@ TEST_F( TEST_CATEGORY, team_reduce )
   TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 );
   TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 );
 }
+
+TEST_F( TEST_CATEGORY, team_broadcast )
+{
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 0 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 0 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 2 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 2 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 16 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 16 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 1000 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 1000 );
+}
 }
 
 #include <TestTeamVector.hpp>
diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp
index 963908c92..029999ab7 100644
--- a/packages/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp
+++ b/packages/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp
@@ -65,6 +65,10 @@ TEST_F( TEST_CATEGORY, team_lambda_shared_request )
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
 }
+
+TEST_F( TEST_CATEGORY, scratch_align) {
+  TestScratchAlignment< TEST_EXECSPACE >();
+}
 #endif
 #endif
 
diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_TeamTeamSize.cpp
new file mode 100644
index 000000000..53451b30c
--- /dev/null
+++ b/packages/kokkos/core/unit_test/serial/TestSerial_TeamTeamSize.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <serial/TestSerial_Category.hpp>
+#include <TestTeamTeamSize.hpp>
diff --git a/packages/kokkos/core/unit_test/serial/TestSerial_ViewLayoutStrideAssignment.cpp b/packages/kokkos/core/unit_test/serial/TestSerial_ViewLayoutStrideAssignment.cpp
new file mode 100644
index 000000000..64c5b642d
--- /dev/null
+++ b/packages/kokkos/core/unit_test/serial/TestSerial_ViewLayoutStrideAssignment.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <serial/TestSerial_Category.hpp>
+#include <TestViewLayoutStrideAssignment.hpp>
+
diff --git a/packages/kokkos/core/unit_test/standalone/Makefile b/packages/kokkos/core/unit_test/standalone/Makefile
new file mode 100644
index 000000000..f8a75616c
--- /dev/null
+++ b/packages/kokkos/core/unit_test/standalone/Makefile
@@ -0,0 +1,55 @@
+KOKKOS_DEVICES=Cuda
+KOKKOS_CUDA_OPTIONS=enable_lambda
+KOKKOS_ARCH = "SNB,Kepler35"
+
+MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
+
+ifndef KOKKOS_PATH
+  KOKKOS_PATH = $(MAKEFILE_PATH)../../../
+endif
+
+SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
+HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)
+HEADERS = $(wildcard $(MAKEFILE_PATH)/../*.hpp)
+
+vpath %.cpp $(sort $(dir $(SRC)))
+
+default: build
+	echo "Start Build"
+
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
+EXE = test.cuda
+else
+CXX = g++
+EXE = test.host
+endif
+
+CXXFLAGS ?= -O3 -g
+override CXXFLAGS += -I$(MAKEFILE_PATH) -I$(KOKKOS_PATH)/core/unit_test -I$(KOKKOS_PATH)/tpls/gtest -DTESTFILE=$(TESTFILE)
+#SRC += $(KOKKOS_PATH)/tpls/gtest/gtest/gtest-all.cc
+
+DEPFLAGS = -M
+LINK = ${CXX}
+LINKFLAGS =
+
+OBJ = $(notdir $(SRC:.cpp=.o))
+LIB =
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) gtest-all.o
+	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) gtest-all.o  -o $(EXE)
+
+clean: kokkos-clean
+	rm -f *.o *.cuda *.host
+
+# Compilation rules
+
+%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS)
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
+
+gtest-all.o:$(KOKKOS_PATH)/tpls/gtest/gtest/gtest-all.cc
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(KOKKOS_PATH)/tpls/gtest/gtest/gtest-all.cc
diff --git a/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp b/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp
new file mode 100644
index 000000000..2db51658c
--- /dev/null
+++ b/packages/kokkos/core/unit_test/standalone/UnitTestMainInit.cpp
@@ -0,0 +1,71 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+#include <cstdlib>
+
+#include <Kokkos_Core.hpp>
+
+#ifdef KOKKOS_ENABLE_ROCM
+#include <rocm/TestROCm_Category.hpp>
+#endif
+#ifdef KOKKOS_ENABLE_CUDA
+#include <cuda/TestCuda_Category.hpp>
+#endif
+#ifdef KOKKOS_ENABLE_OPENMP
+#include <openmp/TestOpenMP_Category.hpp>
+#endif
+#ifdef KOKKOS_ENABLE_THREADS
+#include <threads/TestThreads_Category.hpp>
+#endif
+
+#include <TestMemoryPool.hpp>
+
+int main( int argc, char *argv[] ) {
+  Kokkos::initialize(argc,argv);
+  ::testing::InitGoogleTest( &argc, argv );
+
+  int result =  RUN_ALL_TESTS();
+  Kokkos::finalize();
+  return result;
+}
diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_Other.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_Other.cpp
index a0c8b4159..13786aa4a 100644
--- a/packages/kokkos/core/unit_test/threads/TestThreads_Other.cpp
+++ b/packages/kokkos/core/unit_test/threads/TestThreads_Other.cpp
@@ -50,3 +50,4 @@
 #include<TestTile.hpp>
 
 #include<TestViewCtorPropEmbeddedDim.hpp>
+#include<TestViewLayoutTiled.hpp>
diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_Reductions_DeviceView.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_Reductions_DeviceView.cpp
new file mode 100644
index 000000000..28f4b7d57
--- /dev/null
+++ b/packages/kokkos/core/unit_test/threads/TestThreads_Reductions_DeviceView.cpp
@@ -0,0 +1,45 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <threads/TestThreads_Category.hpp>
+#include <TestReduceDeviceView.hpp>
diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_Team.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_Team.cpp
index b87c1f77d..63d250935 100644
--- a/packages/kokkos/core/unit_test/threads/TestThreads_Team.cpp
+++ b/packages/kokkos/core/unit_test/threads/TestThreads_Team.cpp
@@ -68,6 +68,21 @@ TEST_F( TEST_CATEGORY, team_reduce )
   TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 );
   TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 );
 }
+
+TEST_F( TEST_CATEGORY, team_broadcast )
+{
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 0 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 0 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 2 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 2 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 16 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 16 );
+
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_teambroadcast( 1000 );
+  TestTeamBroadcast< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_teambroadcast( 1000 );
+}
 }
 
 #include <TestTeamVector.hpp>
diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp
index c07fae77c..d17119579 100644
--- a/packages/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp
+++ b/packages/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp
@@ -65,6 +65,10 @@ TEST_F( TEST_CATEGORY, team_lambda_shared_request )
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >();
   TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >();
 }
+
+TEST_F( TEST_CATEGORY, scratch_align) {
+  TestScratchAlignment< TEST_EXECSPACE >();
+}
 #endif
 #endif
 
diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_TeamTeamSize.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_TeamTeamSize.cpp
new file mode 100644
index 000000000..b1cf4ec87
--- /dev/null
+++ b/packages/kokkos/core/unit_test/threads/TestThreads_TeamTeamSize.cpp
@@ -0,0 +1,47 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <threads/TestThreads_Category.hpp>
+#include <TestTeamTeamSize.hpp>
+
+
diff --git a/packages/kokkos/core/unit_test/threads/TestThreads_ViewLayoutStrideAssignment.cpp b/packages/kokkos/core/unit_test/threads/TestThreads_ViewLayoutStrideAssignment.cpp
new file mode 100644
index 000000000..5ddd07108
--- /dev/null
+++ b/packages/kokkos/core/unit_test/threads/TestThreads_ViewLayoutStrideAssignment.cpp
@@ -0,0 +1,46 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <threads/TestThreads_Category.hpp>
+#include <TestViewLayoutStrideAssignment.hpp>
+
diff --git a/packages/kokkos/doc/kokkos-promotion.txt b/packages/kokkos/doc/kokkos-promotion.txt
index 5a1306ecc..0aede5f32 100644
--- a/packages/kokkos/doc/kokkos-promotion.txt
+++ b/packages/kokkos/doc/kokkos-promotion.txt
@@ -149,7 +149,9 @@ Step 5: This step can be done on any SEMS machine (e.g. kokkos-dev). Actually, t
         git clone -b kokkos-develop git@github.com:trilinos/Trilinos.git
         TRILINOS_PATH=$PWD/Trilinos
 
-  5.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees.
+  5.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees. 
+
+      * Use the master branch of Kokkos for this.
 
         module load sems-python/2.7.9
         python $KOKKOS_PATH/scripts/snapshot.py $KOKKOS_PATH $TRILINOS_PATH/packages
@@ -173,20 +175,22 @@ Step 5: This step can be done on any SEMS machine (e.g. kokkos-dev). Actually, t
 
        ## KokkosKernels Changelog
 
-  5.4. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3)
-
-       cd $TRILINOS_PATH
-       mkdir CHECKIN
-       cd CHECKIN
-       nohup ../cmake/std/sems/checkin-test-sems.sh --do-all --push &
-
-       Although Trilinos has experimental Pull Request testing, it is not good enough to replace the checkin script yet.
+  5.4. Wait for Trilinos Autotester results
 
   5.5. If there are failures, fix and backtrack. Otherwise, go to next step
 
 // -------------------------------------------------------------------------------- //
 
-Step 6: Push Kokkos master to GitHub (requires Owner permission).
+Step 6: Push Kokkos master and develop to GitHub (requires Owner permission).
       
+  6.1. Master branch:
        cd KOKKOS_PATH
+       git checkout master
        git push --follow-tags origin master 
+
+  6.2. Develop branch: First merge (--no-ff) master back into develop
+       cd KOKKOS_PATH
+       git checkout develop
+       git merge --no-ff maseter
+       git push origin develop 
+
diff --git a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp
index ff3002e64..e8c1550fc 100644
--- a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp
+++ b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp
@@ -81,13 +81,22 @@ int main(int narg, char* args[]) {
   Kokkos::initialize(narg,args);
 
   // Launch 12 teams of the maximum number of threads per team
-  const team_policy policy( 12 , team_policy::team_size_max( hello_world() ) );
-  
+  const int team_size_max = team_policy(1,1).team_size_max(hello_world(), Kokkos::ParallelReduceTag());
+  const team_policy policy_a( 12 , team_size_max );
+
   int sum = 0;
-  Kokkos::parallel_reduce( policy , hello_world() , sum );
+  Kokkos::parallel_reduce( policy_a , hello_world() , sum );
+
+  // The result will be 12*team_size_max
+  printf("Result A: %i == %i\n",sum, team_size_max*12);
+
+  // In practice it is often better to let Kokkos decide on the team_size
+  const team_policy policy_b( 12 , Kokkos::AUTO );
 
-  // The result will be 12*team_policy::team_size_max( hello_world())
-  printf("Result %i\n",sum);
+  Kokkos::parallel_reduce( policy_b , hello_world() , sum );
+  // The result will be 12*policy_b.team_size_recommended( hello_world(),  Kokkos::ParallelReduceTag())
+  const int team_size_recommended = policy_b.team_size_recommended( hello_world(),  Kokkos::ParallelReduceTag());
+  printf("Result B: %i %i\n",sum, team_size_recommended*12);
 
   Kokkos::finalize();
 }
diff --git a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp
index 721aab2d3..bbb1000e9 100644
--- a/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp
+++ b/packages/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp
@@ -79,7 +79,8 @@ int main(int narg, char* args[]) {
   Kokkos::initialize(narg,args);
 
   // Launch 3 teams of the maximum number of threads per team
-  const team_policy policy( 3 , team_policy::team_size_max( hello_world() ) );
+  const int team_size_max = team_policy(3,1).team_size_max( hello_world(), Kokkos::ParallelReduceTag());
+  const team_policy policy( 3 , team_size_max );
   
   int sum = 0;
   Kokkos::parallel_reduce( policy , hello_world() , sum );
diff --git a/packages/kokkos/example/virtual_functions/Makefile b/packages/kokkos/example/virtual_functions/Makefile
new file mode 100644
index 000000000..06186786c
--- /dev/null
+++ b/packages/kokkos/example/virtual_functions/Makefile
@@ -0,0 +1,55 @@
+KOKKOS_DEVICES=Cuda
+KOKKOS_CUDA_OPTIONS=enable_lambda
+KOKKOS_ARCH = "SNB,Kepler35"
+
+#KOKKOS_DEVICES=OpenMP
+#KOKKOS_CUDA_OPTIONS=enable_lambda
+#KOKKOS_ARCH = "SNB"
+
+MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
+
+ifndef KOKKOS_PATH
+  KOKKOS_PATH = $(MAKEFILE_PATH)../..
+endif
+
+SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
+HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)
+
+vpath %.cpp $(sort $(dir $(SRC)))
+
+default: build
+	echo "Start Build"
+
+LINKFLAGS =
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
+EXE = virtual.cuda
+override LINKFLAGS += --remove-duplicate-link-files
+else
+CXX = g++
+EXE = virtual.host
+endif
+
+CXXFLAGS ?= -O3 -g
+override CXXFLAGS += -I$(MAKEFILE_PATH)
+
+DEPFLAGS = -M
+LINK = ${CXX}
+
+OBJ = $(notdir $(SRC:.cpp=.o))
+LIB =
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean: kokkos-clean
+	rm -f *.o *.cuda *.host
+
+# Compilation rules
+
+%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS)
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
diff --git a/packages/kokkos/example/virtual_functions/classes.cpp b/packages/kokkos/example/virtual_functions/classes.cpp
new file mode 100644
index 000000000..1b9d7cb38
--- /dev/null
+++ b/packages/kokkos/example/virtual_functions/classes.cpp
@@ -0,0 +1,26 @@
+#include<classes.hpp>
+
+KOKKOS_FUNCTION
+Foo::Foo() {
+  val = 0;
+} 
+
+KOKKOS_FUNCTION
+Foo_1::Foo_1() {
+  val = 1;
+}
+
+KOKKOS_FUNCTION
+int Foo_1::value() {
+  return val;  
+}
+
+KOKKOS_FUNCTION
+Foo_2::Foo_2() {
+  val = 2;
+}
+
+KOKKOS_FUNCTION
+int Foo_2::value() {
+  return val;  
+}
diff --git a/packages/kokkos/example/virtual_functions/classes.hpp b/packages/kokkos/example/virtual_functions/classes.hpp
new file mode 100644
index 000000000..362c473ce
--- /dev/null
+++ b/packages/kokkos/example/virtual_functions/classes.hpp
@@ -0,0 +1,39 @@
+#ifndef KOKKOS_EXAMPLE_VIRTUAL_FUNCTIONS_CLASSES_HPP
+#define KOKKOS_EXAMPLE_VIRTUAL_FUNCTIONS_CLASSES_HPP
+
+#include<Kokkos_Core.hpp>
+
+class Foo {
+  protected:
+    int val;
+  public:
+    KOKKOS_FUNCTION
+    Foo();
+
+    KOKKOS_FUNCTION
+    virtual int value() { return 0; };
+
+    KOKKOS_FUNCTION
+    virtual ~Foo() {}
+};
+
+class Foo_1: public Foo {
+  public:
+    KOKKOS_FUNCTION
+    Foo_1();
+
+    KOKKOS_FUNCTION
+    int value();
+};
+
+class Foo_2: public Foo {
+  public:
+    KOKKOS_FUNCTION
+    Foo_2();
+
+    KOKKOS_FUNCTION
+    int value();
+};
+
+#endif //KOKKOS_EXAMPLE_VIRTUAL_FUNCTIONS_CLASSES_HPP
+
diff --git a/packages/kokkos/example/virtual_functions/main.cpp b/packages/kokkos/example/virtual_functions/main.cpp
new file mode 100644
index 000000000..a16e0cf73
--- /dev/null
+++ b/packages/kokkos/example/virtual_functions/main.cpp
@@ -0,0 +1,36 @@
+#include<classes.hpp>
+
+int main(int argc, char* argv[]) {
+  Kokkos::initialize(argc,argv);
+
+  {
+    Foo* f_1 = (Foo*) Kokkos::kokkos_malloc(sizeof(Foo_1));
+    Foo* f_2 = (Foo*) Kokkos::kokkos_malloc(sizeof(Foo_2));
+
+    Kokkos::parallel_for("CreateObjects",1, KOKKOS_LAMBDA (const int&) {
+      new ((Foo_1*)f_1) Foo_1();
+      new ((Foo_2*)f_2) Foo_2();
+    });
+
+    int value_1,value_2;
+    Kokkos::parallel_reduce("CheckValues",1, KOKKOS_LAMBDA (const int&, int& lsum) {
+      lsum = f_1->value();
+    },value_1);
+
+    Kokkos::parallel_reduce("CheckValues",1, KOKKOS_LAMBDA (const int&, int& lsum) {
+      lsum = f_2->value();
+    },value_2);
+
+    printf("Values: %i %i\n",value_1,value_2);
+
+    Kokkos::parallel_for("DestroyObjects",1, KOKKOS_LAMBDA (const int&) {
+      f_1->~Foo();
+      f_2->~Foo();
+    });
+
+    Kokkos::kokkos_free(f_1);
+    Kokkos::kokkos_free(f_2);
+  }
+
+  Kokkos::finalize();
+}
diff --git a/packages/kokkos/generate_makefile.bash b/packages/kokkos/generate_makefile.bash
index 4225e5b2d..34be03f98 100755
--- a/packages/kokkos/generate_makefile.bash
+++ b/packages/kokkos/generate_makefile.bash
@@ -97,12 +97,21 @@ do
         echo "Invalid compiler by --compiler command: '${COMPILER}'"
         exit
       fi
+      # ... valid compiler, ensure absolute path set 
+      WCOMPATH=`which $COMPILER`
+      COMPDIR=`dirname $WCOMPATH`
+      COMPNAME=`basename $WCOMPATH`
+      COMPILER=${COMPDIR}/${COMPNAME}
       ;;
     --with-options*)
       KOKKOS_OPT="${key#*=}"
       ;;
+    --gcc-toolchain*)
+      KOKKOS_GCC_TOOLCHAIN="${key#*=}"
+      ;;
     --help)
       echo "Kokkos configure options:"
+      echo ""
       echo "--kokkos-path=/Path/To/Kokkos:        Path to the Kokkos root directory."
       echo "--qthreads-path=/Path/To/Qthreads:    Path to Qthreads install directory."
       echo "                                        Overrides path given by --with-qthreads."
@@ -171,6 +180,7 @@ do
       echo "                                "
       echo "--with-cuda-options=[OPT]:    Additional options to CUDA:"
       echo "                                force_uvm, use_ldg, enable_lambda, rdc"
+      echo "--gcc-toolchain=/Path/To/GccRoot:  Set the gcc toolchain to use with clang (e.g. /usr)" 
       echo "--make-j=[NUM]:               DEPRECATED: call make with appropriate"
       echo "                                -j flag"
       exit 0
@@ -195,7 +205,7 @@ else
 fi
 
 if [ "${KOKKOS_PATH}"  = "${PWD}" ] || [ "${KOKKOS_PATH}"  = "${PWD}/" ]; then
-  echo "Running generate_makefile.sh in the Kokkos root directory is not allowed"
+  echo "Running generate_makefile.bash in the Kokkos root directory is not allowed"
   exit
 fi
 
@@ -204,8 +214,13 @@ KOKKOS_SRC_PATH=${KOKKOS_PATH}
 KOKKOS_SETTINGS="KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}"
 #KOKKOS_SETTINGS="KOKKOS_PATH=${KOKKOS_PATH}"
 
+# The double [[  ]] in the elif branch is not a typo
 if [ ${#COMPILER} -gt 0 ]; then
   KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}"
+elif
+   [ ${#COMPILER} -eq 0 ] && [[ ${KOKKOS_DEVICES} =~ .*Cuda.* ]]; then
+  COMPILER="${KOKKOS_PATH}/bin/nvcc_wrapper"
+  KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}"   
 fi
 
 if [ ${#KOKKOS_DEVICES} -gt 0 ]; then
@@ -265,6 +280,10 @@ if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then
   KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}"
 fi
 
+if [ ${#KOKKOS_GCC_TOOLCHAIN} -gt 0 ]; then
+  KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_INTERNAL_GCC_TOOLCHAIN=${KOKKOS_GCC_TOOLCHAIN}"
+fi
+
 KOKKOS_SETTINGS_NO_KOKKOS_PATH="${KOKKOS_SETTINGS}"
 
 KOKKOS_TEST_INSTALL_PATH="${PWD}/install"
@@ -276,7 +295,7 @@ fi
 
 mkdir -p install
 gen_makefile=Makefile.kokkos
-echo "#Makefile to satisfy existens of target kokkos-clean before installing the library" > install/${gen_makefile}
+echo "#Makefile to satisfy existence of target kokkos-clean before installing the library" > install/${gen_makefile}
 echo "kokkos-clean:" >> install/${gen_makefile}
 echo "" >> install/${gen_makefile}
 mkdir -p core
diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt
index fe90cc296..08453309d 100644
--- a/packages/kokkos/master_history.txt
+++ b/packages/kokkos/master_history.txt
@@ -14,3 +14,4 @@ tag:  2.04.11    date: 10:28:2017    master: 54a1330a    develop: ed36c017
 tag:  2.5.00     date: 12:15:2017    master: dfe685f4    develop: ec7ad6d8
 tag:  2.6.00     date: 03:07:2018    master: 62e760fa    develop: d1ba7d71
 tag:  2.7.00     date: 05:24:2018    master: e01945d0    develop: 2d13f608
+tag:  2.7.24     date: 11:04:2018    master: d3a94192    develop: 7a06fc81
diff --git a/packages/kokkos/scripts/eti/generate_view_copy_cpp_files_write b/packages/kokkos/scripts/eti/generate_view_copy_cpp_files_write
index ffe2a593b..45208e76b 100755
--- a/packages/kokkos/scripts/eti/generate_view_copy_cpp_files_write
+++ b/packages/kokkos/scripts/eti/generate_view_copy_cpp_files_write
@@ -19,8 +19,8 @@ echo "KOKKOS_IMPL_VIEWCOPY_ETI_DECL(${SCALAR_TYPE}${RANK_STARS},${LAYOUT_TYPE},L
 echo "KOKKOS_IMPL_VIEWFILL_ETI_DECL(${SCALAR_TYPE}${RANK_STARS},${LAYOUT_TYPE},KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE,${INDEX_TYPE})" >> common/Kokkos_ViewFillCopyETIDecl_Macros.hpp
 
 
-FileName=${EXECUTION_SPACE_DIR}/Kokkos_${EXECUTION_SPACE}_ViewCopyETIInst_${INDEX_TYPE}_${SCALAR_TYPE}_${LAYOUT_TYPE}_Rank${RANK}.cpp
-ObjectName=Kokkos_${EXECUTION_SPACE}_ViewCopyETIInst_${INDEX_TYPE}_${SCALAR_TYPE}_${LAYOUT_TYPE}_Rank${RANK}.o
+FileName=${EXECUTION_SPACE_DIR}/Kokkos_${EXECUTION_SPACE_DIR}_ViewCopyETIInst_${INDEX_TYPE}_${SCALAR_TYPE}_${LAYOUT_TYPE}_Rank${RANK}.cpp
+ObjectName=Kokkos_${EXECUTION_SPACE_DIR}_ViewCopyETIInst_${INDEX_TYPE}_${SCALAR_TYPE}_${LAYOUT_TYPE}_Rank${RANK}.o
 
 cp ${SCRIPT_PATH}/../../LICENSE ${FileName}
 
@@ -37,5 +37,5 @@ echo "" >> ${FileName}
 echo "}" >> ${FileName}
 echo "}" >> ${FileName}
 
-echo "${ObjectName}: \$(KOKKOS_CPP_DEPENDS) \$(KOKKOS_ETI_PATH)/${FileName}" >> ${EXECUTION_SPACE_DIR}/Makefile.eti_${EXECUTION_SPACE}
-echo -e "\t\$(CXX) \$(KOKKOS_CPPFLAGS) \$(KOKKOS_CXXFLAGS) \$(CXXFLAGS) -c \$(KOKKOS_ETI_PATH)/${FileName}" >> ${EXECUTION_SPACE_DIR}/Makefile.eti_${EXECUTION_SPACE}
+echo "${ObjectName}: \$(KOKKOS_CPP_DEPENDS) \$(KOKKOS_ETI_PATH)/${FileName}" >> ${EXECUTION_SPACE_DIR}/Makefile.eti_${EXECUTION_SPACE_DIR}
+echo -e "\t\$(CXX) \$(KOKKOS_CPPFLAGS) \$(KOKKOS_CXXFLAGS) \$(CXXFLAGS) -c \$(KOKKOS_ETI_PATH)/${FileName}" >> ${EXECUTION_SPACE_DIR}/Makefile.eti_${EXECUTION_SPACE_DIR}
diff --git a/packages/kokkos/scripts/testing_scripts/test_all_sandia b/packages/kokkos/scripts/testing_scripts/test_all_sandia
new file mode 100755
index 000000000..d1424ade8
--- /dev/null
+++ b/packages/kokkos/scripts/testing_scripts/test_all_sandia
@@ -0,0 +1,790 @@
+#!/bin/bash -e
+
+#
+# Global config
+#
+
+set -o pipefail
+
+# Determine current machine.
+
+MACHINE=""
+HOSTNAME=$(hostname)
+PROCESSOR=`uname -p`
+
+if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
+  MACHINE=white
+  module load git
+fi
+
+if [[ "$HOSTNAME" =~ .*bowman.* ]]; then
+  MACHINE=bowman
+  module load git
+fi
+
+if [[ "$HOSTNAME" == *blake* ]]; then # Warning: very generic name
+  MACHINE=blake
+  module load git
+fi
+
+if [[ "$HOSTNAME" == apollo\.* ]]; then
+  MACHINE=apollo
+  module load git
+fi
+
+if [[ "$HOSTNAME" == mayer\.* ]]; then
+  MACHINE=mayer
+#  module load git
+fi
+if [[ "$HOSTNAME" == cn* ]]; then # Warning: very generic name
+  MACHINE=mayer
+fi
+
+if [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
+  if [[ "$MACHINE" = "" ]]; then
+    MACHINE=sems
+    module load sems-git
+  fi  
+fi
+
+if [[ "$MACHINE" = "" ]]; then
+  echo "Unrecognized machine" >&2
+  exit 1
+fi
+
+echo "Running on machine: $MACHINE"
+
+GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
+IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
+ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
+INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
+CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
+CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
+CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"
+
+GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
+IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
+CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
+INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
+CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
+#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
+PGI_WARNING_FLAGS=""
+
+# Default. Machine specific can override.
+DEBUG=False
+ARGS=""
+CUSTOM_BUILD_LIST=""
+QTHREADS_PATH=""
+DRYRUN=False
+BUILD_ONLY=False
+declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1
+TEST_SCRIPT=False
+SKIP_HWLOC=False
+SPOT_CHECK=False
+
+PRINT_HELP=False
+OPT_FLAG=""
+CXX_FLAGS_EXTRA=""
+LD_FLAGS_EXTRA=""
+KOKKOS_OPTIONS=""
+
+#
+# Handle arguments.
+#
+
+while [[ $# > 0 ]]
+do
+  key="$1"
+
+  case $key in
+    --kokkos-path*)
+      KOKKOS_PATH="${key#*=}"
+      ;;
+    --qthreads-path*)
+      QTHREADS_PATH="${key#*=}"
+      ;;
+    --build-list*)
+      CUSTOM_BUILD_LIST="${key#*=}"
+      ;;
+    --debug*)
+      DEBUG=True
+      ;;
+    --build-only*)
+      BUILD_ONLY=True
+      ;;
+    --test-script*)
+      TEST_SCRIPT=True
+      ;;
+    --skip-hwloc*)
+      SKIP_HWLOC=True
+      ;;
+    --num*)
+      NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
+      ;;
+    --dry-run*)
+      DRYRUN=True
+      ;;
+    --spot-check*)
+      SPOT_CHECK=True
+      ;;
+    --arch*)
+      ARCH_FLAG="--arch=${key#*=}"
+      ;;
+    --opt-flag*)
+      OPT_FLAG="${key#*=}"
+      ;;
+    --with-cuda-options*)
+      KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
+      ;;
+    --with-options*)
+      KOKKOS_OPTIONS="--with-options=${key#*=}"
+      ;;
+    --cxxflags-extra*)
+      CXX_FLAGS_EXTRA="${key#*=}"
+      ;;
+    --ldflags-extra*)
+      LD_FLAGS_EXTRA="${key#*=}"
+      ;;
+    --help*)
+      PRINT_HELP=True
+      ;;
+    *)
+      # args, just append
+      ARGS="$ARGS $1"
+      ;;
+  esac
+
+  shift
+done
+
+SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd ../.. && pwd )
+
+# Set kokkos path.
+if [ -z "$KOKKOS_PATH" ]; then
+  KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
+else
+  # Ensure KOKKOS_PATH is abs path.
+  KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
+fi
+
+UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null`
+if ! [ -z "$UNCOMMITTED" ]; then
+  echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :"
+  echo "$UNCOMMITTED"
+  echo ""
+fi
+
+GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline`
+echo "Repository Status: " ${GITSTATUS}
+echo ""
+echo ""
+
+#
+# Machine specific config.
+#
+
+if [ "$MACHINE" = "sems" ]; then
+  source /projects/sems/modulefiles/utils/sems-modules-init.sh
+
+  BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
+  CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
+  CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
+
+  if [ -z "$ARCH_FLAG" ]; then
+    ARCH_FLAG=""
+  fi
+
+  if [ "$SPOT_CHECK" = "True" ]; then
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
+               "gcc/7.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
+               "intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
+               "clang/4.0.1 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  else
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/4.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  fi
+elif [ "$MACHINE" = "white" ]; then
+  source /etc/profile.d/modules.sh
+  SKIP_HWLOC=True
+  export SLURM_TASKS_PER_NODE=32
+
+  BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
+  IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
+  CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/7.2.0,ibm/xl/16.1.0"
+
+  # Don't do pthread on white.
+  GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
+
+  # Format: (compiler module-list build-list exe-name warning-flag)
+  COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
+             "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+  )
+
+  if [ -z "$ARCH_FLAG" ]; then
+    ARCH_FLAG="--arch=Power8,Kepler37"
+  fi
+
+elif [ "$MACHINE" = "bowman" ]; then
+  source /etc/profile.d/modules.sh
+  SKIP_HWLOC=True
+  export SLURM_TASKS_PER_NODE=32
+
+  BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
+
+  OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
+
+  # Format: (compiler module-list build-list exe-name warning-flag)
+  COMPILERS=("intel/16.4.258 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+             "intel/17.2.174 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+             "intel/18.2.199 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+  )
+
+  if [ -z "$ARCH_FLAG" ]; then
+    ARCH_FLAG="--arch=KNL"
+  fi
+
+elif [ "$MACHINE" = "mayer" ]; then
+  SKIP_HWLOC=True
+  export SLURM_TASKS_PER_NODE=96
+
+  BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
+  ARM_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
+
+  # Format: (compiler module-list build-list exe-name warning-flag)
+  COMPILERS=("gcc/7.2.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "arm/18.4.0 $ARM_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS")
+
+  if [ -z "$ARCH_FLAG" ]; then
+    ARCH_FLAG="--arch=ARMv8-TX2"
+  fi
+
+elif [ "$MACHINE" = "blake" ]; then
+  source /etc/profile.d/modules.sh
+  SKIP_HWLOC=True
+  export SLURM_TASKS_PER_NODE=32
+
+  BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
+  BASE_MODULE_LIST_INTEL="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
+
+  if [ "$SPOT_CHECK" = "True" ]; then
+
+  # Format: (compiler module-list build-list exe-name warning-flag)
+  COMPILERS=("intel/18.1.163 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+             "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS"
+  )
+  else
+  COMPILERS=("intel/18.1.163 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+             "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "gcc/5.5.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "gcc/6.4.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "gcc/8.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+             "pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS"
+  )
+
+  fi
+  if [ -z "$ARCH_FLAG" ]; then
+    ARCH_FLAG="--arch=SKX"
+  fi
+
+elif [ "$MACHINE" = "apollo" ]; then
+  source /projects/sems/modulefiles/utils/sems-modules-init.sh
+  module use /home/projects/modulefiles/local/x86-64
+  module load kokkos-env
+
+  module load sems-git
+  module load sems-tex
+  module load sems-cmake/3.5.2
+  module load sems-gdb
+
+  SKIP_HWLOC=True
+
+  BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
+  CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
+  CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
+
+  CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.0.69"
+  CLANG7_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.1"
+  NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
+
+  BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
+  BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread"
+  BUILD_LIST_CLANG="Serial,Pthread,OpenMP"
+
+  if [ "$SPOT_CHECK" = "True" ]; then
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
+               "gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
+               "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
+               "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread,OpenMP" clang++ $CUDA_WARNING_FLAGS"
+               "cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  else
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
+               "clang/7.0 $CLANG7_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
+               "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
+               "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+    )
+  fi
+
+  if [ -z "$ARCH_FLAG" ]; then
+    ARCH_FLAG="--arch=SNB,Volta70"
+  fi
+
+else
+  echo "Unhandled machine $MACHINE" >&2
+  exit 1
+fi
+
+export OMP_NUM_THREADS=8
+export OMP_PROC_BIND=spread
+export OMP_PLACES=cores
+
+declare -i NUM_RESULTS_TO_KEEP=7
+
+RESULT_ROOT_PREFIX=TestAll
+
+if [ "$PRINT_HELP" = "True" ]; then
+  echo "test_all_sandia <ARGS> <OPTIONS>:"
+  echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
+  echo "    Defaults to root repo containing this script"
+  echo "--debug: Run tests in debug. Defaults to False"
+  echo "--test-script: Test this script, not Kokkos"
+  echo "--skip-hwloc: Do not do hwloc tests"
+  echo "--num=N: Number of jobs to run in parallel"
+  echo "--spot-check: Minimal test set to issue pull request"
+  echo "--dry-run: Just print what would be executed"
+  echo "--build-only: Just do builds, don't run anything"
+  echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
+  echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS"
+  echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS"
+  echo "--arch=ARCHITECTURE: overwrite architecture flags"
+  echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS"
+  echo "--build-list=BUILD,BUILD,BUILD..."
+  echo "    Provide a comma-separated list of builds instead of running all builds"
+  echo "    Valid items:"
+  echo "      OpenMP, Pthread, Qthreads, Serial, OpenMP_Serial, Pthread_Serial"
+  echo "      Qthreads_Serial, Cuda_OpenMP, Cuda_Pthread, Cuda_Serial"
+  echo ""
+
+  echo "ARGS: list of expressions matching compilers to test"
+  echo "  supported compilers sems"
+  for COMPILER_DATA in "${COMPILERS[@]}"; do
+    ARR=($COMPILER_DATA)
+    COMPILER=${ARR[0]}
+    echo "    $COMPILER"
+  done
+  echo ""
+
+  echo "Examples:"
+  echo "  Run all tests"
+  echo "  % test_all_sandia"
+  echo ""
+  echo "  Run all gcc tests"
+  echo "  % test_all_sandia gcc"
+  echo ""
+  echo "  Run all gcc/4.8.4 and all intel tests"
+  echo "  % test_all_sandia gcc/4.8.4 intel"
+  echo ""
+  echo "  Run all tests in debug"
+  echo "  % test_all_sandia --debug"
+  echo ""
+  echo "  Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds"
+  echo "  % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial"
+  echo ""
+  echo "If you want to kill the tests, do:"
+  echo "  hit ctrl-z"
+  echo "  % kill -9 %1"
+  echo
+  exit 0
+fi
+
+# Set build type.
+if [ "$DEBUG" = "True" ]; then
+  BUILD_TYPE=debug
+else
+  BUILD_TYPE=release
+fi
+
+# If no args provided, do all compilers.
+if [ -z "$ARGS" ]; then
+  ARGS='?'
+fi
+
+# Process args to figure out which compilers to test.
+COMPILERS_TO_TEST=""
+
+for ARG in $ARGS; do
+  for COMPILER_DATA in "${COMPILERS[@]}"; do
+    ARR=($COMPILER_DATA)
+    COMPILER=${ARR[0]}
+
+    if [[ "$COMPILER" = $ARG* ]]; then
+      if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then
+        COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER"
+      else
+        echo "Tried to add $COMPILER twice"
+      fi
+    fi
+  done
+done
+
+# Check if Qthreads build requested.
+HAVE_QTHREADS_BUILD="False"
+if [ -n "$CUSTOM_BUILD_LIST" ]; then
+  if [[ "$CUSTOM_BUILD_LIST" = *Qthreads* ]]; then
+    HAVE_QTHREADS_BUILD="True"
+  fi
+else
+  for COMPILER_DATA in "${COMPILERS[@]}"; do
+    ARR=($COMPILER_DATA)
+    BUILD_LIST=${ARR[2]}
+    if [[ "$BUILD_LIST" = *Qthreads* ]]; then
+      HAVE_QTHREADS_BUILD="True"
+    fi
+  done
+fi
+
+# Ensure Qthreads path is set if Qthreads build is requested.
+if [ "$HAVE_QTHREADS_BUILD" = "True" ]; then
+  if [ -z "$QTHREADS_PATH" ]; then
+    echo "Need to supply Qthreads path (--qthreads-path) when testing Qthreads backend." >&2
+    exit 1
+  else
+    # Strip trailing slashes from path.
+    QTHREADS_PATH=$(echo $QTHREADS_PATH | sed 's/\/*$//')
+  fi
+fi
+
+#
+# Functions.
+#
+
+# get_compiler_name <COMPILER>
+get_compiler_name() {
+  echo $1 | cut -d/ -f1
+}
+
+# get_compiler_version <COMPILER>
+get_compiler_version() {
+  echo $1 | cut -d/ -f2
+}
+
+# Do not call directly.
+get_compiler_data() {
+  local compiler=$1
+  local item=$2
+  local compiler_name=$(get_compiler_name $compiler)
+  local compiler_vers=$(get_compiler_version $compiler)
+
+  local compiler_data
+  for compiler_data in "${COMPILERS[@]}" ; do
+    local arr=($compiler_data)
+
+    if [ "$compiler" = "${arr[0]}" ]; then
+      echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g"
+      return 0
+    fi
+  done
+
+  # Not found.
+  echo "Unreconized compiler $compiler" >&2
+  exit 1
+}
+
+#
+# For all getters, usage: <GETTER> <COMPILER>
+#
+
+get_compiler_modules() {
+  get_compiler_data $1 1
+}
+
+get_compiler_build_list() {
+  get_compiler_data $1 2
+}
+
+get_compiler_exe_name() {
+  get_compiler_data $1 3
+}
+
+get_compiler_warning_flags() {
+  get_compiler_data $1 4
+}
+
+run_cmd() {
+  echo "RUNNING: $*"
+  if [ "$DRYRUN" != "True" ]; then
+    eval "$* 2>&1"
+  fi
+}
+
+# report_and_log_test_results <SUCCESS> <DESC> <COMMENT>
+report_and_log_test_result() {
+  # Use sane var names.
+  local success=$1; local desc=$2; local comment=$3;
+
+  if [ "$success" = "0" ]; then
+    echo "  PASSED $desc"
+    echo $comment > $PASSED_DIR/$desc
+  else
+    # For failures, comment should be the name of the phase that failed.
+    echo "  FAILED $desc" >&2
+    echo $comment > $FAILED_DIR/$desc
+    cat ${desc}.${comment}.log
+  fi
+}
+
+setup_env() {
+  local compiler=$1
+  local compiler_modules=$(get_compiler_modules $compiler)
+
+  module purge
+
+  local mod
+  for mod in $compiler_modules; do
+    echo "Loading module $mod"
+    module load $mod 2>&1
+    # It is ridiculously hard to check for the success of a loaded
+    # module. Module does not return error codes and piping to grep
+    # causes module to run in a subshell.
+    module list 2>&1 | grep "$mod" >& /dev/null || return 1
+  done
+
+  return 0
+}
+
+# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE>
+single_build_and_test() {
+  # Use sane var names.
+  local compiler=$1; local build=$2; local build_type=$3;
+
+  # Set up env.
+  mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type"
+  cd $ROOT_DIR/$compiler/"${build}-$build_type"
+  local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
+  setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
+
+  # Set up flags.
+  local compiler_warning_flags=$(get_compiler_warning_flags $compiler)
+  local compiler_exe=$(get_compiler_exe_name $compiler)
+
+  if [[ "$build_type" = hwloc* ]]; then
+    local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
+  fi
+
+  if [[ "$build" = *Qthreads* ]]; then
+    if [[ "$build_type" = hwloc* ]]; then
+      local extra_args="$extra_args --qthreads-path=${QTHREADS_PATH}_hwloc"
+    else
+      local extra_args="$extra_args --qthreads-path=$QTHREADS_PATH"
+    fi
+  fi
+
+  if [[ "$OPT_FLAG" = "" ]]; then
+    OPT_FLAG="-O3"
+  fi
+
+  if [[ "$build_type" = *debug* ]]; then
+    local extra_args="$extra_args --debug"
+    local cxxflags="-g $compiler_warning_flags"
+    local ldflags="-g"
+  else
+    local cxxflags="$OPT_FLAG $compiler_warning_flags"
+    local ldflags="${OPT_FLAG}"
+  fi
+
+  local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}"
+  local ldflags="${ldflags} ${LD_FLAGS_EXTRA}"
+
+  if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
+    local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
+  fi
+  if [[ "$KOKKOS_OPTIONS" != "" ]]; then
+    local extra_args="$extra_args $KOKKOS_OPTIONS"
+  else
+    local extra_args="$extra_args --with-options=enable_large_mem_tests"
+  fi    
+
+  echo "  Starting job $desc"
+
+  local comment="no_comment"
+
+  if [ "$TEST_SCRIPT" = "True" ]; then
+    local rand=$[ 1 + $[ RANDOM % 10 ]]
+    sleep $rand
+
+    if [ $rand -gt 5 ]; then
+      run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; }
+    fi
+  else
+    run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
+    local -i build_start_time=$(date +%s)
+    run_cmd make -j 48 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
+    local -i build_end_time=$(date +%s)
+    comment="build_time=$(($build_end_time-$build_start_time))"
+
+    if [[ "$BUILD_ONLY" == False ]]; then
+      run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; }
+      local -i run_end_time=$(date +%s)
+      comment="$comment run_time=$(($run_end_time-$build_end_time))"
+    fi
+  fi
+
+  report_and_log_test_result 0 $desc "$comment"
+
+  return 0
+}
+
+# wait_for_jobs <NUM-JOBS>
+wait_for_jobs() {
+  local -i max_jobs=$1
+  local -i num_active_jobs=$(jobs | wc -l)
+  while [ $num_active_jobs -ge $max_jobs ]
+  do
+    sleep 1
+    num_active_jobs=$(jobs | wc -l)
+    jobs >& /dev/null
+  done
+}
+
+# run_in_background <COMPILER> <BUILD> <BUILD_TYPE>
+run_in_background() {
+  local compiler=$1
+
+  local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
+  # Don't override command line input.
+  # if [[ "$BUILD_ONLY" == True ]]; then
+  #   num_jobs=8
+  # else
+    if [[ "$compiler" == cuda* ]]; then
+      num_jobs=1
+    fi
+    if [[ "$compiler" == clang ]]; then 
+      num_jobs=1
+    fi
+  # fi
+  wait_for_jobs $num_jobs
+
+  single_build_and_test $* &
+}
+
+# build_and_test_all <COMPILER>
+build_and_test_all() {
+  # Get compiler data.
+  local compiler=$1
+  if [ -z "$CUSTOM_BUILD_LIST" ]; then
+    local compiler_build_list=$(get_compiler_build_list $compiler)
+  else
+    local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ')
+  fi
+
+  # Do builds.
+  local build
+  for build in $compiler_build_list
+  do
+    run_in_background $compiler $build $BUILD_TYPE
+
+    # If not cuda, do a hwloc test too.
+    if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then
+      run_in_background $compiler $build "hwloc-$BUILD_TYPE"
+    fi
+  done
+
+  return 0
+}
+
+get_test_root_dir() {
+  local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort)
+  local -i num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l)
+  local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP}
+
+  if [ $num_to_delete -gt 0 ]; then
+    /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete)
+  fi
+
+  echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S")
+}
+
+wait_summarize_and_exit() {
+  wait_for_jobs 1
+
+  echo "#######################################################"
+  echo "PASSED TESTS"
+  echo "#######################################################"
+
+  local passed_test
+  for passed_test in $(\ls -1 $PASSED_DIR | sort)
+  do
+    echo $passed_test $(cat $PASSED_DIR/$passed_test)
+  done
+
+  local -i rv=0
+  if [ "$(ls -A $FAILED_DIR)" ]; then
+    echo "#######################################################"
+    echo "FAILED TESTS"
+    echo "#######################################################"
+
+    local failed_test
+    for failed_test in $(\ls -1 $FAILED_DIR | sort)
+    do
+      echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
+      rv=$rv+1
+    done
+  fi
+
+  exit $rv
+}
+
+#
+# Main.
+#
+
+ROOT_DIR=$(get_test_root_dir)
+mkdir -p $ROOT_DIR
+cd $ROOT_DIR
+
+PASSED_DIR=$ROOT_DIR/results/passed
+FAILED_DIR=$ROOT_DIR/results/failed
+mkdir -p $PASSED_DIR
+mkdir -p $FAILED_DIR
+
+echo "Going to test compilers: " $COMPILERS_TO_TEST
+for COMPILER in $COMPILERS_TO_TEST; do
+  echo "Testing compiler $COMPILER"
+  build_and_test_all $COMPILER
+done
+
+wait_summarize_and_exit
diff --git a/packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_pthread_intel b/packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel
similarity index 82%
rename from packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_pthread_intel
rename to packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel
index 3b2c72551..df370509a 100755
--- a/packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_pthread_intel
+++ b/packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel
@@ -1,6 +1,8 @@
 #!/bin/bash -el
 ulimit -c 0
-module load devpack/openmpi/2.1.1/intel/17.4.196/cuda/none
+module load devpack/20171203/openmpi/2.1.2/intel/18.1.163
+# Trilinos now requires cmake version >= 3.10.0
+module swap cmake/3.9.0 cmake/3.10.2
 
 KOKKOS_BRANCH=$1
 TRILINOS_UPDATE_BRANCH=$2
@@ -28,8 +30,9 @@ export JENKINS_DO_PTHREAD=ON
 export JENKINS_DO_SERIAL=OFF
 export JENKINS_DO_COMPLEX=OFF
 
-export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX2 -mkl"
-export JENKINS_ARCH_C_FLAG="-xCORE-AVX2 -mkl"
+export JENKINS_ARCH=SKX
+export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX512 -mkl"
+export JENKINS_ARCH_C_FLAG="-xCORE-AVX512 -mkl"
 export BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a"
 export LAPACK_LIBRARIES=${BLAS_LIBRARIES}
 
@@ -37,7 +40,7 @@ export JENKINS_DO_TESTS=ON
 export JENKINS_DO_EXAMPLES=ON
 export JENKINS_DO_SHARED=ON
 
-export QUEUE=haswell
+export QUEUE=blake
 
 
 module load python
diff --git a/packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_serial_intel b/packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel
similarity index 82%
rename from packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_serial_intel
rename to packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel
index 9ce936ae2..04f1378ce 100755
--- a/packages/kokkos/scripts/trilinos-integration/shepard_jenkins_run_script_serial_intel
+++ b/packages/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel
@@ -1,6 +1,8 @@
 #!/bin/bash -el
 ulimit -c 0
-module load devpack/openmpi/2.1.1/intel/17.4.196/cuda/none
+module load devpack/20171203/openmpi/2.1.2/intel/18.1.163
+# Trilinos now requires cmake version >= 3.10.0
+module swap cmake/3.9.0 cmake/3.10.2
 
 KOKKOS_BRANCH=$1
 TRILINOS_UPDATE_BRANCH=$2
@@ -28,8 +30,9 @@ export JENKINS_DO_PTHREAD=OFF
 export JENKINS_DO_SERIAL=ON
 export JENKINS_DO_COMPLEX=ON
 
-export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX2 -mkl"
-export JENKINS_ARCH_C_FLAG="-xCORE-AVX2 -mkl"
+export JENKINS_ARCH=SKX
+export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX512 -mkl"
+export JENKINS_ARCH_C_FLAG="-xCORE-AVX512 -mkl"
 export BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a"
 export LAPACK_LIBRARIES=${BLAS_LIBRARIES}
 
@@ -37,7 +40,7 @@ export JENKINS_DO_TESTS=ON
 export JENKINS_DO_EXAMPLES=ON
 export JENKINS_DO_SHARED=ON
 
-export QUEUE=haswell
+export QUEUE=blake
 
 
 module load python
diff --git a/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda b/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda
index 2716767fe..98900c3c9 100755
--- a/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda
+++ b/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda
@@ -20,7 +20,10 @@ then
   TRILINOS_PRISTINE_BRANCH=develop
 fi
 
-module load devpack/openmpi/1.10.4/gcc/5.4.0/cuda/8.0.44
+module load devpack/20180521/openmpi/2.1.2/gcc/7.2.0/cuda/9.2.88
+module swap openblas/0.2.20/gcc/7.2.0 netlib/3.8.0/gcc/7.2.0
+# Trilinos now requires cmake version >= 3.10.0
+module swap cmake/3.9.6 cmake/3.12.3
 export OMP_NUM_THREADS=8
 export JENKINS_DO_CUDA=ON
 export JENKINS_DO_OPENMP=OFF
@@ -28,6 +31,7 @@ export JENKINS_DO_PTHREAD=OFF
 export JENKINS_DO_SERIAL=ON
 export JENKINS_DO_COMPLEX=OFF
 
+export JENKINS_ARCH="Power8,Kepler37"
 export JENKINS_ARCH_CXX_FLAG="-mcpu=power8 -arch=sm_37"
 export JENKINS_ARCH_C_FLAG="-mcpu=power8"
 export BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a;gfortran;gomp"
diff --git a/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp b/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp
index ff1086507..9c5244cd3 100755
--- a/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp
+++ b/packages/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp
@@ -20,7 +20,10 @@ then
   TRILINOS_PRISTINE_BRANCH=develop
 fi
 
-module load devpack/openmpi/1.10.4/gcc/5.4.0/cuda/8.0.44
+module load devpack/20180521/openmpi/2.1.2/gcc/7.2.0/cuda/9.2.88
+module swap openblas/0.2.20/gcc/7.2.0 netlib/3.8.0/gcc/7.2.0
+# Trilinos now requires cmake version >= 3.10.0
+module swap cmake/3.9.6 cmake/3.12.3
 export OMP_NUM_THREADS=8
 export JENKINS_DO_CUDA=OFF
 export JENKINS_DO_OPENMP=ON
@@ -28,6 +31,7 @@ export JENKINS_DO_PTHREAD=OFF
 export JENKINS_DO_SERIAL=OFF
 export JENKINS_DO_COMPLEX=OFF
 
+export JENKINS_ARCH="Power8"
 export JENKINS_ARCH_CXX_FLAG="-mcpu=power8"
 export JENKINS_ARCH_C_FLAG="-mcpu=power8"
 export BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a;gfortran;gomp"
-- 
GitLab